[SCM] intel-vaapi-driver/upstream: Imported Upstream version 1.0.19

mfv-guest at users.alioth.debian.org mfv-guest at users.alioth.debian.org
Tue Jan 15 15:26:45 UTC 2013


The following commit has been merged in the upstream branch:
commit c4f72d5a025d8139d20e333ee60891bf21d14ad4
Author: Matteo F. Vescovi <mfv.debian at gmail.com>
Date:   Tue Jan 15 16:23:52 2013 +0100

    Imported Upstream version 1.0.19

diff --git a/NEWS b/NEWS
index edb7d75..1db2047 100644
--- a/NEWS
+++ b/NEWS
@@ -1,15 +1,41 @@
-libva-driver-intel NEWS -- summary of changes.  2012-02-DD
+libva-intel-driver NEWS -- summary of changes.  2012-11-09
 Copyright (C) 2009-2011 Intel Corporation
 
-Version 1.0.16 - DD.Feb.2012
-* Fix MPEG-2 decoding of interlaced streams (SNB, IVB)
+Version 1.0.19 - 09.Nov.2012
+* Add support for Haswell
+* Add raw DRM support (Dmitry Ermilov)
+* Add Wayland support
+* Add support for display rotation attribute
+* Support 4K encoding on IVB and HSW
+* Drop explicit dependency on X11 and libva-x11
+* Fix VC-1 decoding when VSTRANSFORM is 0
+* Fix SIGSEGV caused by use-after-free of the bufmgr (Stéphane Marchesin)
+* Fix thread safety issue (Gautam)
+* Fix vaUnlockSurface() for libva trace
+
+Version 1.0.18 - 02.Aug.2012
+* Add JPEG decoding on Ivy Bridge
+* Add support for a new Ivy Bridge chip
+* Add support for vaSyncSurface() and vaQuerySurfaceStatus() (Dmitry Ermilov)
+* Fix decoding of MPEG-2 videos with implicit IQ matrices
+* Fix concurrent creation of VA objects (MT safety)
+* Fix decoding of large resolution videos (up to 4K on IVB)
+
+Version 1.0.17 - 02.Apr.2012
+* Add support for IMC1/IMC3 surface formats
 * Fix rendering of interlaced surfaces
+* Fix MPEG-2 decoding of interlaced streams (SNB, IVB)
+* Fix H.264 weighted prediction indicator (SNB)
+* Fix and simplify calculation of H.264 macroblock bit offset (ILK, SNB, IVB)
+
+Version 1.0.16 - 14.Feb.2012
 * Fix VC-1 bitplane buffer size (SNB, IVB)
 * Fix VC-1 motion vector modes for Ivy Bridge
-* Fix weighted prediction indicator for Sandy Bridge
 * Fix MFX_QM_STATE for H.264 flat scaling lists (IVB)
 * Fix and simplify AVC_REF_IDX_STATE setup (ILK, SNB, IVB)
-* Fix and simplify first macroblock bit offset calculation (ILK, SNB, IVB)
+* Fix memory leak of encoder buffers
+* Fix check for internal VA surface format prior to rendering
+* Add support for B43 chipset (Alexander Inyukhin)
 
 Version 1.0.15 - 28.Oct.2011
 * Add auto-generated Debian packaging
diff --git a/README b/README
index d681d42..09ee0b3 100644
--- a/README
+++ b/README
@@ -1,5 +1,5 @@
 
-  libva-driver-intel
+  libva-intel-driver
   VA driver for Intel G45 & HD Graphics family
 
   Copyright (C) 2009-2011 Intel Corporation
@@ -14,7 +14,7 @@ Please read the COPYING file available in this package.
 Overview
 --------
 
-libva-driver-intel is the VA-API implementation for Intel G45 chipsets
+libva-intel-driver is the VA-API implementation for Intel G45 chipsets
 and Intel HD Graphics for Intel Core processor family.
 
 Platform definitions:
@@ -36,4 +36,4 @@ VC-1    D   SNB+
 Requirements
 ------------
 
-libva >= 1.0.14
+libva >= 1.0.16
diff --git a/configure.ac b/configure.ac
index d2481d0..7a9e097 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1,7 +1,7 @@
 # intel-driver package version number
 m4_define([intel_driver_major_version], [1])
 m4_define([intel_driver_minor_version], [0])
-m4_define([intel_driver_micro_version], [18])
+m4_define([intel_driver_micro_version], [19])
 m4_define([intel_driver_pre_version],   [0])
 m4_define([intel_driver_version],
           [intel_driver_major_version.intel_driver_minor_version.intel_driver_micro_version])
@@ -11,15 +11,16 @@ m4_append([intel_driver_version], intel_driver_pre_version, [.pre])
 
 # libva minimum version requirement
 m4_define([libva_package_version], [1.0.14])
+m4_define([va_api_version], [0.32.0])
 
 # libdrm minimum version requirement
 m4_define([libdrm_version], [2.4.23])
 
 AC_PREREQ([2.57])
 AC_INIT([intel_driver], [intel_driver_version], [haihao.xiang at intel.com],
-        [libva-driver-intel])
+        [libva-intel-driver])
 AC_CONFIG_SRCDIR([Makefile.am])
-AM_INIT_AUTOMAKE
+AM_INIT_AUTOMAKE([1.9 tar-ustar])
 
 AM_CONFIG_HEADER([src/config.h])
 
@@ -40,6 +41,21 @@ m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])], [
     AC_SUBST(AM_DEFAULT_VERBOSITY)
 ])
 
+AC_ARG_ENABLE(drm,
+    [AC_HELP_STRING([--enable-drm],
+                    [build with VA/DRM API support @<:@default=yes@:>@])],
+    [], [enable_drm="yes"])
+
+AC_ARG_ENABLE(x11,
+    [AC_HELP_STRING([--enable-x11],
+                    [build with VA/X11 API support @<:@default=yes@:>@])],
+    [], [enable_x11="yes"])
+
+AC_ARG_ENABLE([wayland],
+    [AC_HELP_STRING([--enable-wayland],
+                    [build with VA/Wayland API support @<:@default=yes@:>@])],
+    [], [enable_wayland="yes"])
+
 AC_DISABLE_STATIC
 AC_PROG_LIBTOOL
 AC_PROG_CC
@@ -58,13 +74,29 @@ PKG_CHECK_MODULES([DRM], [libdrm >= $LIBDRM_VERSION])
 AC_SUBST(LIBDRM_VERSION)
 
 dnl Check for gen4asm
-PKG_CHECK_MODULES(GEN4ASM, [intel-gen4asm >= 1.2], [gen4asm=yes], [gen4asm=no])
+PKG_CHECK_MODULES(GEN4ASM, [intel-gen4asm >= 1.3], [gen4asm=yes], [gen4asm=no])
 AM_CONDITIONAL(HAVE_GEN4ASM, test x$gen4asm = xyes)
 AC_PATH_PROG([GEN4ASM], [intel-gen4asm])
 
 dnl Check for VA-API
-PKG_CHECK_MODULES(LIBVA_DEPS,	  [libva])
-PKG_CHECK_MODULES(LIBVA_X11_DEPS, [libva-x11])
+PKG_CHECK_MODULES(LIBVA_DEPS,     [libva >= va_api_version])
+
+dnl Check for VA/DRM API
+USE_DRM="$enable_drm"
+if test "$USE_DRM" = "yes"; then
+    PKG_CHECK_MODULES(LIBVA_DRM_DEPS, [libva-drm],
+      [AC_DEFINE([HAVE_VA_DRM], [1], [Defined to 1 if VA/DRM API is enabled])],
+      [USE_DRM="no"])
+
+    # Check for <drm_fourcc.h>
+    if test "$USE_DRM" = "yes"; then
+        saved_CPPFLAGS="$CPPFLAGS"
+        CPPFLAGS="$CPPFLAGS $DRM_CFLAGS"
+        AC_CHECK_HEADERS([drm_fourcc.h], [:], [USE_DRM="no"])
+        CPPFLAGS="$saved_CPPFLAGS"
+    fi
+fi
+AM_CONDITIONAL(USE_DRM, test "$USE_DRM" = "yes")
 
 VA_VERSION=`$PKG_CONFIG --modversion libva`
 VA_MAJOR_VERSION=`echo "$VA_VERSION" | cut -d'.' -f1`
@@ -80,6 +112,15 @@ VA_DRIVER_INIT_FUNC="__vaDriverInit_${VA_MAJOR_VERSION}_${VA_MINOR_VERSION}"
 AC_DEFINE_UNQUOTED([VA_DRIVER_INIT_FUNC], [$VA_DRIVER_INIT_FUNC],
     [Define driver entry-point])
 
+dnl Check for VA/X11 API
+USE_X11="$enable_x11"
+if test "$USE_X11" = "yes"; then
+    PKG_CHECK_MODULES(LIBVA_X11_DEPS, [libva-x11],
+      [AC_DEFINE([HAVE_VA_X11], [1], [Defined to 1 if VA/X11 API is enabled])],
+      [USE_X11="no"])
+fi
+AM_CONDITIONAL(USE_X11, test "$USE_X11" = "yes")
+
 dnl Check for VA-API drivers path
 AC_MSG_CHECKING([for VA drivers path])
 LIBVA_DRIVERS_PATH=`$PKG_CONFIG libva --variable driverdir`
@@ -89,25 +130,60 @@ fi
 AC_MSG_RESULT([$LIBVA_DRIVERS_PATH])
 AC_SUBST(LIBVA_DRIVERS_PATH)
 
+# Check for EGL
+if test "$enable_wayland" = "yes"; then
+    enable_egl="yes"
+fi
+
+USE_EGL="no"
+if test "$enable_egl" = "yes"; then
+    PKG_CHECK_MODULES([EGL], [egl], [USE_EGL="yes"], [USE_EGL="no"])
+    saved_CPPFLAGS="$CPPFLAGS"
+    saved_LIBS="$LIBS"
+    CPPFLAGS="$CPPFLAGS $EGL_CFLAGS"
+    LIBS="$LIBS $EGL_LIBS"
+    AC_CHECK_HEADERS([EGL/egl.h], [:], [USE_EGL="no"])
+    AC_CHECK_LIB([EGL], [eglGetDisplay], [:], [USE_EGL="no"])
+    CPPFLAGS="$saved_CPPFLAGS"
+    LIBS="$saved_LIBS"
+fi
+AM_CONDITIONAL(USE_EGL, test "$USE_EGL" = "yes")
+
+# Check for Wayland
+USE_WAYLAND="no"
+if test "$enable_wayland" = "yes"; then
+    PKG_CHECK_MODULES([WAYLAND], [wayland-client], [USE_WAYLAND="yes"], [:])
+    PKG_CHECK_MODULES([LIBVA_WAYLAND_DEPS], [libva-wayland],
+        [AC_DEFINE([HAVE_VA_WAYLAND], [1], [Defined to 1 if VA/Wayland API is enabled])],
+        [USE_WAYLAND="no"])
+fi
+AM_CONDITIONAL(USE_WAYLAND, test "$USE_WAYLAND" = "yes")
+
+m4_ifdef([WAYLAND_SCANNER_RULES],
+    [WAYLAND_SCANNER_RULES(['$(top_srcdir)/src/wayland'])],
+    [wayland_scanner_rules=""; AC_SUBST(wayland_scanner_rules)])
+
 dnl Check for JPEG decoding API
-AC_CACHE_CHECK([for JPEG decoding API], ac_cv_have_jpeg_decoding_api, [
-    saved_CFLAGS="$CFLAGS"
-    CFLAGS="$CFLAGS $LIBVA_DEPS_CFLAGS"
+AC_CACHE_CHECK([for JPEG decoding API], ac_cv_have_va_jpeg_decode, [
+    saved_CPPFLAGS="$CPPFLAGS"
+    CPPFLAGS="$CPPFLAGS $LIBVA_DEPS_CFLAGS"
     saved_LIBS="$LIBS"
-    LIBS="$CFLAGS $LIBVA_DEPS_LIBS"
-    AC_TRY_COMPILE(
-        [#include <va/va.h>],
-        [VAPictureParameterBufferJPEG pic_param;
-         VASliceParameterBufferJPEG slice_param;
-         VAIQMatrixBufferJPEG iq_matrix;],
-        [ac_cv_have_jpeg_decoding_api="yes"],
-        [ac_cv_have_jpeg_decoding_api="no"]
+    LIBS="$LIBS $LIBVA_DEPS_LIBS"
+    AC_COMPILE_IFELSE(
+        [AC_LANG_PROGRAM(
+            [[#include <va/va.h>]],
+            [[VAPictureParameterBufferJPEGBaseline pic_param;
+              VASliceParameterBufferJPEGBaseline slice_param;
+              VAHuffmanTableBufferJPEGBaseline huffman_table;
+              VAIQMatrixBufferJPEGBaseline iq_matrix;]])],
+            [ac_cv_have_va_jpeg_decode="yes"],
+            [ac_cv_have_va_jpeg_decode="no"]
     )
-    CFLAGS="$saved_CFLAGS"
+    CPPFLAGS="$saved_CPPFLAGS"
     LIBS="$saved_LIBS"
 ])
-if test "$ac_cv_have_jpeg_decoding_api" = "yes"; then
-    AC_DEFINE(HAVE_JPEG_DECODING, 1,
+if test "$ac_cv_have_va_jpeg_decode" = "yes"; then
+    AC_DEFINE(HAVE_VA_JPEG_DECODE, 1,
         [Defined to 1 if VA-API exposes JPEG decoding])
 fi
 
@@ -122,14 +198,23 @@ AC_OUTPUT([
     src/shaders/mpeg2/Makefile
     src/shaders/mpeg2/vld/Makefile
     src/shaders/post_processing/Makefile
+    src/shaders/post_processing/gen5_6/Makefile
+    src/shaders/post_processing/gen7/Makefile
     src/shaders/render/Makefile
     src/shaders/vme/Makefile
+    src/wayland/Makefile
 ])
 
 dnl Print summary
+BACKENDS=""
+AS_IF([test "$USE_DRM" = "yes"], [BACKENDS="$BACKENDS drm"])
+AS_IF([test "$USE_X11" = "yes"], [BACKENDS="$BACKENDS x11"])
+AS_IF([test "$USE_WAYLAND" = "yes"], [BACKENDS="$BACKENDS wayland"])
+
 echo
 echo $PACKAGE configuration summary:
 echo
 echo VA-API version ................... : $VA_VERSION_STR
 echo VA-API drivers path .............. : $LIBVA_DRIVERS_PATH
+echo Windowing systems ................ : $BACKENDS
 echo
diff --git a/debian.upstream/Makefile.am b/debian.upstream/Makefile.am
index 679dab6..9ef70b6 100644
--- a/debian.upstream/Makefile.am
+++ b/debian.upstream/Makefile.am
@@ -3,7 +3,7 @@ DEBIANFILES = \
 	compat					\
 	control.in				\
 	copyright				\
-	libva-driver-intel.install		\
+	libva-intel-driver.install		\
 	rules					\
 	$(NULL)
 
diff --git a/debian.upstream/changelog.in b/debian.upstream/changelog.in
index 70310b5..dff4a96 100644
--- a/debian.upstream/changelog.in
+++ b/debian.upstream/changelog.in
@@ -1,4 +1,4 @@
-libva-driver-intel (@PACKAGE_VERSION@-1) unstable; urgency=low
+libva-intel-driver (@PACKAGE_VERSION@-1) unstable; urgency=low
 
   * Autogenerated package, see NEWS file for ChangeLog.
 
diff --git a/debian.upstream/control.in b/debian.upstream/control.in
index 88ee928..08b1029 100644
--- a/debian.upstream/control.in
+++ b/debian.upstream/control.in
@@ -1,4 +1,4 @@
-Source: libva-driver-intel
+Source: libva-intel-driver
 Section: libs
 Priority: optional
 Maintainer: Gwenole Beauchesne <gwenole.beauchesne at intel.com>
@@ -8,7 +8,7 @@ Build-Depends: debhelper (>= 5),
 	       libva-dev (>= @LIBVA_PACKAGE_VERSION@)
 Standards-Version: 3.7.2
 
-Package: libva-driver-intel
+Package: libva-intel-driver
 Section: libs
 Architecture: any
 Depends: libva1 (>= @LIBVA_PACKAGE_VERSION@),
@@ -17,10 +17,10 @@ Description: VA driver for Intel G45 & HD Graphics family
  Video decode & encode driver for Intel G45 chipsets and Intel HD
  Graphics for Intel Core processor family.
 
-Package: libva-driver-intel-dbg
+Package: libva-intel-driver-dbg
 Section: libdevel
 Architecture: any
-Depends: libva-driver-intel (= ${Source-Version})
+Depends: libva-intel-driver (= ${Source-Version})
 Description: VA driver for Intel G45 & HD Graphics family (debug symbols)
  Video decode & encode driver for Intel G45 chipsets and Intel HD
  Graphics for Intel Core processor family.
diff --git a/debian.upstream/libva-driver-intel.install b/debian.upstream/libva-intel-driver.install
similarity index 100%
rename from debian.upstream/libva-driver-intel.install
rename to debian.upstream/libva-intel-driver.install
diff --git a/src/Makefile.am b/src/Makefile.am
index 1e4bbf2..cbe0795 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -40,17 +40,21 @@ driver_ldflags = \
 	$(NULL)
 
 driver_libs = \
-	-lpthread		\
+	-lpthread -ldl		\
 	$(DRM_LIBS) -ldrm_intel	\
 	$(LIBVA_DEPS_LIBS)	\
-	$(LIBVA_X11_DEPS_LIBS)	\
 	$(NULL)
 
 source_c = \
+	dso_utils.c		\
 	gen6_mfc.c		\
 	gen6_mfd.c		\
 	gen6_vme.c		\
 	gen7_mfd.c		\
+	gen75_mfd.c		\
+	gen75_vme.c		\
+	gen75_mfc.c		\
+	gen75_vpp_vebox.c	\
 	i965_avc_bsd.c		\
 	i965_avc_hw_scoreboard.c\
 	i965_avc_ildb.c		\
@@ -70,10 +74,12 @@ source_c = \
 	$(NULL)
 
 source_h = \
+	dso_utils.h		\
 	gen6_mfc.h		\
 	gen6_mfd.h		\
 	gen6_vme.h		\
 	gen7_mfd.h		\
+	gen75_vpp_vebox.h	\
 	i965_avc_bsd.h		\
 	i965_avc_hw_scoreboard.h\
 	i965_avc_ildb.h		\
@@ -95,6 +101,8 @@ source_h = \
 	intel_driver.h          \
 	intel_memman.h          \
 	object_heap.h           \
+	sysdeps.h		\
+	va_backend_compat.h	\
 	$(NULL)
 
 i965_drv_video_la_LTLIBRARIES	= i965_drv_video.la
@@ -105,5 +113,22 @@ i965_drv_video_la_LIBADD	= $(driver_libs)
 i965_drv_video_la_SOURCES	= $(source_c)
 noinst_HEADERS			= $(source_h)
 
+if USE_X11
+source_c			+= i965_output_dri.c
+source_h			+= i965_output_dri.h
+endif
+
+if USE_WAYLAND
+source_c			+= i965_output_wayland.c
+source_h			+= i965_output_wayland.h
+driver_cflags			+= $(WAYLAND_CFLAGS)
+endif
+
+# Wayland protocol
+i965_output_wayland.c: $(protocol_source_h)
+@wayland_scanner_rules@
+
+DIST_SUBDIRS = $(SUBDIRS) wayland
+
 # Extra clean files so that maintainer-clean removes *everything*
 MAINTAINERCLEANFILES = Makefile.in config.h.in
diff --git a/src/dso_utils.c b/src/dso_utils.c
new file mode 100644
index 0000000..8fdea11
--- /dev/null
+++ b/src/dso_utils.c
@@ -0,0 +1,110 @@
+/*
+ * Copyright (C) 2012 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#define _GNU_SOURCE 1
+#include <stdio.h>
+#include <stdlib.h>
+#include <dlfcn.h>
+#include "dso_utils.h"
+
+struct dso_handle {
+    void       *handle;
+};
+
+/* Opens the named shared library */
+struct dso_handle *
+dso_open(const char *path)
+{
+    struct dso_handle *h;
+
+    h = calloc(1, sizeof(*h));
+    if (!h)
+        return NULL;
+
+    if (path) {
+        h->handle = dlopen(path, RTLD_LAZY|RTLD_LOCAL);
+        if (!h->handle)
+            goto error;
+    }
+    else
+        h->handle = RTLD_DEFAULT;
+    return h;
+
+error:
+    dso_close(h);
+    return NULL;
+}
+
+/* Closes the library and disposes of any allocated data */
+void
+dso_close(struct dso_handle *h)
+{
+    if (!h)
+        return;
+
+    if (h->handle) {
+        if (h->handle != RTLD_DEFAULT)
+            dlclose(h->handle);
+        h->handle = NULL;
+    }
+    free(h);
+}
+
+/* Load symbol into the supplied location */
+static bool
+get_symbol(struct dso_handle *h, void *func_vptr, const char *name)
+{
+    dso_generic_func func, * const func_ptr = func_vptr;
+    const char *error;
+
+    dlerror();
+    func = (dso_generic_func)dlsym(h->handle, name);
+    error = dlerror();
+    if (error) {
+        fprintf(stderr, "error: failed to resolve %s(): %s\n", name, error);
+        return false;
+    }
+    *func_ptr = func;
+    return true;
+}
+
+/* Loads symbols into the supplied vtable */
+bool
+dso_get_symbols(
+    struct dso_handle          *h,
+    void                       *vtable,
+    unsigned int                vtable_length,
+    const struct dso_symbol    *symbols
+)
+{
+    const struct dso_symbol *s;
+
+    for (s = symbols; s->name != NULL; s++) {
+        if (s->offset + sizeof(dso_generic_func) > vtable_length)
+            return false;
+        if (!get_symbol(h, ((char *)vtable) + s->offset, s->name))
+            return false;
+    }
+    return true;
+}
diff --git a/src/dso_utils.h b/src/dso_utils.h
new file mode 100644
index 0000000..9b8eba7
--- /dev/null
+++ b/src/dso_utils.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright (C) 2012 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef DSO_UTILS_H
+#define DSO_UTILS_H
+
+#include <stdbool.h>
+
+/** Generic pointer to function. */
+typedef void (*dso_generic_func)(void);
+
+/** Library handle (opaque). */
+struct dso_handle;
+
+/** Symbol lookup table. */
+struct dso_symbol {
+    /** Symbol name */
+    const char  *name;
+    /** Offset into the supplied vtable where symbol is to be loaded. */
+    unsigned int offset;
+};
+
+/**
+ * Opens the named shared library.
+ *
+ * @param[in]  path  the library name, or NULL to lookup into loaded libraries
+ * @return the newly allocated library handle, or NULL on failure
+ */
+struct dso_handle *
+dso_open(const char *path);
+
+/** Closes the library and disposes of any allocated data. */
+void
+dso_close(struct dso_handle *h);
+
+/**
+ * Loads symbols into the supplied vtable.
+ *
+ * @param[in]  h                the DSO handle
+ * @param[out] vtable           the function table to fill in
+ * @param[in]  vtable_length    the size (in bytes) of the function table
+ * @param[in]  symbols          the NULL terminated array of symbols to lookup
+ * @return true on success, false otherwise
+ **/
+bool
+dso_get_symbols(
+    struct dso_handle          *h,
+    void                       *vtable,
+    unsigned int                vtable_length,
+    const struct dso_symbol    *symbols
+);
+
+#endif /* DSO_UTILS_H */
diff --git a/src/gen6_mfc.c b/src/gen6_mfc.c
index 863d4ec..a479f80 100644
--- a/src/gen6_mfc.c
+++ b/src/gen6_mfc.c
@@ -39,9 +39,12 @@
 #include "i965_encoder.h"
 
 static void
-gen6_mfc_pipe_mode_select(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
+gen6_mfc_pipe_mode_select(VADriverContextP ctx,
+                          struct gen6_encoder_context *gen6_encoder_context,
+                          struct intel_batchbuffer *batch)
 {
-    struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
+    if (batch == NULL)
+        batch = gen6_encoder_context->base.batch;
 
     BEGIN_BCS_BATCH(batch, 4);
 
@@ -73,9 +76,11 @@ gen6_mfc_pipe_mode_select(VADriverContextP ctx, struct gen6_encoder_context *gen
 static void
 gen7_mfc_pipe_mode_select(VADriverContextP ctx,
                           int standard_select,
-                          struct gen6_encoder_context *gen6_encoder_context)
+                          struct gen6_encoder_context *gen6_encoder_context,
+                          struct intel_batchbuffer *batch)
 {
-    struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
+    if (batch == NULL)
+        batch = gen6_encoder_context->base.batch;
 
     assert(standard_select == MFX_FORMAT_MPEG2 ||
            standard_select == MFX_FORMAT_AVC);
@@ -107,11 +112,15 @@ gen7_mfc_pipe_mode_select(VADriverContextP ctx,
 }
 
 static void
-gen6_mfc_surface_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
+gen6_mfc_surface_state(VADriverContextP ctx,
+                       struct gen6_encoder_context *gen6_encoder_context,
+                       struct intel_batchbuffer *batch)
 {
-    struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
 
+    if (batch == NULL)
+        batch = gen6_encoder_context->base.batch;
+
     BEGIN_BCS_BATCH(batch, 6);
 
     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
@@ -135,11 +144,15 @@ gen6_mfc_surface_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_e
 }
 
 static void
-gen7_mfc_surface_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
+gen7_mfc_surface_state(VADriverContextP ctx,
+                       struct gen6_encoder_context *gen6_encoder_context,
+                       struct intel_batchbuffer *batch)
 {
-    struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
 
+    if (batch == NULL)
+        batch = gen6_encoder_context->base.batch;
+
     BEGIN_BCS_BATCH(batch, 6);
 
     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
@@ -163,12 +176,16 @@ gen7_mfc_surface_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_e
 }
 
 static void
-gen6_mfc_pipe_buf_addr_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
+gen6_mfc_pipe_buf_addr_state(VADriverContextP ctx,
+                             struct gen6_encoder_context *gen6_encoder_context,
+                             struct intel_batchbuffer *batch)
 {
-    struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
     int i;
 
+    if (batch == NULL)
+        batch = gen6_encoder_context->base.batch;
+
     BEGIN_BCS_BATCH(batch, 24);
 
     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (24 - 2));
@@ -206,11 +223,15 @@ gen6_mfc_pipe_buf_addr_state(VADriverContextP ctx, struct gen6_encoder_context *
 }
 
 static void
-gen6_mfc_ind_obj_base_addr_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
+gen6_mfc_ind_obj_base_addr_state(VADriverContextP ctx,
+                                 struct gen6_encoder_context *gen6_encoder_context,
+                                 struct intel_batchbuffer *batch)
 {
-    struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
     struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
 
+    if (batch == NULL)
+        batch = gen6_encoder_context->base.batch;
+
     BEGIN_BCS_BATCH(batch, 11);
 
     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
@@ -231,11 +252,15 @@ gen6_mfc_ind_obj_base_addr_state(VADriverContextP ctx, struct gen6_encoder_conte
 }
 
 static void
-gen7_mfc_ind_obj_base_addr_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
+gen7_mfc_ind_obj_base_addr_state(VADriverContextP ctx,
+                                 struct gen6_encoder_context *gen6_encoder_context,
+                                 struct intel_batchbuffer *batch)
 {
-    struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
     struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
 
+    if (batch == NULL)
+        batch = gen6_encoder_context->base.batch;
+
     BEGIN_BCS_BATCH(batch, 11);
 
     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
@@ -256,11 +281,15 @@ gen7_mfc_ind_obj_base_addr_state(VADriverContextP ctx, struct gen6_encoder_conte
 }
 
 static void
-gen6_mfc_bsp_buf_base_addr_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
+gen6_mfc_bsp_buf_base_addr_state(VADriverContextP ctx,
+                                 struct gen6_encoder_context *gen6_encoder_context,
+                                 struct intel_batchbuffer *batch)
 {
-    struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
 
+    if (batch == NULL)
+        batch = gen6_encoder_context->base.batch;
+
     BEGIN_BCS_BATCH(batch, 4);
 
     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));
@@ -274,14 +303,17 @@ gen6_mfc_bsp_buf_base_addr_state(VADriverContextP ctx, struct gen6_encoder_conte
 }
 
 static void
-gen6_mfc_avc_img_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
+gen6_mfc_avc_img_state(VADriverContextP ctx,
+                       struct gen6_encoder_context *gen6_encoder_context,
+                       struct intel_batchbuffer *batch)
 {
-    struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
-
     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
 
+    if (batch == NULL)
+        batch = gen6_encoder_context->base.batch;
+
     BEGIN_BCS_BATCH(batch, 13);
     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (13 - 2));
     OUT_BCS_BATCH(batch, 
@@ -328,14 +360,17 @@ gen6_mfc_avc_img_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_e
 }
 
 static void
-gen7_mfc_avc_img_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
+gen7_mfc_avc_img_state(VADriverContextP ctx,
+                       struct gen6_encoder_context *gen6_encoder_context,
+                       struct intel_batchbuffer *batch)
 {
-    struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
-
     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
 
+    if (batch == NULL)
+        batch = gen6_encoder_context->base.batch;
+
     BEGIN_BCS_BATCH(batch, 16);
     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
     OUT_BCS_BATCH(batch,
@@ -385,49 +420,16 @@ gen7_mfc_avc_img_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_e
     ADVANCE_BCS_BATCH(batch);
 }
 
-static void gen6_mfc_avc_directmode_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
-{
-    struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
-    int i;
-
-    BEGIN_BCS_BATCH(batch, 69);
-
-    OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));
-    //TODO: reference DMV
-    for(i = 0; i < 16; i++){
-        OUT_BCS_BATCH(batch, 0);
-        OUT_BCS_BATCH(batch, 0);
-    }
-
-    //TODO: current DMV just for test
-#if 0
-    OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[0].bo,
-                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
-                  0);
-#else
-    //drm_intel_bo_pin(mfc_context->direct_mv_buffers[0].bo, 0x1000);
-    //OUT_BCS_BATCH(batch, mfc_context->direct_mv_buffers[0].bo->offset);
-    OUT_BCS_BATCH(batch, 0);
-#endif
-
-
-    OUT_BCS_BATCH(batch, 0);
-
-    //TODO: POL list
-    for(i = 0; i < 34; i++) {
-        OUT_BCS_BATCH(batch, 0);
-    }
-
-    ADVANCE_BCS_BATCH(batch);
-}
-
 static void gen6_mfc_avc_slice_state(VADriverContextP ctx,
                                      int intra_slice,
-                                     struct gen6_encoder_context *gen6_encoder_context)
+                                     struct gen6_encoder_context *gen6_encoder_context,
+                                     struct intel_batchbuffer *batch)
 {
-    struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
 
+    if (batch == NULL)
+        batch = gen6_encoder_context->base.batch;
+
     BEGIN_BCS_BATCH(batch, 11);;
 
     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2) );
@@ -470,11 +472,15 @@ static void gen6_mfc_avc_slice_state(VADriverContextP ctx,
 
     ADVANCE_BCS_BATCH(batch);
 }
-static void gen6_mfc_avc_qm_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
+static void gen6_mfc_avc_qm_state(VADriverContextP ctx,
+                                  struct gen6_encoder_context *gen6_encoder_context,
+                                  struct intel_batchbuffer *batch)
 {
-    struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
     int i;
 
+    if (batch == NULL)
+        batch = gen6_encoder_context->base.batch;
+
     BEGIN_BCS_BATCH(batch, 58);
 
     OUT_BCS_BATCH(batch, MFX_AVC_QM_STATE | 56);
@@ -486,11 +492,15 @@ static void gen6_mfc_avc_qm_state(VADriverContextP ctx, struct gen6_encoder_cont
     ADVANCE_BCS_BATCH(batch);
 }
 
-static void gen6_mfc_avc_fqm_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
+static void gen6_mfc_avc_fqm_state(VADriverContextP ctx,
+                                   struct gen6_encoder_context *gen6_encoder_context,
+                                   struct intel_batchbuffer *batch)
 {
-    struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
     int i;
 
+    if (batch == NULL)
+        batch = gen6_encoder_context->base.batch;
+
     BEGIN_BCS_BATCH(batch, 113);
     OUT_BCS_BATCH(batch, MFC_AVC_FQM_STATE | (113 - 2));
 
@@ -506,11 +516,14 @@ gen7_mfc_qm_state(VADriverContextP ctx,
                   int qm_type,
                   unsigned int *qm,
                   int qm_length,
-                  struct gen6_encoder_context *gen6_encoder_context)
+                  struct gen6_encoder_context *gen6_encoder_context,
+                  struct intel_batchbuffer *batch)
 {
-    struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
     unsigned int qm_buffer[16];
 
+    if (batch == NULL)
+        batch = gen6_encoder_context->base.batch;
+
     assert(qm_length <= 16);
     assert(sizeof(*qm) == 4);
     memcpy(qm_buffer, qm, qm_length * 4);
@@ -522,7 +535,9 @@ gen7_mfc_qm_state(VADriverContextP ctx,
     ADVANCE_BCS_BATCH(batch);
 }
 
-static void gen7_mfc_avc_qm_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
+static void gen7_mfc_avc_qm_state(VADriverContextP ctx,
+                                  struct gen6_encoder_context *gen6_encoder_context,
+                                  struct intel_batchbuffer *batch)
 {
     unsigned int qm[16] = {
         0x10101010, 0x10101010, 0x10101010, 0x10101010,
@@ -531,10 +546,10 @@ static void gen7_mfc_avc_qm_state(VADriverContextP ctx, struct gen6_encoder_cont
         0x10101010, 0x10101010, 0x10101010, 0x10101010
     };
 
-    gen7_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 12, gen6_encoder_context);
-    gen7_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 12, gen6_encoder_context);
-    gen7_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 16, gen6_encoder_context);
-    gen7_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 16, gen6_encoder_context);
+    gen7_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 12, gen6_encoder_context, batch);
+    gen7_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 12, gen6_encoder_context, batch);
+    gen7_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 16, gen6_encoder_context, batch);
+    gen7_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 16, gen6_encoder_context, batch);
 }
 
 static void
@@ -542,11 +557,14 @@ gen7_mfc_fqm_state(VADriverContextP ctx,
                    int fqm_type,
                    unsigned int *fqm,
                    int fqm_length,
-                   struct gen6_encoder_context *gen6_encoder_context)
+                   struct gen6_encoder_context *gen6_encoder_context,
+                   struct intel_batchbuffer *batch)
 {
-    struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
     unsigned int fqm_buffer[32];
 
+    if (batch == NULL)
+        batch = gen6_encoder_context->base.batch;
+
     assert(fqm_length <= 32);
     assert(sizeof(*fqm) == 4);
     memcpy(fqm_buffer, fqm, fqm_length * 4);
@@ -558,7 +576,9 @@ gen7_mfc_fqm_state(VADriverContextP ctx,
     ADVANCE_BCS_BATCH(batch);
 }
 
-static void gen7_mfc_avc_fqm_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
+static void gen7_mfc_avc_fqm_state(VADriverContextP ctx,
+                                   struct gen6_encoder_context *gen6_encoder_context,
+                                   struct intel_batchbuffer *batch)
 {
     unsigned int qm[32] = {
         0x10001000, 0x10001000, 0x10001000, 0x10001000,
@@ -571,17 +591,21 @@ static void gen7_mfc_avc_fqm_state(VADriverContextP ctx, struct gen6_encoder_con
         0x10001000, 0x10001000, 0x10001000, 0x10001000
     };
 
-    gen7_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 24, gen6_encoder_context);
-    gen7_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 24, gen6_encoder_context);
-    gen7_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 32, gen6_encoder_context);
-    gen7_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 32, gen6_encoder_context);
+    gen7_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 24, gen6_encoder_context, batch);
+    gen7_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 24, gen6_encoder_context, batch);
+    gen7_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 32, gen6_encoder_context, batch);
+    gen7_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 32, gen6_encoder_context, batch);
 }
 
-static void gen6_mfc_avc_ref_idx_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
+static void gen6_mfc_avc_ref_idx_state(VADriverContextP ctx,
+                                       struct gen6_encoder_context *gen6_encoder_context,
+                                       struct intel_batchbuffer *batch)
 {
-    struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
     int i;
 
+    if (batch == NULL)
+        batch = gen6_encoder_context->base.batch;
+
     BEGIN_BCS_BATCH(batch, 10);
 
     OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
@@ -595,32 +619,16 @@ static void gen6_mfc_avc_ref_idx_state(VADriverContextP ctx, struct gen6_encoder
     ADVANCE_BCS_BATCH(batch);
 }
 	
-static void
-gen6_mfc_avc_insert_object(VADriverContextP ctx, int flush_data, struct gen6_encoder_context *gen6_encoder_context)
-{
-    struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
-
-    BEGIN_BCS_BATCH(batch, 4);
-
-    OUT_BCS_BATCH(batch, MFC_AVC_INSERT_OBJECT | (4 -2 ) );
-    OUT_BCS_BATCH(batch, (32<<8) | 
-                  (1 << 3) |
-                  (1 << 2) |
-                  (flush_data << 1) |
-                  (1<<0) );
-    OUT_BCS_BATCH(batch, 0x00000003);
-    OUT_BCS_BATCH(batch, 0xABCD1234);
-
-    ADVANCE_BCS_BATCH(batch);
-}
-
 static int
 gen6_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int end_mb, int qp,unsigned int *msg,
-                              struct gen6_encoder_context *gen6_encoder_context)
+                              struct gen6_encoder_context *gen6_encoder_context,
+                              struct intel_batchbuffer *batch)
 {
-    struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
     int len_in_dwords = 11;
 
+    if (batch == NULL)
+        batch = gen6_encoder_context->base.batch;
+
     BEGIN_BCS_BATCH(batch, len_in_dwords);
 
     OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
@@ -651,11 +659,13 @@ gen6_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int end_mb, in
 }
 
 static int gen6_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, int qp, unsigned int offset,
-                                         struct gen6_encoder_context *gen6_encoder_context)
+                                         struct gen6_encoder_context *gen6_encoder_context, struct intel_batchbuffer *batch)
 {
-    struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
     int len_in_dwords = 11;
 
+    if (batch == NULL)
+        batch = gen6_encoder_context->base.batch;
+
     BEGIN_BCS_BATCH(batch, len_in_dwords);
 
     OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
@@ -696,12 +706,16 @@ static int gen6_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int
     return len_in_dwords;
 }
 
-static void gen6_mfc_init(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
+static void gen6_mfc_init(VADriverContextP ctx,
+                          struct encode_state *encode_state,
+                          struct gen6_encoder_context *gen6_encoder_context)
 {
     struct i965_driver_data *i965 = i965_driver_data(ctx);
     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
     dri_bo *bo;
     int i;
+    VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param->buffer;
+    int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
 
     /*Encode common setup for MFC*/
     dri_bo_unreference(mfc_context->post_deblocking_output.bo);
@@ -730,7 +744,7 @@ static void gen6_mfc_init(VADriverContextP ctx, struct gen6_encoder_context *gen
     dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
     bo = dri_bo_alloc(i965->intel.bufmgr,
                       "Buffer",
-                      128 * 64,
+                      width_in_mbs * 64,
                       64);
     assert(bo);
     mfc_context->intra_row_store_scratch_buffer.bo = bo;
@@ -738,7 +752,7 @@ static void gen6_mfc_init(VADriverContextP ctx, struct gen6_encoder_context *gen
     dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
     bo = dri_bo_alloc(i965->intel.bufmgr,
                       "Buffer",
-                      49152,  /* 6 * 128 * 64 */
+                      4 * width_in_mbs * 64,  /* 4 * width_in_mbs * 64 */
                       64);
     assert(bo);
     mfc_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
@@ -746,7 +760,7 @@ static void gen6_mfc_init(VADriverContextP ctx, struct gen6_encoder_context *gen
     dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
     bo = dri_bo_alloc(i965->intel.bufmgr,
                       "Buffer",
-                      12288, /* 1.5 * 128 * 64 */
+                      128 * width_in_mbs, /* 2 * width_in_mbs * 64 */
                       0x1000);
     assert(bo);
     mfc_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
@@ -757,7 +771,7 @@ void gen6_mfc_avc_pipeline_programing(VADriverContextP ctx,
                                       struct gen6_encoder_context *gen6_encoder_context)
 {
     struct i965_driver_data *i965 = i965_driver_data(ctx);
-    struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
+    struct intel_batchbuffer *main_batch = gen6_encoder_context->base.batch;
     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
     struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param->buffer;
@@ -768,8 +782,9 @@ void gen6_mfc_avc_pipeline_programing(VADriverContextP ctx,
     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
     int x,y;
+    struct intel_batchbuffer *batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, width_in_mbs * height_in_mbs * 12 * 4 + 0x800);
 
-    intel_batchbuffer_start_atomic_bcs(batch, 0x1000); 
+    intel_batchbuffer_start_atomic_bcs(batch, width_in_mbs * height_in_mbs * 12 * 4 + 0x700);
 
     if (is_intra) {
         dri_bo_map(vme_context->vme_output.bo , 1);
@@ -785,39 +800,39 @@ void gen6_mfc_avc_pipeline_programing(VADriverContextP ctx,
                 intel_batchbuffer_emit_mi_flush(batch);
                 
                 if (IS_GEN7(i965->intel.device_id)) {
-                    gen7_mfc_pipe_mode_select(ctx, MFX_FORMAT_AVC, gen6_encoder_context);
-                    gen7_mfc_surface_state(ctx, gen6_encoder_context);
-                    gen7_mfc_ind_obj_base_addr_state(ctx, gen6_encoder_context);
+                    gen7_mfc_pipe_mode_select(ctx, MFX_FORMAT_AVC, gen6_encoder_context, batch);
+                    gen7_mfc_surface_state(ctx, gen6_encoder_context, batch);
+                    gen7_mfc_ind_obj_base_addr_state(ctx, gen6_encoder_context, batch);
                 } else {
-                    gen6_mfc_pipe_mode_select(ctx, gen6_encoder_context);
-                    gen6_mfc_surface_state(ctx, gen6_encoder_context);
-                    gen6_mfc_ind_obj_base_addr_state(ctx, gen6_encoder_context);
+                    gen6_mfc_pipe_mode_select(ctx, gen6_encoder_context, batch);
+                    gen6_mfc_surface_state(ctx, gen6_encoder_context, batch);
+                    gen6_mfc_ind_obj_base_addr_state(ctx, gen6_encoder_context, batch);
                 }
 
-                gen6_mfc_pipe_buf_addr_state(ctx, gen6_encoder_context);
-                gen6_mfc_bsp_buf_base_addr_state(ctx, gen6_encoder_context);
+                gen6_mfc_pipe_buf_addr_state(ctx, gen6_encoder_context, batch);
+                gen6_mfc_bsp_buf_base_addr_state(ctx, gen6_encoder_context, batch);
 
                 if (IS_GEN7(i965->intel.device_id)) {
-                    gen7_mfc_avc_img_state(ctx, gen6_encoder_context);
-                    gen7_mfc_avc_qm_state(ctx, gen6_encoder_context);
-                    gen7_mfc_avc_fqm_state(ctx, gen6_encoder_context);
+                    gen7_mfc_avc_img_state(ctx, gen6_encoder_context, batch);
+                    gen7_mfc_avc_qm_state(ctx, gen6_encoder_context, batch);
+                    gen7_mfc_avc_fqm_state(ctx, gen6_encoder_context, batch);
                 } else {
-                    gen6_mfc_avc_img_state(ctx, gen6_encoder_context);
-                    gen6_mfc_avc_qm_state(ctx, gen6_encoder_context);
-                    gen6_mfc_avc_fqm_state(ctx, gen6_encoder_context);
+                    gen6_mfc_avc_img_state(ctx, gen6_encoder_context, batch);
+                    gen6_mfc_avc_qm_state(ctx, gen6_encoder_context, batch);
+                    gen6_mfc_avc_fqm_state(ctx, gen6_encoder_context, batch);
                 }
 
-                gen6_mfc_avc_ref_idx_state(ctx, gen6_encoder_context);
-                gen6_mfc_avc_slice_state(ctx, is_intra, gen6_encoder_context);
+                gen6_mfc_avc_ref_idx_state(ctx, gen6_encoder_context, batch);
+                gen6_mfc_avc_slice_state(ctx, is_intra, gen6_encoder_context, batch);
                 emit_new_state = 0;
             }
 
             if (is_intra) {
                 assert(msg);
-                object_len_in_bytes = gen6_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, gen6_encoder_context);
+                object_len_in_bytes = gen6_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, gen6_encoder_context, batch);
                 msg += 4;
             } else {
-                object_len_in_bytes = gen6_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp, offset, gen6_encoder_context);
+                object_len_in_bytes = gen6_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp, offset, gen6_encoder_context, batch);
                 offset += 64;
             }
 
@@ -832,8 +847,30 @@ void gen6_mfc_avc_pipeline_programing(VADriverContextP ctx,
 
     if (is_intra)
         dri_bo_unmap(vme_context->vme_output.bo);
-	
+
+    intel_batchbuffer_align(batch, 8);
+
+    BEGIN_BCS_BATCH(batch, 2);
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
+    ADVANCE_BCS_BATCH(batch);
+
     intel_batchbuffer_end_atomic(batch);
+
+    /* chain to the main batch buffer */
+    intel_batchbuffer_start_atomic_bcs(main_batch, 0x100);
+    intel_batchbuffer_emit_mi_flush(main_batch);
+    BEGIN_BCS_BATCH(main_batch, 2);
+    OUT_BCS_BATCH(main_batch, MI_BATCH_BUFFER_START | (1 << 8));
+    OUT_BCS_RELOC(main_batch,
+                  batch->buffer,
+                  I915_GEM_DOMAIN_COMMAND, 0,
+                  0);
+    ADVANCE_BCS_BATCH(main_batch);
+    intel_batchbuffer_end_atomic(main_batch);
+
+    // end programing             
+    intel_batchbuffer_free(batch);	
 }
 
 static VAStatus gen6_mfc_avc_prepare(VADriverContextP ctx, 
@@ -921,7 +958,7 @@ gen6_mfc_avc_encode_picture(VADriverContextP ctx,
                             struct encode_state *encode_state,
                             struct gen6_encoder_context *gen6_encoder_context)
 {
-    gen6_mfc_init(ctx, gen6_encoder_context);
+    gen6_mfc_init(ctx, encode_state, gen6_encoder_context);
     gen6_mfc_avc_prepare(ctx, encode_state, gen6_encoder_context);
     gen6_mfc_run(ctx, encode_state, gen6_encoder_context);
     gen6_mfc_stop(ctx, encode_state, gen6_encoder_context);
diff --git a/src/gen6_mfc.h b/src/gen6_mfc.h
index 75bcf63..22cd62b 100644
--- a/src/gen6_mfc.h
+++ b/src/gen6_mfc.h
@@ -103,4 +103,12 @@ gen6_mfc_pipeline(VADriverContextP ctx,
 Bool gen6_mfc_context_init(VADriverContextP ctx, struct gen6_mfc_context *mfc_context);
 Bool gen6_mfc_context_destroy(struct gen6_mfc_context *mfc_context);
 
+VAStatus 
+gen75_mfc_pipeline(VADriverContextP ctx,
+                  VAProfile profile,
+                  struct encode_state *encode_state,
+                  struct gen6_encoder_context *gen6_encoder_context);
+Bool gen75_mfc_context_init(VADriverContextP ctx, struct gen6_mfc_context *mfc_context);
+Bool gen75_mfc_context_destroy(struct gen6_mfc_context *mfc_context);
+
 #endif	/* _GEN6_MFC_BCS_H_ */
diff --git a/src/gen6_mfd.c b/src/gen6_mfd.c
old mode 100644
new mode 100755
index c4bec7b..9c110c6
--- a/src/gen6_mfd.c
+++ b/src/gen6_mfd.c
@@ -26,14 +26,13 @@
  *
  */
 
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <assert.h>
+#ifndef HAVE_GEN_AVC_SURFACE
+#define HAVE_GEN_AVC_SURFACE 1
+#endif
 
+#include "sysdeps.h"
 #include "intel_batchbuffer.h"
 #include "intel_driver.h"
-
 #include "i965_defines.h"
 #include "i965_drv_video.h"
 #include "i965_decoder_utils.h"
@@ -167,37 +166,20 @@ gen6_mfd_avc_frame_store_index(VADriverContextP ctx,
     }
 }
 
-static void 
-gen6_mfd_free_avc_surface(void **data)
-{
-    struct gen6_avc_surface *gen6_avc_surface = *data;
-
-    if (!gen6_avc_surface)
-        return;
-
-    dri_bo_unreference(gen6_avc_surface->dmv_top);
-    gen6_avc_surface->dmv_top = NULL;
-    dri_bo_unreference(gen6_avc_surface->dmv_bottom);
-    gen6_avc_surface->dmv_bottom = NULL;
-
-    free(gen6_avc_surface);
-    *data = NULL;
-}
-
 static void
 gen6_mfd_init_avc_surface(VADriverContextP ctx, 
                           VAPictureParameterBufferH264 *pic_param,
                           struct object_surface *obj_surface)
 {
     struct i965_driver_data *i965 = i965_driver_data(ctx);
-    struct gen6_avc_surface *gen6_avc_surface = obj_surface->private_data;
+    GenAvcSurface *gen6_avc_surface = obj_surface->private_data;
     int height_in_mbs;
 
-    obj_surface->free_private_data = gen6_mfd_free_avc_surface;
+    obj_surface->free_private_data = gen_free_avc_surface;
     height_in_mbs = ((pic_param->picture_height_in_mbs_minus1 + 1) & 0xff); /* frame height */
 
     if (!gen6_avc_surface) {
-        gen6_avc_surface = calloc(sizeof(struct gen6_avc_surface), 1);
+        gen6_avc_surface = calloc(sizeof(GenAvcSurface), 1);
         assert((obj_surface->size & 0x3f) == 0);
         obj_surface->private_data = gen6_avc_surface;
     }
@@ -413,27 +395,6 @@ gen6_mfd_bsp_buf_base_addr_state(VADriverContextP ctx,
 }
 
 static void
-gen6_mfd_aes_state(VADriverContextP ctx,
-                   struct decode_state *decode_state,
-                   int standard_select)
-{
-    /* FIXME */
-}
-
-static void
-gen6_mfd_wait(VADriverContextP ctx,
-              struct decode_state *decode_state,
-              int standard_select,
-              struct gen6_mfd_context *gen6_mfd_context)
-{
-    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
-
-    BEGIN_BCS_BATCH(batch, 1);
-    OUT_BCS_BATCH(batch, MFX_WAIT | (1 << 8));
-    ADVANCE_BCS_BATCH(batch);
-}
-
-static void
 gen6_mfd_avc_img_state(VADriverContextP ctx,
                        struct decode_state *decode_state,
                        struct gen6_mfd_context *gen6_mfd_context)
@@ -575,7 +536,7 @@ gen6_mfd_avc_directmode_state(VADriverContextP ctx,
     struct i965_driver_data *i965 = i965_driver_data(ctx);
     struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
     struct object_surface *obj_surface;
-    struct gen6_avc_surface *gen6_avc_surface;
+    GenAvcSurface *gen6_avc_surface;
     VAPictureH264 *va_pic;
     int i, j;
 
@@ -1641,9 +1602,19 @@ gen6_mfd_vc1_pic_state(VADriverContextP ctx,
 
     if (picture_type == GEN6_VC1_I_PICTURE || picture_type == GEN6_VC1_BI_PICTURE) /* I picture */
         trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx2;
-    else
+    else {
         trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx1;
-
+        /*
+         * 8.3.6.2.1 Transform Type Selection
+         * If variable-sized transform coding is not enabled,
+         * then the 8x8 transform shall be used for all blocks.
+         * This is also a requirement of MFX_VC1_PIC_STATE.
+         */
+        if (pic_param->transform_fields.bits.variable_sized_transform_flag == 0) {
+            pic_param->transform_fields.bits.mb_level_transform_type_flag   = 1;
+            pic_param->transform_fields.bits.frame_level_transform_type     = 0;
+        }
+    }
 
     if (picture_type == GEN6_VC1_B_PICTURE) {
         struct gen6_vc1_surface *gen6_vc1_surface = NULL;
@@ -2031,7 +2002,7 @@ gen6_dec_hw_context_init(VADriverContextP ctx, VAProfile profile)
 
     gen6_mfd_context->base.destroy = gen6_mfd_context_destroy;
     gen6_mfd_context->base.run = gen6_mfd_decode_picture;
-    gen6_mfd_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER);
+    gen6_mfd_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
 
     for (i = 0; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface); i++) {
         gen6_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
diff --git a/src/gen6_mfd.h b/src/gen6_mfd.h
index 6e20364..de131d6 100644
--- a/src/gen6_mfd.h
+++ b/src/gen6_mfd.h
@@ -35,13 +35,6 @@
 #include <intel_bufmgr.h>
 #include "i965_decoder.h"
 
-struct gen6_avc_surface
-{
-    dri_bo *dmv_top;
-    dri_bo *dmv_bottom;
-    int dmv_bottom_flag;
-};
-
 #define GEN6_VC1_I_PICTURE              0
 #define GEN6_VC1_P_PICTURE              1
 #define GEN6_VC1_B_PICTURE              2
diff --git a/src/gen6_vme.c b/src/gen6_vme.c
index 2ffbd43..9fe8cd9 100644
--- a/src/gen6_vme.c
+++ b/src/gen6_vme.c
@@ -627,11 +627,9 @@ static VAStatus gen6_vme_constant_setup(VADriverContextP ctx,
                                         struct gen6_encoder_context *gen6_encoder_context)
 {
     struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
-    unsigned char *constant_buffer;
 
     dri_bo_map(vme_context->curbe.bo, 1);
     assert(vme_context->curbe.bo->virtual);
-    constant_buffer = vme_context->curbe.bo->virtual;
 	
     /*TODO copy buffer into CURB*/
 
@@ -672,19 +670,26 @@ static VAStatus gen6_vme_vme_state_setup(VADriverContextP ctx,
     return VA_STATUS_SUCCESS;
 }
 
-static void gen6_vme_pipeline_select(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
+static void gen6_vme_pipeline_select(VADriverContextP ctx,
+                                     struct gen6_encoder_context *gen6_encoder_context,
+                                     struct intel_batchbuffer *batch)
 {
-    struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
+    if (batch == NULL)
+        batch = gen6_encoder_context->base.batch;
 
     BEGIN_BATCH(batch, 1);
     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
     ADVANCE_BATCH(batch);
 }
 
-static void gen6_vme_state_base_address(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
+static void gen6_vme_state_base_address(VADriverContextP ctx,
+                                        struct gen6_encoder_context *gen6_encoder_context,
+                                        struct intel_batchbuffer *batch)
 {
     struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
-    struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
+
+    if (batch == NULL)
+        batch = gen6_encoder_context->base.batch;
 
     BEGIN_BATCH(batch, 10);
 
@@ -709,11 +714,15 @@ static void gen6_vme_state_base_address(VADriverContextP ctx, struct gen6_encode
     ADVANCE_BATCH(batch);
 }
 
-static void gen6_vme_vfe_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
+static void gen6_vme_vfe_state(VADriverContextP ctx,
+                               struct gen6_encoder_context *gen6_encoder_context,
+                               struct intel_batchbuffer *batch)
 {
-    struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
     struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
 
+    if (batch == NULL)
+        batch = gen6_encoder_context->base.batch;
+
     BEGIN_BATCH(batch, 8);
 
     OUT_BATCH(batch, CMD_MEDIA_VFE_STATE | 6);					/*Gen6 CMD_MEDIA_STATE_POINTERS = CMD_MEDIA_STATE */
@@ -732,11 +741,15 @@ static void gen6_vme_vfe_state(VADriverContextP ctx, struct gen6_encoder_context
 
 }
 
-static void gen6_vme_curbe_load(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
+static void gen6_vme_curbe_load(VADriverContextP ctx,
+                                struct gen6_encoder_context *gen6_encoder_context,
+                                struct intel_batchbuffer *batch)
 {
-    struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
     struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
 
+    if (batch == NULL)
+        batch = gen6_encoder_context->base.batch;
+
     BEGIN_BATCH(batch, 4);
 
     OUT_BATCH(batch, CMD_MEDIA_CURBE_LOAD | 2);
@@ -748,11 +761,15 @@ static void gen6_vme_curbe_load(VADriverContextP ctx, struct gen6_encoder_contex
     ADVANCE_BATCH(batch);
 }
 
-static void gen6_vme_idrt(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
+static void gen6_vme_idrt(VADriverContextP ctx,
+                          struct gen6_encoder_context *gen6_encoder_context,
+                          struct intel_batchbuffer *batch)
 {
-    struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
     struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
 
+    if (batch == NULL)
+        batch = gen6_encoder_context->base.batch;
+
     BEGIN_BATCH(batch, 4);
 
     OUT_BATCH(batch, CMD_MEDIA_INTERFACE_LOAD | 2);	
@@ -767,14 +784,17 @@ static int gen6_vme_media_object(VADriverContextP ctx,
                                  struct encode_state *encode_state,
                                  int mb_x, int mb_y,
                                  int kernel,
-                                 struct gen6_encoder_context *gen6_encoder_context)
+                                 struct gen6_encoder_context *gen6_encoder_context,
+                                 struct intel_batchbuffer *batch)
 {
     struct i965_driver_data *i965 = i965_driver_data(ctx);
-    struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
     struct object_surface *obj_surface = SURFACE(encode_state->current_render_target);
     int mb_width = ALIGN(obj_surface->orig_width, 16) / 16;
     int len_in_dowrds = 6 + 1;
 
+    if (batch == NULL)
+        batch = gen6_encoder_context->base.batch;
+
     BEGIN_BATCH(batch, len_in_dowrds);
     
     OUT_BATCH(batch, CMD_MEDIA_OBJECT | (len_in_dowrds - 2));
@@ -844,7 +864,8 @@ static void gen6_vme_pipeline_programing(VADriverContextP ctx,
                                          struct encode_state *encode_state,
                                          struct gen6_encoder_context *gen6_encoder_context)
 {
-    struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct intel_batchbuffer *main_batch = gen6_encoder_context->base.batch;
     VAEncSliceParameterBuffer *pSliceParameter = (VAEncSliceParameterBuffer *)encode_state->slice_params[0]->buffer;
     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param->buffer;
     int is_intra = pSliceParameter->slice_flags.bits.is_intra;
@@ -852,8 +873,9 @@ static void gen6_vme_pipeline_programing(VADriverContextP ctx,
     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
     int emit_new_state = 1, object_len_in_bytes;
     int x, y;
+    struct intel_batchbuffer *batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_RENDER, width_in_mbs * height_in_mbs * 8 * 4 + 0x200);
 
-    intel_batchbuffer_start_atomic(batch, 0x1000);
+    intel_batchbuffer_start_atomic(batch, width_in_mbs * height_in_mbs * 8 * 4 + 0x100);
 
     for(y = 0; y < height_in_mbs; y++){
         for(x = 0; x < width_in_mbs; x++){	
@@ -863,19 +885,19 @@ static void gen6_vme_pipeline_programing(VADriverContextP ctx,
                 intel_batchbuffer_emit_mi_flush(batch);
 
                 /*Step2: State command PIPELINE_SELECT*/
-                gen6_vme_pipeline_select(ctx, gen6_encoder_context);
+                gen6_vme_pipeline_select(ctx, gen6_encoder_context, batch);
 
                 /*Step3: State commands configuring pipeline states*/
-                gen6_vme_state_base_address(ctx, gen6_encoder_context);
-                gen6_vme_vfe_state(ctx, gen6_encoder_context);
-                gen6_vme_curbe_load(ctx, gen6_encoder_context);
-                gen6_vme_idrt(ctx, gen6_encoder_context);
+                gen6_vme_state_base_address(ctx, gen6_encoder_context, batch);
+                gen6_vme_vfe_state(ctx, gen6_encoder_context, batch);
+                gen6_vme_curbe_load(ctx, gen6_encoder_context, batch);
+                gen6_vme_idrt(ctx, gen6_encoder_context, batch);
 
                 emit_new_state = 0;
             }
 
             /*Step4: Primitive commands*/
-            object_len_in_bytes = gen6_vme_media_object(ctx, encode_state, x, y, is_intra ? VME_INTRA_SHADER : VME_INTER_SHADER, gen6_encoder_context);
+            object_len_in_bytes = gen6_vme_media_object(ctx, encode_state, x, y, is_intra ? VME_INTRA_SHADER : VME_INTER_SHADER, gen6_encoder_context, batch);
 
             if (intel_batchbuffer_check_free_space(batch, object_len_in_bytes) == 0) {
                 assert(0);
@@ -887,7 +909,29 @@ static void gen6_vme_pipeline_programing(VADriverContextP ctx,
         }
     }
 
-    intel_batchbuffer_end_atomic(batch);	
+    intel_batchbuffer_align(batch, 8);
+
+    BEGIN_BATCH(batch, 2);
+    OUT_BATCH(batch, 0);
+    OUT_BATCH(batch, MI_BATCH_BUFFER_END);
+    ADVANCE_BATCH(batch);
+
+    intel_batchbuffer_end_atomic(batch);
+
+    /* chain to the main batch buffer */
+    intel_batchbuffer_start_atomic(main_batch, 0x100);
+    intel_batchbuffer_emit_mi_flush(main_batch);
+    BEGIN_BATCH(main_batch, 2);
+    OUT_BATCH(main_batch, MI_BATCH_BUFFER_START | (2 << 6));
+    OUT_RELOC(main_batch,
+              batch->buffer,
+              I915_GEM_DOMAIN_COMMAND, 0,
+              0);
+    ADVANCE_BATCH(main_batch);
+    intel_batchbuffer_end_atomic(main_batch);
+
+    // end programing             
+    intel_batchbuffer_free(batch);
 }
 
 static VAStatus gen6_vme_prepare(VADriverContextP ctx, 
diff --git a/src/gen6_vme.h b/src/gen6_vme.h
index 800898c..4d540c9 100644
--- a/src/gen6_vme.h
+++ b/src/gen6_vme.h
@@ -35,6 +35,7 @@
 #include <intel_bufmgr.h>
 
 
+#define INTRA_VME_OUTPUT_IN_BYTES       16      /* in bytes */
 #define MAX_INTERFACE_DESC_GEN6      32
 #define MAX_MEDIA_SURFACES_GEN6      34
 
@@ -77,6 +78,7 @@ struct gen6_vme_context
     } vme_output;
 
     struct i965_kernel vme_kernels[GEN6_VME_KERNEL_NUMBER];
+    void *vme_state_message;
 };
 
 VAStatus gen6_vme_pipeline(VADriverContextP ctx,
@@ -86,4 +88,11 @@ VAStatus gen6_vme_pipeline(VADriverContextP ctx,
 Bool gen6_vme_context_init(VADriverContextP ctx, struct gen6_vme_context *vme_context);
 Bool gen6_vme_context_destroy(struct gen6_vme_context *vme_context);
 
+VAStatus gen75_vme_pipeline(VADriverContextP ctx,
+                           VAProfile profile,
+                           struct encode_state *encode_state,
+                           struct gen6_encoder_context *gen6_encoder_context);
+
+Bool gen75_vme_context_init(VADriverContextP ctx, struct gen6_vme_context *vme_context);
+Bool gen75_vme_context_destroy(struct gen6_vme_context *vme_context);
 #endif /* _GEN6_VME_H_ */
diff --git a/src/gen75_mfc.c b/src/gen75_mfc.c
new file mode 100644
index 0000000..0f2c62e
--- /dev/null
+++ b/src/gen75_mfc.c
@@ -0,0 +1,1183 @@
+/*
+ * Copyright © 2010-2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Zhao Yakui <yakui.zhao at intel.com>
+ *    Xiang Haihao <haihao.xiang at intel.com>
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+
+#include "assert.h"
+#include "intel_batchbuffer.h"
+#include "i965_defines.h"
+#include "i965_structs.h"
+#include "i965_drv_video.h"
+#include "i965_encoder.h"
+/* Revision test used by the *_state emitters in this file to pick their *_bplus command layout. */
+#define B0_STEP_REV		2	/* presumably the revision id of the B0 stepping -- confirm */
+#define IS_STEPPING_BPLUS(i965)	((i965->intel.revision) >= B0_STEP_REV)	/* true when i965->intel.revision >= B0_STEP_REV */
+/* Emit the 5-dword MFX_PIPE_MODE_SELECT command for standard_select; a NULL batch selects gen6_encoder_context->base.batch. */
+static void
+gen75_mfc_pipe_mode_select(VADriverContextP ctx,
+                           int standard_select,
+                           struct gen6_encoder_context *gen6_encoder_context,
+                           struct intel_batchbuffer *batch)
+{
+    if (batch == NULL)
+        batch = gen6_encoder_context->base.batch;
+
+    assert(standard_select == MFX_FORMAT_MPEG2 ||
+           standard_select == MFX_FORMAT_AVC);	/* no other format value is accepted */
+
+    BEGIN_BCS_BATCH(batch, 5);
+    OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));	/* header: opcode | (dword count - 2) */
+    OUT_BCS_BATCH(batch,
+                  (MFX_LONG_MODE << 17) | /* Must be long format for encoder */
+                  (MFD_MODE_VLD << 15) | /* VLD mode */
+                  (0 << 10) | /* disable Stream-Out */
+                  (1 << 9)  | /* Post Deblocking Output */
+                  (0 << 8)  | /* Pre Deblocking Output */
+                  (0 << 5)  | /* not in stitch mode */
+                  (1 << 4)  | /* encoding mode */
+                  (standard_select << 0));  /* standard select: avc or mpeg2 */
+    OUT_BCS_BATCH(batch,
+                  (0 << 7)  | /* expand NOA bus flag */
+                  (0 << 6)  | /* disable slice-level clock gating */
+                  (0 << 5)  | /* disable clock gating for NOA */
+                  (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
+                  (0 << 3)  | /* terminate if AVC mbdata error occurs */
+                  (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
+                  (0 << 1)  |
+                  (0 << 0));
+    OUT_BCS_BATCH(batch, 0);	/* remaining two dwords are written as zero */
+    OUT_BCS_BATCH(batch, 0);
+
+    ADVANCE_BCS_BATCH(batch);
+}
+/* Masks and flag values applied to the msg[] dwords by gen75_mfc_avc_pak_object_inter(). */
+#define		INTER_MODE_MASK		0x03	/* mask applied to msg[0] */
+#define		INTER_8X8		0x03	/* masked msg[0] value that is tested for */
+#define		SUBMB_SHAPE_MASK	0x00FF00	/* mask applied to msg[1] */
+
+#define		INTER_MV8		(4 << 20)	/* always OR-ed into the inter PAK object flags dword */
+#define		INTER_MV32		(6 << 20)	/* OR-ed in as well when the mode is INTER_8X8 and a SUBMB_SHAPE_MASK bit is set */
+
+/* Emit the 6-dword MFX_SURFACE_STATE command from mfc_context->surface_state (width, height, w_pitch, h_pitch); a NULL batch selects the base batch. */
+static void
+gen75_mfc_surface_state(VADriverContextP ctx,
+                        struct gen6_encoder_context *gen6_encoder_context,
+                        struct intel_batchbuffer *batch)
+{
+    struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
+
+    if (batch == NULL)
+        batch = gen6_encoder_context->base.batch;
+
+    BEGIN_BCS_BATCH(batch, 6);
+
+    OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));	/* header: opcode | (dword count - 2) */
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch,
+                  ((mfc_context->surface_state.height - 1) << 18) |	/* height is programmed minus one */
+                  ((mfc_context->surface_state.width - 1) << 4));	/* width is programmed minus one */
+    OUT_BCS_BATCH(batch,
+                  (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
+                  (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
+                  (0 << 22) | /* surface object control state, FIXME??? */
+                  ((mfc_context->surface_state.w_pitch - 1) << 3) | /* pitch, programmed minus one */
+                  (0 << 2)  | /* must be 0 for interleave U/V */
+                  (1 << 1)  | /* must be tiled */
+                  (I965_TILEWALK_YMAJOR << 0));  /* tile walk, TILEWALK_YMAJOR */
+    OUT_BCS_BATCH(batch,
+                  (0 << 16) | 								/* must be 0 for interleave U/V */
+                  (mfc_context->surface_state.h_pitch)); 		/* y offset for U(cb) */
+    OUT_BCS_BATCH(batch, 0);
+    ADVANCE_BCS_BATCH(batch);
+}
+/* Emit the 61-dword layout of MFX_PIPE_BUF_ADDR_STATE (selected by IS_STEPPING_BPLUS in the caller); a NULL batch selects the base batch. */
+static void
+gen75_mfc_pipe_buf_addr_state_bplus(VADriverContextP ctx,
+                                    struct gen6_encoder_context *gen6_encoder_context,
+                                    struct intel_batchbuffer *batch)
+{
+    struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
+    int i;
+
+    if (batch == NULL)
+        batch = gen6_encoder_context->base.batch;
+
+    BEGIN_BCS_BATCH(batch, 61);
+
+    OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));	/* header: opcode | (dword count - 2) */
+
+    /* DW1-3: pre-deblocking output, written as all zero here */
+        OUT_BCS_BATCH(batch, 0);											/* pre output addr   */
+
+        OUT_BCS_BATCH(batch, 0);
+        OUT_BCS_BATCH(batch, 0);
+     /* DW4-6: post-deblocking output; the address dword is 0 when no bo is attached */
+
+    if (mfc_context->post_deblocking_output.bo)
+        OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo,
+                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                      0);											/* post output addr  */	
+    else
+        OUT_BCS_BATCH(batch, 0);
+
+        OUT_BCS_BATCH(batch, 0);
+        OUT_BCS_BATCH(batch, 0);
+
+     /* DW7-9: uncompressed source picture */
+    OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo,
+                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                  0); /* uncompressed data */
+
+        OUT_BCS_BATCH(batch, 0);
+        OUT_BCS_BATCH(batch, 0);
+
+     /* DW10-12: MB status, written as all zero here */
+        OUT_BCS_BATCH(batch, 0);
+        OUT_BCS_BATCH(batch, 0);
+        OUT_BCS_BATCH(batch, 0);
+
+     /* DW13-15: intra row store scratch buffer */
+    OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo,
+                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                  0);	
+        OUT_BCS_BATCH(batch, 0);
+        OUT_BCS_BATCH(batch, 0);
+
+     /* DW16-18: deblocking filter row store scratch buffer */
+    OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo,
+                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                  0);
+        OUT_BCS_BATCH(batch, 0);
+        OUT_BCS_BATCH(batch, 0);
+
+    /* DW19-50: reference pictures, two dwords per entry (address or 0, then 0) */
+    for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
+        if ( mfc_context->reference_surfaces[i].bo != NULL) {
+            OUT_BCS_RELOC(batch, mfc_context->reference_surfaces[i].bo,
+                          I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                          0);			
+        } else {
+            OUT_BCS_BATCH(batch, 0);
+        }
+	OUT_BCS_BATCH(batch, 0);
+    }
+        OUT_BCS_BATCH(batch, 0);	/* DW51 */
+
+	/* DW52-54: MB status buffer, written as all zero here */
+        OUT_BCS_BATCH(batch, 0);
+	
+        OUT_BCS_BATCH(batch, 0);
+        OUT_BCS_BATCH(batch, 0);
+
+	/* DW55-57: ILDB buffer, written as all zero here */
+        OUT_BCS_BATCH(batch, 0);
+        OUT_BCS_BATCH(batch, 0);
+        OUT_BCS_BATCH(batch, 0);
+
+	/* DW58-60: second ILDB buffer, written as all zero here */
+        OUT_BCS_BATCH(batch, 0);
+        OUT_BCS_BATCH(batch, 0);
+        OUT_BCS_BATCH(batch, 0);
+    ADVANCE_BCS_BATCH(batch);
+}
+/* Emit MFX_PIPE_BUF_ADDR_STATE: delegates to the _bplus emitter when IS_STEPPING_BPLUS() holds, else writes the 25-dword layout. */
+static void
+gen75_mfc_pipe_buf_addr_state(VADriverContextP ctx,
+                              struct gen6_encoder_context *gen6_encoder_context,
+                              struct intel_batchbuffer *batch)
+{
+    struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
+    int i;
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+ 
+    if (batch == NULL)
+        batch = gen6_encoder_context->base.batch;
+
+    if (IS_STEPPING_BPLUS(i965)) {
+	gen75_mfc_pipe_buf_addr_state_bplus(ctx, gen6_encoder_context, batch);
+	return;
+    }
+
+    BEGIN_BCS_BATCH(batch, 25);
+
+    OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (25 - 2));	/* header: opcode | (dword count - 2) */
+
+    OUT_BCS_BATCH(batch, 0);											/* pre output addr   */
+
+    OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo,	/* NOTE(review): no NULL check here, unlike the _bplus variant */
+                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                  0);											/* post output addr  */	
+
+    OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo,
+                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                  0);											/* uncompressed data */
+
+    OUT_BCS_BATCH(batch, 0);											/* StreamOut data*/
+    OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo,
+                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                  0);	
+    OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo,
+                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                  0);
+    /* DW7..22: reference pictures, one dword per entry (address or 0) */
+    for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
+        if ( mfc_context->reference_surfaces[i].bo != NULL) {
+            OUT_BCS_RELOC(batch, mfc_context->reference_surfaces[i].bo,
+                          I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                          0);			
+        } else {
+            OUT_BCS_BATCH(batch, 0);
+        }
+    }
+    OUT_BCS_BATCH(batch, 0);   											/* no block status  */
+
+    OUT_BCS_BATCH(batch, 0);
+
+    ADVANCE_BCS_BATCH(batch);
+}
+
+/* Emit the 26-dword layout of MFX_IND_OBJ_BASE_ADDR_STATE (selected by IS_STEPPING_BPLUS in the caller); a NULL batch selects the base batch. */
+static void
+gen75_mfc_ind_obj_base_addr_state_bplus(VADriverContextP ctx,
+                                        struct gen6_encoder_context *gen6_encoder_context,
+                                        struct intel_batchbuffer *batch)
+{
+    struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
+    struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
+
+    if (batch == NULL)
+        batch = gen6_encoder_context->base.batch;
+
+    BEGIN_BCS_BATCH(batch, 26);
+
+    OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));	/* header: opcode | (dword count - 2) */
+	/* DW1-3: MFX indirect bitstream offset, written as zero here */
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+	/* DW4-5: MFX upper bound, written as zero here */
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+
+    /* DW6-10: MFX Indirect MV Object Base Address, taken from the VME output bo */
+    OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
+    OUT_BCS_BATCH(batch, 0);
+
+     /* DW11-15: MFX IT-COFF. Not used on encoder */
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+
+     /* DW16-20: MFX indirect DBLK. Not used on encoder */	
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+
+    /* DW21-25: MFC Indirect PAK-BSE Object Base Address for Encoder */	
+    OUT_BCS_RELOC(batch,
+                  mfc_context->mfc_indirect_pak_bse_object.bo,
+                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                  0);
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+	
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0x00000000);
+
+    ADVANCE_BCS_BATCH(batch);
+}
+/* Emit MFX_IND_OBJ_BASE_ADDR_STATE: delegates to the _bplus emitter when IS_STEPPING_BPLUS() holds, else writes the 11-dword layout. */
+static void
+gen75_mfc_ind_obj_base_addr_state(VADriverContextP ctx,
+                                  struct gen6_encoder_context *gen6_encoder_context,
+                                  struct intel_batchbuffer *batch)
+{
+    struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
+    struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+
+    if (batch == NULL)
+        batch = gen6_encoder_context->base.batch;
+
+    if (IS_STEPPING_BPLUS(i965)) {
+	gen75_mfc_ind_obj_base_addr_state_bplus(ctx, gen6_encoder_context, batch);
+	return;
+    }
+
+    BEGIN_BCS_BATCH(batch, 11);
+
+    OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));	/* header: opcode | (dword count - 2) */
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+    /* MFX Indirect MV Object Base Address, taken from the VME output bo */
+    OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+    OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+    /* MFC Indirect PAK-BSE Object Base Address for Encoder */	
+    OUT_BCS_RELOC(batch,
+                  mfc_context->mfc_indirect_pak_bse_object.bo,
+                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                  0);
+    OUT_BCS_BATCH(batch, 0x00000000); /* must set, up to 2G -- NOTE(review): the value written is 0, unlike the MV object bound above */
+
+    ADVANCE_BCS_BATCH(batch);
+}
+/* Emit the 10-dword layout of MFX_BSP_BUF_BASE_ADDR_STATE (selected by IS_STEPPING_BPLUS in the caller); a NULL batch selects the base batch. */
+static void
+gen75_mfc_bsp_buf_base_addr_state_bplus(VADriverContextP ctx,
+                                        struct gen6_encoder_context *gen6_encoder_context,
+                                        struct intel_batchbuffer *batch)
+{
+    struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
+
+    if (batch == NULL)
+        batch = gen6_encoder_context->base.batch;
+
+    BEGIN_BCS_BATCH(batch, 10);
+
+    OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));	/* header: opcode | (dword count - 2) */
+    OUT_BCS_RELOC(batch, mfc_context->bsd_mpc_row_store_scratch_buffer.bo,	/* DW1-3: BSD/MPC row store scratch buffer */
+                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                  0);
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+	
+	/* DW4-6: MPR Row Store Scratch Buffer Base Address, written as zero here */
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+
+	/* DW7-9: Bitplane Read Buffer Base Address, written as zero here */
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+
+    ADVANCE_BCS_BATCH(batch);
+}
+/* Emit MFX_BSP_BUF_BASE_ADDR_STATE: delegates to the _bplus emitter when IS_STEPPING_BPLUS() holds, else writes the 4-dword layout. */
+static void
+gen75_mfc_bsp_buf_base_addr_state(VADriverContextP ctx,
+                                  struct gen6_encoder_context *gen6_encoder_context,
+                                  struct intel_batchbuffer *batch)
+{
+    struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+
+    if (batch == NULL)
+        batch = gen6_encoder_context->base.batch;
+
+    if (IS_STEPPING_BPLUS(i965)) {
+	gen75_mfc_bsp_buf_base_addr_state_bplus(ctx, gen6_encoder_context, batch);
+	return;
+    }
+ 
+
+    BEGIN_BCS_BATCH(batch, 4);
+
+    OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));	/* header: opcode | (dword count - 2) */
+    OUT_BCS_RELOC(batch, mfc_context->bsd_mpc_row_store_scratch_buffer.bo,	/* BSD/MPC row store scratch buffer */
+                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                  0);
+    OUT_BCS_BATCH(batch, 0);	/* the other two address slots are written as zero */
+    OUT_BCS_BATCH(batch, 0);
+
+    ADVANCE_BCS_BATCH(batch);
+}
+/* Emit the 16-dword MFX_AVC_IMG_STATE command; picture size in macroblocks is derived from mfc_context->surface_state, all other fields are constants. */
+static void
+gen75_mfc_avc_img_state(VADriverContextP ctx,
+                        struct gen6_encoder_context *gen6_encoder_context,
+                        struct intel_batchbuffer *batch)
+{
+    struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
+    int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;	/* round up to whole 16-pixel macroblocks */
+    int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
+
+    if (batch == NULL)
+        batch = gen6_encoder_context->base.batch;
+
+    BEGIN_BCS_BATCH(batch, 16);
+    OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
+    OUT_BCS_BATCH(batch,
+                  ((width_in_mbs * height_in_mbs) & 0xFFFF));	/* macroblock count, truncated to 16 bits */
+    OUT_BCS_BATCH(batch, 
+                  ((height_in_mbs - 1) << 16) | 
+                  ((width_in_mbs - 1) << 0));
+    OUT_BCS_BATCH(batch, 
+                  (0 << 24) |	/* Second Chroma QP Offset */
+                  (0 << 16) |	/* Chroma QP Offset */
+                  (0 << 14) |   /* Max-bit conformance Intra flag */
+                  (0 << 13) |   /* Max Macroblock size conformance Inter flag */
+                  (0 << 12) |   /* FIXME: Weighted_Pred_Flag */
+                  (0 << 10) |   /* FIXME: Weighted_BiPred_Idc */
+                  (0 << 8)  |   /* FIXME: Image Structure */
+                  (0 << 0) );   /* Current Decoded Image Frame Store ID, reserved in Encode mode */
+    OUT_BCS_BATCH(batch,
+                  (0 << 16) |   /* Minimum Frame size */
+                  (0 << 15) |   /* Disable reading of Macroblock Status Buffer */
+                  (0 << 14) |   /* Load BitStream Pointer only once, 1 slice 1 frame */
+                  (0 << 13) |   /* CABAC 0 word insertion test enable */
+                  (1 << 12) |   /* MVUnpackedEnable,compliant to DXVA */
+                  (1 << 10) |   /* Chroma Format IDC, 4:2:0 */
+                  (0 << 8)  |   /* FIXME: MbMvFormatFlag */
+                  (1 << 7)  |   /* 0:CAVLC encoding mode,1:CABAC */
+                  (0 << 6)  |   /* Only valid for VLD decoding mode */
+                  (0 << 5)  |   /* Constrained Intra Prediction Flag, from PPS */
+                  (0 << 4)  |   /* Direct 8x8 inference flag */
+                  (0 << 3)  |   /* Only 8x8 IDCT Transform Mode Flag */
+                  (1 << 2)  |   /* Frame MB only flag */
+                  (0 << 1)  |   /* MBAFF mode is inactive */
+                  (0 << 0));    /* Field picture flag */
+    OUT_BCS_BATCH(batch, 0);    /* Mainly about MB rate control and debug, just ignoring */
+    OUT_BCS_BATCH(batch,        /* Inter and Intra Conformance Max size limit */
+                  (0xBB8 << 16) |       /* InterMbMaxSz */
+                  (0xEE8) );            /* IntraMbMaxSz */
+    OUT_BCS_BATCH(batch, 0);            /* Reserved */
+    OUT_BCS_BATCH(batch, 0);            /* Slice QP Delta for bitrate control */
+    OUT_BCS_BATCH(batch, 0);            /* Slice QP Delta for bitrate control */	
+    OUT_BCS_BATCH(batch, 0x8C000000);	/* NOTE(review): undocumented constant -- confirm against the hardware spec */
+    OUT_BCS_BATCH(batch, 0x00010000);	/* NOTE(review): undocumented constant -- confirm against the hardware spec */
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+
+    ADVANCE_BCS_BATCH(batch);
+}
+
+/* Emit the 71-dword layout of MFX_AVC_DIRECTMODE_STATE (selected by IS_STEPPING_BPLUS in the caller); a NULL batch selects the base batch. */
+static void
+gen75_mfc_avc_directmode_state_bplus(VADriverContextP ctx,
+                                     struct gen6_encoder_context *gen6_encoder_context,
+                                     struct intel_batchbuffer *batch)
+{
+    struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
+    int i;
+
+    if (batch == NULL)
+        batch = gen6_encoder_context->base.batch;
+
+    BEGIN_BCS_BATCH(batch, 71);
+
+    OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));	/* header: opcode | (dword count - 2) */
+
+    /* Reference frames and Current frames */
+    /* DW1-32: direct MV buffers for the references; i steps by 2, so only even-indexed buffers are used, two dwords each */
+    for(i = 0; i < NUM_MFC_DMV_BUFFERS - 2; i += 2) {
+        if ( mfc_context->direct_mv_buffers[i].bo != NULL) { 
+            OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[i].bo,
+                          I915_GEM_DOMAIN_INSTRUCTION, 0,
+                          0);
+            OUT_BCS_BATCH(batch, 0);
+        } else {
+            OUT_BCS_BATCH(batch, 0);
+            OUT_BCS_BATCH(batch, 0);
+        }
+    }
+	OUT_BCS_BATCH(batch, 0);	/* DW33 */
+
+	/* DW34-36: direct MV buffer for the current picture (entry NUM_MFC_DMV_BUFFERS - 2) */
+        OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo,
+                          I915_GEM_DOMAIN_INSTRUCTION, 0,
+                          0);
+
+	OUT_BCS_BATCH(batch, 0);
+	OUT_BCS_BATCH(batch, 0);
+
+    /* POL list: 32 dwords holding i/2, i.e. 0,0,1,1,...,15,15 */
+    for(i = 0; i < 32; i++) {
+        OUT_BCS_BATCH(batch, i/2);
+    }
+    OUT_BCS_BATCH(batch, 0);	/* two trailing zero dwords complete the 71 */
+    OUT_BCS_BATCH(batch, 0);
+
+    ADVANCE_BCS_BATCH(batch);
+}
+/* Emit MFX_AVC_DIRECTMODE_STATE: delegates to the _bplus emitter when IS_STEPPING_BPLUS() holds, else writes the 69-dword layout. */
+static void gen75_mfc_avc_directmode_state(VADriverContextP ctx,
+                                           struct gen6_encoder_context *gen6_encoder_context,
+                                           struct intel_batchbuffer *batch)
+{
+    struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
+    int i;
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+
+    if (batch == NULL)
+        batch = gen6_encoder_context->base.batch;
+
+    if (IS_STEPPING_BPLUS(i965)) {
+	gen75_mfc_avc_directmode_state_bplus(ctx, gen6_encoder_context, batch);
+	return;
+    }
+
+    BEGIN_BCS_BATCH(batch, 69);
+
+    OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));
+    //TODO: reference DMV -- one dword per buffer (address or 0) for all but the last two entries
+    for (i = 0; i < NUM_MFC_DMV_BUFFERS - 2; i++){
+	if (mfc_context->direct_mv_buffers[i].bo)
+    		OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[i].bo,
+                	  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                  0);
+	else
+        	OUT_BCS_BATCH(batch, 0);
+    }
+
+    //TODO: current DMV just for test
+#if 0
+    OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[0].bo,
+                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                  0);
+#else
+    //drm_intel_bo_pin(mfc_context->direct_mv_buffers[0].bo, 0x1000);
+    //OUT_BCS_BATCH(batch, mfc_context->direct_mv_buffers[0].bo->offset);
+    OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo,
+               	  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                  0);
+#endif
+
+
+    OUT_BCS_BATCH(batch, 0);
+
+    //TODO: POL list -- currently 34 zero dwords
+    for(i = 0; i < 34; i++) {
+        OUT_BCS_BATCH(batch, 0);
+    }
+
+    ADVANCE_BCS_BATCH(batch);
+}
+/* Emit the 11-dword MFX_AVC_SLICE_STATE command for one slice; intra_slice selects I-slice (non-zero) or P-slice (zero) values. */
+static void gen75_mfc_avc_slice_state(VADriverContextP ctx,
+                                      int intra_slice,
+                                      struct gen6_encoder_context *gen6_encoder_context,
+                                      struct intel_batchbuffer *batch)
+{
+    struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
+
+    if (batch == NULL)
+        batch = gen6_encoder_context->base.batch;
+
+    BEGIN_BCS_BATCH(batch, 11);;
+
+    OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2) );	/* header: opcode | (dword count - 2) */
+
+    if ( intra_slice )
+        OUT_BCS_BATCH(batch, 2);			/*Slice Type: I Slice*/
+    else
+        OUT_BCS_BATCH(batch, 0);			/*Slice Type: P Slice*/
+
+    if ( intra_slice )
+        OUT_BCS_BATCH(batch, 0);			/*no reference frames and pred_weight_table*/
+    else 
+        OUT_BCS_BATCH(batch, 0x00010000); 	/*1 reference frame*/
+
+    OUT_BCS_BATCH(batch, (0<<24) |                /*Enable deblocking operation*/
+                  (26<<16) | 			/*Slice Quantization Parameter, hard-coded to 26*/
+                  0x0202 );
+    OUT_BCS_BATCH(batch, 0);			/*First MB X&Y, the position of current slice*/
+    OUT_BCS_BATCH(batch, ( ((mfc_context->surface_state.height+15)/16) << 16) );	/* picture height in MBs in the upper half; presumably the next-slice position -- confirm */
+
+    OUT_BCS_BATCH(batch, 
+                  (0<<31) |		/*RateControlCounterEnable = disable*/
+                  (1<<30) |		/*ResetRateControlCounter*/
+                  (2<<28) |		/*RC Trigger Mode = Loose Rate Control*/
+                  (1<<19) | 	        /*IsLastSlice*/
+                  (0<<18) | 	        /*BitstreamOutputFlag Compressed BitStream Output Disable Flag 0:enable 1:disable*/
+                  (0<<17) |	        /*HeaderPresentFlag*/	
+                  (1<<16) |	        /*SliceData PresentFlag*/
+                  (0<<15) |	        /*TailPresentFlag*/
+                  (1<<13) |	        /*RBSP NAL TYPE*/	
+                  (0<<12) );	        /*CabacZeroWordInsertionEnable*/
+	
+
+    OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset);	/* offset into the indirect PAK-BSE object */
+
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+
+    ADVANCE_BCS_BATCH(batch);
+}
+/* Emit one 18-dword MFX_QM_STATE command: header, qm_type, then a 16-dword matrix payload copied from qm (qm_length dwords, at most 16). */
+static void
+gen75_mfc_qm_state(VADriverContextP ctx,
+                   int qm_type,
+                   unsigned int *qm,
+                   int qm_length,
+                   struct gen6_encoder_context *gen6_encoder_context,
+                   struct intel_batchbuffer *batch)
+{
+    unsigned int qm_buffer[16];	/* NOTE(review): entries past qm_length are never initialized, yet all 16 are passed on below */
+
+    if (batch == NULL)
+        batch = gen6_encoder_context->base.batch;
+
+    assert(qm_length <= 16);
+    assert(sizeof(*qm) == 4);
+    memcpy(qm_buffer, qm, qm_length * 4);	/* copies only the caller-supplied dwords */
+
+    BEGIN_BCS_BATCH(batch, 18);
+    OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
+    OUT_BCS_BATCH(batch, qm_type << 0);
+    intel_batchbuffer_data(batch, qm_buffer, 16 * 4);	/* always hands over the full 64-byte buffer */
+    ADVANCE_BCS_BATCH(batch);
+}
+/* Issue four MFX_QM_STATE commands (AVC 4x4 intra/inter, 8x8 intra/inter), each from the same constant table. */
+static void gen75_mfc_avc_qm_state(VADriverContextP ctx,
+                                   struct gen6_encoder_context *gen6_encoder_context,
+                                   struct intel_batchbuffer *batch)
+{
+    unsigned int qm[16] = {	/* every byte is 0x10 */
+        0x10101010, 0x10101010, 0x10101010, 0x10101010,
+        0x10101010, 0x10101010, 0x10101010, 0x10101010,
+        0x10101010, 0x10101010, 0x10101010, 0x10101010,
+        0x10101010, 0x10101010, 0x10101010, 0x10101010
+    };
+
+    gen75_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 12, gen6_encoder_context, batch);	/* 4x4 types pass 12 dwords */
+    gen75_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 12, gen6_encoder_context, batch);
+    gen75_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 16, gen6_encoder_context, batch);	/* 8x8 types pass all 16 dwords */
+    gen75_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 16, gen6_encoder_context, batch);
+}
+/* Emit one 34-dword MFX_FQM_STATE command: header, fqm_type, then a 32-dword matrix payload copied from fqm (fqm_length dwords, at most 32). */
+static void
+gen75_mfc_fqm_state(VADriverContextP ctx,
+                    int fqm_type,
+                    unsigned int *fqm,
+                    int fqm_length,
+                    struct gen6_encoder_context *gen6_encoder_context,
+                    struct intel_batchbuffer *batch)
+{
+    unsigned int fqm_buffer[32];	/* NOTE(review): entries past fqm_length are never initialized, yet all 32 are passed on below */
+
+    if (batch == NULL)
+        batch = gen6_encoder_context->base.batch;
+
+    assert(fqm_length <= 32);
+    assert(sizeof(*fqm) == 4);
+    memcpy(fqm_buffer, fqm, fqm_length * 4);	/* copies only the caller-supplied dwords */
+
+    BEGIN_BCS_BATCH(batch, 34);
+    OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2));
+    OUT_BCS_BATCH(batch, fqm_type << 0);
+    intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);	/* always hands over the full 128-byte buffer */
+    ADVANCE_BCS_BATCH(batch);
+}
+/* Issue four MFX_FQM_STATE commands (AVC 4x4 intra/inter, 8x8 intra/inter), each from the same constant table. */
+static void gen75_mfc_avc_fqm_state(VADriverContextP ctx,
+                                    struct gen6_encoder_context *gen6_encoder_context,
+                                    struct intel_batchbuffer *batch)
+{
+    unsigned int qm[32] = {	/* every 16-bit half is 0x1000 */
+        0x10001000, 0x10001000, 0x10001000, 0x10001000,
+        0x10001000, 0x10001000, 0x10001000, 0x10001000,
+        0x10001000, 0x10001000, 0x10001000, 0x10001000,
+        0x10001000, 0x10001000, 0x10001000, 0x10001000,
+        0x10001000, 0x10001000, 0x10001000, 0x10001000,
+        0x10001000, 0x10001000, 0x10001000, 0x10001000,
+        0x10001000, 0x10001000, 0x10001000, 0x10001000,
+        0x10001000, 0x10001000, 0x10001000, 0x10001000
+    };
+
+    gen75_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 24, gen6_encoder_context, batch);	/* 4x4 types pass 24 dwords */
+    gen75_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 24, gen6_encoder_context, batch);
+    gen75_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 32, gen6_encoder_context, batch);	/* 8x8 types pass all 32 dwords */
+    gen75_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 32, gen6_encoder_context, batch);
+}
+/* Emit the 10-dword MFX_AVC_REF_IDX_STATE command for list L0 with a fixed single-reference table. */
+static void gen75_mfc_avc_ref_idx_state(VADriverContextP ctx,
+                                        struct gen6_encoder_context *gen6_encoder_context,
+                                        struct intel_batchbuffer *batch)
+{
+    int i;
+
+    if (batch == NULL)
+        batch = gen6_encoder_context->base.batch;
+
+    BEGIN_BCS_BATCH(batch, 10);
+
+    OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);	// 8 == 10 - 2, same header convention as the other commands
+    OUT_BCS_BATCH(batch, 0);                  //Select L0
+
+    OUT_BCS_BATCH(batch, 0x80808000);         //Only 1 reference: lowest byte is 0, the other three are 0x80
+    for(i = 0; i < 7; i++) {
+        OUT_BCS_BATCH(batch, 0x80808080);     // remaining seven dwords: every byte 0x80
+    }
+
+    ADVANCE_BCS_BATCH(batch);
+}
+/* Emit a 12-dword MFC_AVC_PAK_OBJECT for an intra macroblock at (x, y), built from msg[0..3]; returns the number of dwords emitted. */
+static int
+gen75_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int end_mb, int qp,unsigned int *msg,
+                               struct gen6_encoder_context *gen6_encoder_context,
+                               struct intel_batchbuffer *batch)
+{
+    int len_in_dwords = 12;
+
+    unsigned int intra_msg;
+#define		INTRA_MSG_FLAG		(1 << 13)
+#define		INTRA_MBTYPE_MASK	(0x1F0000)
+
+    if (batch == NULL)
+        batch = gen6_encoder_context->base.batch;
+
+    BEGIN_BCS_BATCH(batch, len_in_dwords);
+
+    intra_msg = msg[0] & 0xC0FF;	/* keep bits 0-7 and 14-15 of msg[0] */
+    intra_msg |= INTRA_MSG_FLAG;	/* set bit 13 */
+    intra_msg |= ((msg[0] & INTRA_MBTYPE_MASK) >> 8);	/* move msg[0] bits 16-20 down to bits 8-12 */
+    OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 
+                  (0 << 24) |		/* PackedMvNum, Debug*/
+                  (0 << 20) | 		/* No motion vector */
+                  (1 << 19) |		/* CbpDcY */
+                  (1 << 18) |		/* CbpDcU */
+                  (1 << 17) |		/* CbpDcV */
+                  intra_msg);
+
+    OUT_BCS_BATCH(batch, (0xFFFF<<16) | (y << 8) | x);		/* Code Block Pattern for Y in the upper half, MB y and x in the low two bytes */
+    OUT_BCS_BATCH(batch, 0x000F000F);							/* Code Block Pattern */		
+    OUT_BCS_BATCH(batch, (0 << 27) | (end_mb << 26) | qp);	/* Last MB flag (bit 26) and QP */
+
+    /*Stuff for Intra MB*/
+    OUT_BCS_BATCH(batch, msg[1]);			/* We are using Intra16x16, no 4x4 predmode */	
+    OUT_BCS_BATCH(batch, msg[2]);	
+    OUT_BCS_BATCH(batch, msg[3]&0xFC);		/* only bits 2-7 of msg[3] are passed through */
+
+    OUT_BCS_BATCH(batch, 0x00000);	/*MaxSizeInWord and TargetSizeInWord*/
+	OUT_BCS_BATCH(batch, 0);
+
+    ADVANCE_BCS_BATCH(batch);
+
+    return len_in_dwords;
+}
+/* Emit a 12-dword MFC_AVC_PAK_OBJECT for an inter macroblock at (x, y), built from msg[0..1] and offset; returns the number of dwords emitted. */
+static int gen75_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, int qp,
+                                          unsigned int offset, unsigned int *msg, struct gen6_encoder_context *gen6_encoder_context,
+                                          struct intel_batchbuffer *batch)
+{
+    int len_in_dwords = 12;
+    unsigned int inter_msg;	/* scratch value, reused for three different dwords below */
+
+    if (batch == NULL)
+        batch = gen6_encoder_context->base.batch;
+
+    BEGIN_BCS_BATCH(batch, len_in_dwords);
+
+    OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
+
+	inter_msg = 32;
+ 	/* MV quantity: raised to 128 for INTER_8X8 with any sub-MB shape bit set */
+	if ((msg[0] & INTER_MODE_MASK) == INTER_8X8) {
+		if (msg[1] & SUBMB_SHAPE_MASK)
+			inter_msg = 128;
+	}
+    OUT_BCS_BATCH(batch, inter_msg);         /* 32 or 128; presumably the MV data size -- confirm */
+
+    OUT_BCS_BATCH(batch, offset);
+	inter_msg = msg[0] & (0x1F00FFFF);	/* keep bits 0-15 and 24-28 of msg[0] */
+	inter_msg |= INTER_MV8;
+	if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) &&
+	     		(msg[1] & SUBMB_SHAPE_MASK)) {
+		inter_msg |= INTER_MV32;	/* same condition as the 128 case above */
+	}
+
+    OUT_BCS_BATCH(batch, inter_msg);
+
+    OUT_BCS_BATCH(batch, (0xFFFF<<16) | (y << 8) | x);        /* Code Block Pattern for Y in the upper half, MB y and x in the low two bytes */
+    OUT_BCS_BATCH(batch, 0x000F000F);                         /* Code Block Pattern */    
+    OUT_BCS_BATCH(batch, (0 << 27) | (end_mb << 26) | qp);    /* Last MB flag (bit 26) and QP */
+
+    /*Stuff for Inter MB*/
+	inter_msg = msg[1] >> 8;	/* drop the low byte of msg[1] */
+    OUT_BCS_BATCH(batch, inter_msg);        
+    OUT_BCS_BATCH(batch, 0x0);    
+    OUT_BCS_BATCH(batch, 0x0);        
+
+    OUT_BCS_BATCH(batch, 0x00000000); /*MaxSizeInWord and TargetSizeInWord*/
+
+    OUT_BCS_BATCH(batch, 0x0);        
+
+    ADVANCE_BCS_BATCH(batch);
+
+    return len_in_dwords;
+}
+
+/*
+ * Reset the MFC encoder context before a picture is encoded.
+ *
+ * Releases the buffer references held for the input, output and reference
+ * surfaces, then replaces the three row-store scratch buffers with fresh
+ * allocations sized from the sequence parameter's picture width in
+ * macroblocks.
+ */
+static void gen75_mfc_init(VADriverContextP ctx,
+                           struct encode_state *encode_state,
+                           struct gen6_encoder_context *gen6_encoder_context)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
+    VAEncSequenceParameterBufferH264 *seq_param = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param->buffer;
+    int mb_columns = seq_param->picture_width_in_mbs;
+    int idx;
+
+    /* Drop the per-picture buffers common to every MFC encode */
+    dri_bo_unreference(mfc_context->post_deblocking_output.bo);
+    mfc_context->post_deblocking_output.bo = NULL;
+
+    dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
+    mfc_context->pre_deblocking_output.bo = NULL;
+
+    dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
+    mfc_context->uncompressed_picture_source.bo = NULL;
+
+    dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo);
+    mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
+
+    idx = 0;
+    while (idx < MAX_MFC_REFERENCE_SURFACES) {
+        if (mfc_context->reference_surfaces[idx].bo != NULL)
+            dri_bo_unreference(mfc_context->reference_surfaces[idx].bo);
+        mfc_context->reference_surfaces[idx].bo = NULL;
+        idx++;
+    }
+
+    /* Intra row store: 64 bytes per macroblock column */
+    dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
+    mfc_context->intra_row_store_scratch_buffer.bo =
+        dri_bo_alloc(i965->intel.bufmgr, "Buffer", mb_columns * 64, 64);
+    assert(mfc_context->intra_row_store_scratch_buffer.bo);
+
+    /* Deblocking filter row store: 4 * 64 bytes per macroblock column */
+    dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
+    mfc_context->deblocking_filter_row_store_scratch_buffer.bo =
+        dri_bo_alloc(i965->intel.bufmgr, "Buffer", 4 * mb_columns * 64, 64);
+    assert(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
+
+    /* BSD/MPC row store: 2 * 64 bytes per macroblock column, 4K aligned */
+    dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
+    mfc_context->bsd_mpc_row_store_scratch_buffer.bo =
+        dri_bo_alloc(i965->intel.bufmgr, "Buffer", 2 * mb_columns * 64, 0x1000);
+    assert(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
+}
+
+/* Positions inside one per-macroblock VME output record, as used by the PAK programming code. */
+#define		INTRA_RDO_OFFSET	4	/* dword index of the intra RDO value */
+#define		INTER_RDO_OFFSET	54	/* dword index of the inter RDO value */
+#define		INTER_MSG_OFFSET	52	/* dword offset of the inter message passed to the inter PAK object */
+#define		INTER_MV_OFFSET		224	/* added to the record's offset (mb_index * size_block) to form the inter PAK object's offset argument */
+#define		RDO_MASK		0xFFFF	/* only the low 16 bits of an RDO dword are compared */
+
+/*
+ * Build the PAK command stream for one H.264 picture.
+ *
+ * A secondary BSD batch buffer is filled with the MFX state commands
+ * followed by one MFC_AVC_PAK_OBJECT per macroblock.  The per-macroblock
+ * data comes from the mapped VME output buffer; for non-intra slices the
+ * low 16 bits of the intra and inter RDO dwords are compared and the
+ * cheaper mode is emitted.  The secondary batch is terminated with
+ * MI_BATCH_BUFFER_END and chained from the encoder's main batch through
+ * MI_BATCH_BUFFER_START, after which the local handle is freed.
+ *
+ * Only the first slice parameter buffer is consulted (single slice).
+ */
+static void gen75_mfc_avc_pipeline_programing(VADriverContextP ctx,
+                                      struct encode_state *encode_state,
+                                      struct gen6_encoder_context *gen6_encoder_context)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct intel_batchbuffer *main_batch = gen6_encoder_context->base.batch;
+    struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
+    struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
+    VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param->buffer;
+    VAEncSliceParameterBuffer *pSliceParameter = (VAEncSliceParameterBuffer *)encode_state->slice_params[0]->buffer; /* FIXME: multi slices */
+    unsigned int *msg = NULL, offset = 0;
+    unsigned char *msg_ptr = NULL;
+    int emit_new_state = 1, object_len_in_dwords;
+    int is_intra = pSliceParameter->slice_flags.bits.is_intra;
+    int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
+    int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
+    int x,y, mb_index;
+    int inter_rdo, intra_rdo;
+    /* 12 dwords (48 bytes) per macroblock plus room for the state commands */
+    struct intel_batchbuffer *batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, width_in_mbs * height_in_mbs * 12 * 4 + 0x800);
+
+    intel_batchbuffer_start_atomic_bcs(batch, width_in_mbs * height_in_mbs * 12 * 4 + 0x700);
+
+    dri_bo_map(vme_context->vme_output.bo , 1);
+    msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
+
+    for (y = 0; y < height_in_mbs; y++) {
+        for (x = 0; x < width_in_mbs; x++) {
+            int last_mb = (y == (height_in_mbs-1)) && ( x == (width_in_mbs-1) );
+            int qp = pSequenceParameter->initial_qp;
+
+            mb_index = (y * width_in_mbs) + x;
+
+            /* (Re-)emit the complete MFX state at the start and after every flush */
+            if (emit_new_state) {
+                intel_batchbuffer_emit_mi_flush(batch);
+
+                gen75_mfc_pipe_mode_select(ctx, MFX_FORMAT_AVC, gen6_encoder_context, batch);
+                gen75_mfc_surface_state(ctx, gen6_encoder_context, batch);
+                gen75_mfc_ind_obj_base_addr_state(ctx, gen6_encoder_context, batch);
+
+                gen75_mfc_pipe_buf_addr_state(ctx, gen6_encoder_context, batch);
+                gen75_mfc_bsp_buf_base_addr_state(ctx, gen6_encoder_context, batch);
+
+                gen75_mfc_avc_img_state(ctx, gen6_encoder_context, batch);
+                gen75_mfc_avc_qm_state(ctx, gen6_encoder_context, batch);
+                gen75_mfc_avc_fqm_state(ctx, gen6_encoder_context, batch);
+                gen75_mfc_avc_directmode_state(ctx, gen6_encoder_context, batch);
+
+                gen75_mfc_avc_ref_idx_state(ctx, gen6_encoder_context, batch);
+                gen75_mfc_avc_slice_state(ctx, is_intra, gen6_encoder_context, batch);
+                emit_new_state = 0;
+            }
+
+            /* VME output record of the current macroblock */
+            msg = (unsigned int *) (msg_ptr + mb_index * vme_context->vme_output.size_block);
+            if (is_intra) {
+                object_len_in_dwords = gen75_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, gen6_encoder_context, batch);
+            } else {
+                inter_rdo = msg[INTER_RDO_OFFSET] & RDO_MASK;
+                intra_rdo = msg[INTRA_RDO_OFFSET] & RDO_MASK;
+                if (intra_rdo < inter_rdo) {
+                    object_len_in_dwords = gen75_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, gen6_encoder_context, batch);
+                } else {
+                    msg += INTER_MSG_OFFSET;
+                    offset = mb_index * vme_context->vme_output.size_block + INTER_MV_OFFSET;
+                    object_len_in_dwords = gen75_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp, offset, msg, gen6_encoder_context, batch);
+                }
+            }
+
+            /*
+             * The PAK object helpers return their length in dwords while the
+             * batch buffer sizes used here are expressed in bytes, so convert
+             * before asking whether another object of that size still fits.
+             */
+            if (intel_batchbuffer_check_free_space(batch, object_len_in_dwords * 4) == 0) {
+                intel_batchbuffer_end_atomic(batch);
+                intel_batchbuffer_flush(batch);
+                emit_new_state = 1;
+                intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
+            }
+        }
+    }
+
+    dri_bo_unmap(vme_context->vme_output.bo);
+
+    intel_batchbuffer_align(batch, 8);
+
+    /* Terminate the secondary batch */
+    BEGIN_BCS_BATCH(batch, 2);
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
+    ADVANCE_BCS_BATCH(batch);
+
+    intel_batchbuffer_end_atomic(batch);
+
+    /* chain to the main batch buffer */
+    intel_batchbuffer_start_atomic_bcs(main_batch, 0x100);
+    intel_batchbuffer_emit_mi_flush(main_batch);
+    BEGIN_BCS_BATCH(main_batch, 2);
+    OUT_BCS_BATCH(main_batch, MI_BATCH_BUFFER_START | (1 << 8));
+    OUT_BCS_RELOC(main_batch,
+                  batch->buffer,
+                  I915_GEM_DOMAIN_COMMAND, 0,
+                  0);
+    ADVANCE_BCS_BATCH(main_batch);
+    intel_batchbuffer_end_atomic(main_batch);
+
+    /* end of programming: release the local handle of the secondary batch */
+    intel_batchbuffer_free(batch);
+}
+
+/*
+ * Bind the input and output objects of one H.264 encode to the MFC context
+ * and program the BCS pipeline.
+ *
+ * Takes a reference on the reconstructed picture, the reference picture
+ * (when it already has a buffer), the source surface and the coded buffer,
+ * records the surface geometry, then calls
+ * gen75_mfc_avc_pipeline_programing().
+ *
+ * Always returns VA_STATUS_SUCCESS; invalid objects trip an assert.
+ */
+static VAStatus gen75_mfc_avc_prepare(VADriverContextP ctx, 
+                                     struct encode_state *encode_state,
+                                     struct gen6_encoder_context *gen6_encoder_context)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
+    struct object_surface *obj_surface;
+    struct object_buffer *obj_buffer;
+    dri_bo *bo;
+    VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param->buffer;
+    VAStatus vaStatus = VA_STATUS_SUCCESS;
+
+    /* Reconstructed picture: written as the post-deblocking output */
+    obj_surface = SURFACE(pPicParameter->reconstructed_picture);
+    assert(obj_surface);
+    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
+    mfc_context->post_deblocking_output.bo = obj_surface->bo;
+    dri_bo_reference(mfc_context->post_deblocking_output.bo);
+
+    mfc_context->surface_state.width = obj_surface->orig_width;
+    mfc_context->surface_state.height = obj_surface->orig_height;
+    mfc_context->surface_state.w_pitch = obj_surface->width;
+    mfc_context->surface_state.h_pitch = obj_surface->height;
+
+    /* Reference picture: only bound when it already owns a buffer */
+    obj_surface = SURFACE(pPicParameter->reference_picture);
+    assert(obj_surface);
+    if (obj_surface->bo != NULL) {
+        mfc_context->reference_surfaces[0].bo = obj_surface->bo;
+        dri_bo_reference(obj_surface->bo);
+    }
+
+    /* Source picture to be encoded */
+    obj_surface = SURFACE(encode_state->current_render_target);
+    assert(obj_surface && obj_surface->bo);
+    mfc_context->uncompressed_picture_source.bo = obj_surface->bo;
+    dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
+
+    /* Coded buffer: validate the lookup like the surface lookups above */
+    obj_buffer = BUFFER (pPicParameter->coded_buf); /* FIXME: fix this later */
+    assert(obj_buffer && obj_buffer->buffer_store);
+    bo = obj_buffer->buffer_store->bo;
+    assert(bo);
+    mfc_context->mfc_indirect_pak_bse_object.bo = bo;
+    /* The offset skips a 64-byte aligned VACodedBufferSegment at the start of the buffer */
+    mfc_context->mfc_indirect_pak_bse_object.offset = ALIGN(sizeof(VACodedBufferSegment), 64);
+    dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);
+
+    /* Program the BCS pipeline */
+    gen75_mfc_avc_pipeline_programing(ctx, encode_state, gen6_encoder_context);
+
+    return vaStatus;
+}
+
+/*
+ * Run the programmed pipeline by flushing the encoder's batch buffer.
+ * ctx and encode_state are unused.  Always returns VA_STATUS_SUCCESS.
+ */
+static VAStatus gen75_mfc_run(VADriverContextP ctx, 
+                             struct encode_state *encode_state,
+                             struct gen6_encoder_context *gen6_encoder_context)
+{
+    intel_batchbuffer_flush(gen6_encoder_context->base.batch);
+
+    return VA_STATUS_SUCCESS;
+}
+
+/*
+ * Final stage of the encode sequence.  Currently a no-op that always
+ * returns VA_STATUS_SUCCESS; the body below is compiled out and is only a
+ * debugging aid that hands a surface to my_debug().
+ */
+static VAStatus gen75_mfc_stop(VADriverContextP ctx, 
+                              struct encode_state *encode_state,
+                              struct gen6_encoder_context *gen6_encoder_context)
+{
+#if 0
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
+	
+    VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param->buffer;
+	
+    /* Pick which surface to inspect: reconstructed, reference or source */
+    struct object_surface *obj_surface = SURFACE(pPicParameter->reconstructed_picture);
+    //struct object_surface *obj_surface = SURFACE(pPicParameter->reference_picture[0]);
+    //struct object_surface *obj_surface = SURFACE(encode_state->current_render_target);
+    my_debug(obj_surface);
+
+#endif
+
+    return VA_STATUS_SUCCESS;
+}
+
+/*
+ * Encode one H.264 picture: reset the MFC context, bind the buffers and
+ * program the pipeline, submit it, then run the stop stage.
+ *
+ * Returns the status of the first stage that fails, or VA_STATUS_SUCCESS.
+ */
+static VAStatus
+gen75_mfc_avc_encode_picture(VADriverContextP ctx, 
+                            struct encode_state *encode_state,
+                            struct gen6_encoder_context *gen6_encoder_context)
+{
+    VAStatus vaStatus;
+
+    gen75_mfc_init(ctx, encode_state, gen6_encoder_context);
+
+    /* Do not submit a pipeline whose preparation reported an error */
+    vaStatus = gen75_mfc_avc_prepare(ctx, encode_state, gen6_encoder_context);
+    if (vaStatus != VA_STATUS_SUCCESS)
+        return vaStatus;
+
+    vaStatus = gen75_mfc_run(ctx, encode_state, gen6_encoder_context);
+    if (vaStatus != VA_STATUS_SUCCESS)
+        return vaStatus;
+
+    return gen75_mfc_stop(ctx, encode_state, gen6_encoder_context);
+}
+
+/*
+ * Dispatch one picture to the encode routine matching the VA profile.
+ * Only VAProfileH264Baseline is handled; any other profile yields
+ * VA_STATUS_ERROR_UNSUPPORTED_PROFILE.
+ */
+VAStatus
+gen75_mfc_pipeline(VADriverContextP ctx,
+                  VAProfile profile,
+                  struct encode_state *encode_state,
+                  struct gen6_encoder_context *gen6_encoder_context)
+{
+    /* FIXME: add for other profile */
+    if (profile != VAProfileH264Baseline)
+        return VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
+
+    return gen75_mfc_avc_encode_picture(ctx, encode_state, gen6_encoder_context);
+}
+
+/*
+ * Set up the direct-MV buffers of an MFC context.
+ *
+ * Every slot of direct_mv_buffers is released and cleared, then two
+ * buffers of 68*8192 bytes are allocated and stored in slot 0 and slot
+ * NUM_MFC_DMV_BUFFERS - 2.  Always returns True; the allocation results
+ * are not checked.
+ */
+Bool gen75_mfc_context_init(VADriverContextP ctx, struct gen6_mfc_context *mfc_context)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    int slot = 0;
+
+    while (slot < NUM_MFC_DMV_BUFFERS) {
+        dri_bo_unreference(mfc_context->direct_mv_buffers[slot].bo);
+        mfc_context->direct_mv_buffers[slot].bo = NULL;
+        slot++;
+    }
+
+    mfc_context->direct_mv_buffers[0].bo =
+        dri_bo_alloc(i965->intel.bufmgr, "Buffer", 68*8192, 64);
+
+    mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo =
+        dri_bo_alloc(i965->intel.bufmgr, "Buffer", 68*8192, 64);
+
+    return True;
+}
+
+/*
+ * Release every buffer object referenced by an MFC context and clear the
+ * corresponding pointers.  Always returns True.
+ */
+Bool gen75_mfc_context_destroy(struct gen6_mfc_context *mfc_context)
+{
+    int i;
+
+    dri_bo_unreference(mfc_context->post_deblocking_output.bo);
+    mfc_context->post_deblocking_output.bo = NULL;
+
+    dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
+    mfc_context->pre_deblocking_output.bo = NULL;
+
+    dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
+    mfc_context->uncompressed_picture_source.bo = NULL;
+
+    dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo); 
+    mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
+
+    for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
+        dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
+        mfc_context->direct_mv_buffers[i].bo = NULL;
+    }
+
+    /*
+     * The reference surfaces are referenced while a picture is prepared and
+     * otherwise only dropped at the start of the next picture, so the ones
+     * used by the last encoded picture must be released here.
+     */
+    for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
+        if (mfc_context->reference_surfaces[i].bo != NULL)
+            dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
+        mfc_context->reference_surfaces[i].bo = NULL;
+    }
+
+    dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
+    mfc_context->intra_row_store_scratch_buffer.bo = NULL;
+
+    dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
+    mfc_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
+
+    dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
+    mfc_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
+
+    return True;
+}
diff --git a/src/gen75_mfd.c b/src/gen75_mfd.c
new file mode 100644
index 0000000..78cb73b
--- /dev/null
+++ b/src/gen75_mfd.c
@@ -0,0 +1,3402 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Xiang Haihao <haihao.xiang at intel.com>
+ *    Zhao Yakui <yakui.zhao at intel.com>
+ *
+ */
+
+#ifndef HAVE_GEN_AVC_SURFACE
+#define HAVE_GEN_AVC_SURFACE 1
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+
+#include "config.h"
+#include "intel_batchbuffer.h"
+#include "intel_driver.h"
+
+#include "i965_defines.h"
+#include "i965_drv_video.h"
+#include "i965_decoder_utils.h"
+
+#include "gen7_mfd.h"
+
+/* Lowest device revision treated as "B+" stepping. */
+#define B0_STEP_REV		2
+/* Non-zero when the device revision is B0_STEP_REV or newer; selects the *_bplus command layouts. */
+#define IS_STEPPING_BPLUS(i965)	((i965->intel.revision) >= B0_STEP_REV)
+
+/*
+ * Zigzag scan order of an 8x8 block: entry k is the raster index
+ * (row * 8 + column) of the k-th coefficient in scan order.
+ */
+static const uint32_t zigzag_direct[64] = {
+    0,   1,  8, 16,  9,  2,  3, 10,
+    17, 24, 32, 25, 18, 11,  4,  5,
+    12, 19, 26, 33, 40, 48, 41, 34,
+    27, 20, 13,  6,  7, 14, 21, 28,
+    35, 42, 49, 56, 57, 50, 43, 36,
+    29, 22, 15, 23, 30, 37, 44, 51,
+    58, 59, 52, 45, 38, 31, 39, 46,
+    53, 60, 61, 54, 47, 55, 62, 63
+};
+
+/*
+ * Synchronise the decoder's reference_surface table with the
+ * ReferenceFrames list of the current picture parameters.
+ *
+ * Three passes are made:
+ *   1. entries whose surface is no longer listed in ReferenceFrames are
+ *      released and marked invalid;
+ *   2. newly listed reference frames get a backing buffer if needed, the
+ *      lowest unused frame store id, and the first free table slot;
+ *   3. the table is reordered so that, where possible, the entry holding
+ *      frame store id i sits at index i.
+ */
+static void
+gen75_mfd_avc_frame_store_index(VADriverContextP ctx,
+                               VAPictureParameterBufferH264 *pic_param,
+                               struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    int i, j;
+
+    assert(ARRAY_ELEMS(gen7_mfd_context->reference_surface) == ARRAY_ELEMS(pic_param->ReferenceFrames));
+
+    /* Pass 1: drop table entries that are no longer reference frames */
+    for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
+        int found = 0;
+
+        if (gen7_mfd_context->reference_surface[i].surface_id == VA_INVALID_ID)
+            continue;
+
+        for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) {
+            VAPictureH264 *ref_pic = &pic_param->ReferenceFrames[j];
+            if (ref_pic->flags & VA_PICTURE_H264_INVALID)
+                continue;
+
+            if (gen7_mfd_context->reference_surface[i].surface_id == ref_pic->picture_id) {
+                found = 1;
+                break;
+            }
+        }
+
+        if (!found) {
+            /* NOTE(review): the lookup result is used without a NULL check */
+            struct object_surface *obj_surface = SURFACE(gen7_mfd_context->reference_surface[i].surface_id);
+            obj_surface->flags &= ~SURFACE_REFERENCED;
+
+            /* Of the masked flags only "displayed" remains: release the buffer */
+            if ((obj_surface->flags & SURFACE_ALL_MASK) == SURFACE_DISPLAYED) {
+                dri_bo_unreference(obj_surface->bo);
+                obj_surface->bo = NULL;
+                obj_surface->flags &= ~SURFACE_REF_DIS_MASK;
+            }
+
+            if (obj_surface->free_private_data)
+                obj_surface->free_private_data(&obj_surface->private_data);
+
+            gen7_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
+            gen7_mfd_context->reference_surface[i].frame_store_id = -1;
+        }
+    }
+
+    /* Pass 2: add reference frames that are not in the table yet */
+    for (i = 0; i < ARRAY_ELEMS(pic_param->ReferenceFrames); i++) {
+        VAPictureH264 *ref_pic = &pic_param->ReferenceFrames[i];
+        int found = 0;
+
+        if (ref_pic->flags & VA_PICTURE_H264_INVALID)
+            continue;
+
+        for (j = 0; j < ARRAY_ELEMS(gen7_mfd_context->reference_surface); j++) {
+            if (gen7_mfd_context->reference_surface[j].surface_id == VA_INVALID_ID)
+                continue;
+            
+            if (gen7_mfd_context->reference_surface[j].surface_id == ref_pic->picture_id) {
+                found = 1;
+                break;
+            }
+        }
+
+        if (!found) {
+            int frame_idx;
+            struct object_surface *obj_surface = SURFACE(ref_pic->picture_id);
+            
+            assert(obj_surface);
+            i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
+
+            /* Find the lowest frame store id not used by any valid entry */
+            for (frame_idx = 0; frame_idx < ARRAY_ELEMS(gen7_mfd_context->reference_surface); frame_idx++) {
+                for (j = 0; j < ARRAY_ELEMS(gen7_mfd_context->reference_surface); j++) {
+                    if (gen7_mfd_context->reference_surface[j].surface_id == VA_INVALID_ID)
+                        continue;
+
+                    if (gen7_mfd_context->reference_surface[j].frame_store_id == frame_idx)
+                        break;
+                }
+
+                /* Inner loop ran to completion: frame_idx is free */
+                if (j == ARRAY_ELEMS(gen7_mfd_context->reference_surface))
+                    break;
+            }
+
+            assert(frame_idx < ARRAY_ELEMS(gen7_mfd_context->reference_surface));
+
+            /* Store the new reference in the first invalid slot */
+            for (j = 0; j < ARRAY_ELEMS(gen7_mfd_context->reference_surface); j++) {
+                if (gen7_mfd_context->reference_surface[j].surface_id == VA_INVALID_ID) {
+                    gen7_mfd_context->reference_surface[j].surface_id = ref_pic->picture_id;
+                    gen7_mfd_context->reference_surface[j].frame_store_id = frame_idx;
+                    break;
+                }
+            }
+        }
+    }
+
+    /* Pass 3: sort, moving the entry with frame store id i to index i by swapping */
+    for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface) - 1; i++) {
+        if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
+            gen7_mfd_context->reference_surface[i].frame_store_id == i)
+            continue;
+
+        for (j = i + 1; j < ARRAY_ELEMS(gen7_mfd_context->reference_surface); j++) {
+            if (gen7_mfd_context->reference_surface[j].surface_id != VA_INVALID_ID &&
+                gen7_mfd_context->reference_surface[j].frame_store_id == i) {
+                VASurfaceID id = gen7_mfd_context->reference_surface[i].surface_id;
+                int frame_idx = gen7_mfd_context->reference_surface[i].frame_store_id;
+
+                gen7_mfd_context->reference_surface[i].surface_id = gen7_mfd_context->reference_surface[j].surface_id;
+                gen7_mfd_context->reference_surface[i].frame_store_id = gen7_mfd_context->reference_surface[j].frame_store_id;
+                gen7_mfd_context->reference_surface[j].surface_id = id;
+                gen7_mfd_context->reference_surface[j].frame_store_id = frame_idx;
+                break;
+            }
+        }
+    }
+}
+
+/*
+ * Attach (or refresh) the AVC private data of a decode surface.
+ *
+ * Allocates the GenAvcSurface on first use, recomputes dmv_bottom_flag
+ * from the picture parameters and makes sure the direct-MV buffers needed
+ * for the picture exist.  Each buffer holds 128 bytes per macroblock of
+ * the frame.  Allocation failures trip an assert.
+ */
+static void
+gen75_mfd_init_avc_surface(VADriverContextP ctx, 
+                          VAPictureParameterBufferH264 *pic_param,
+                          struct object_surface *obj_surface)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    GenAvcSurface *gen7_avc_surface = obj_surface->private_data;
+    int width_in_mbs, height_in_mbs;
+
+    obj_surface->free_private_data = gen_free_avc_surface;
+    width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
+    height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
+
+    if (!gen7_avc_surface) {
+        /* calloc takes (count, size); the result is dereferenced below */
+        gen7_avc_surface = calloc(1, sizeof(GenAvcSurface));
+        assert(gen7_avc_surface);
+        assert((obj_surface->size & 0x3f) == 0);
+        obj_surface->private_data = gen7_avc_surface;
+    }
+
+    /* A separate bottom-field buffer is used for field pictures without 8x8 direct inference */
+    gen7_avc_surface->dmv_bottom_flag = (pic_param->pic_fields.bits.field_pic_flag &&
+                                         !pic_param->seq_fields.bits.direct_8x8_inference_flag);
+
+    if (gen7_avc_surface->dmv_top == NULL) {
+        gen7_avc_surface->dmv_top = dri_bo_alloc(i965->intel.bufmgr,
+                                                 "direct mv w/r buffer",
+                                                 width_in_mbs * height_in_mbs * 128,
+                                                 0x1000);
+        assert(gen7_avc_surface->dmv_top);
+    }
+
+    if (gen7_avc_surface->dmv_bottom_flag &&
+        gen7_avc_surface->dmv_bottom == NULL) {
+        gen7_avc_surface->dmv_bottom = dri_bo_alloc(i965->intel.bufmgr,
+                                                    "direct mv w/r buffer",
+                                                    width_in_mbs * height_in_mbs * 128,
+                                                    0x1000);
+        assert(gen7_avc_surface->dmv_bottom);
+    }
+}
+
+/*
+ * Emit the 5-dword MFX_PIPE_MODE_SELECT command that puts the MFX engine
+ * into long-format VLD decoding for the given codec.  The pre/post
+ * deblocking output enables are taken from the decoder context.
+ * ctx and decode_state are unused.
+ */
+static void
+gen75_mfd_pipe_mode_select(VADriverContextP ctx,
+                          struct decode_state *decode_state,
+                          int standard_select,
+                          struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    unsigned int mode_dw, error_dw;
+
+    assert(standard_select == MFX_FORMAT_MPEG2 ||
+           standard_select == MFX_FORMAT_AVC ||
+           standard_select == MFX_FORMAT_VC1 ||
+           standard_select == MFX_FORMAT_JPEG);
+
+    /* DW1: decoder mode and output selection */
+    mode_dw = (MFX_LONG_MODE << 17) |   /* Currently only support long format */
+              (MFD_MODE_VLD << 15) |    /* VLD mode */
+              (0 << 10) |               /* disable Stream-Out */
+              (gen7_mfd_context->post_deblocking_output.valid << 9) | /* Post Deblocking Output */
+              (gen7_mfd_context->pre_deblocking_output.valid << 8) |  /* Pre Deblocking Output */
+              (0 << 5) |                /* not in stitch mode */
+              (MFX_CODEC_DECODE << 4) | /* decoding mode */
+              (standard_select << 0);
+
+    /* DW2: all "terminate on error" controls are left cleared */
+    error_dw = (0 << 4) |   /* terminate if AVC motion and POC table error occurs */
+               (0 << 3) |   /* terminate if AVC mbdata error occurs */
+               (0 << 2) |   /* terminate if AVC CABAC/CAVLC decode error occurs */
+               (0 << 1) |
+               (0 << 0);
+
+    BEGIN_BCS_BATCH(batch, 5);
+    OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
+    OUT_BCS_BATCH(batch, mode_dw);
+    OUT_BCS_BATCH(batch, error_dw);
+    OUT_BCS_BATCH(batch, 0); /* pic status/error report id */
+    OUT_BCS_BATCH(batch, 0); /* reserved */
+    ADVANCE_BCS_BATCH(batch);
+}
+
+/*
+ * Emit the 6-dword MFX_SURFACE_STATE command describing the current
+ * render target: original dimensions, pitch, tiling and the Y offsets of
+ * the chroma planes.  Chroma is flagged as interleaved for every codec
+ * except JPEG.
+ */
+static void
+gen75_mfd_surface_state(VADriverContextP ctx,
+                       struct decode_state *decode_state,
+                       int standard_select,
+                       struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct object_surface *obj_surface = SURFACE(decode_state->current_render_target);
+    unsigned int cb_y_offset;
+    unsigned int cr_y_offset;
+    unsigned int size_dw, format_dw;
+
+    assert(obj_surface);
+
+    cb_y_offset = obj_surface->y_cb_offset;
+    cr_y_offset = obj_surface->y_cr_offset;
+
+    /* DW2: surface height and width, each minus one */
+    size_dw = ((obj_surface->orig_height - 1) << 18) |
+              ((obj_surface->orig_width - 1) << 4);
+
+    /* DW3: format, chroma layout, pitch and tiling */
+    format_dw = (MFX_SURFACE_PLANAR_420_8 << 28) |              /* 420 planar YUV surface */
+                ((standard_select != MFX_FORMAT_JPEG) << 27) |  /* interleave chroma, set to 0 for JPEG */
+                (0 << 22) |                                     /* surface object control state, ignored */
+                ((obj_surface->width - 1) << 3) |               /* pitch */
+                (0 << 2) |                                      /* must be 0 */
+                (1 << 1) |                                      /* must be tiled */
+                (I965_TILEWALK_YMAJOR << 0);                    /* tile walk, must be 1 */
+
+    BEGIN_BCS_BATCH(batch, 6);
+    OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, size_dw);
+    OUT_BCS_BATCH(batch, format_dw);
+    OUT_BCS_BATCH(batch,
+                  (0 << 16) |           /* X offset for U(Cb), must be 0 */
+                  (cb_y_offset << 0));  /* Y offset for U(Cb) */
+    OUT_BCS_BATCH(batch,
+                  (0 << 16) |           /* X offset for V(Cr), must be 0 */
+                  (cr_y_offset << 0));  /* Y offset for V(Cr), must be 0 for video codec, non-zero for JPEG */
+    ADVANCE_BCS_BATCH(batch);
+}
+
+
+/*
+ * Emit the 61-dword variant of MFX_PIPE_BUF_ADDR_STATE used on B+ stepping
+ * devices.  Each buffer address is followed by extra zero dwords; buffers
+ * that are not valid in the decoder context are programmed as 0.
+ * decode_state and standard_select are unused.
+ */
+static void
+gen75_mfd_pipe_buf_addr_state_bplus(VADriverContextP ctx,
+                             struct decode_state *decode_state,
+                             int standard_select,
+                             struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    int i;
+
+    BEGIN_BCS_BATCH(batch, 61);
+    OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
+	/* Pre-deblocking output, DW 1-3 */
+    if (gen7_mfd_context->pre_deblocking_output.valid)
+        OUT_BCS_RELOC(batch, gen7_mfd_context->pre_deblocking_output.bo,
+                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                      0);
+    else
+        OUT_BCS_BATCH(batch, 0);
+
+	OUT_BCS_BATCH(batch, 0);
+	OUT_BCS_BATCH(batch, 0);
+	/* Post-deblocking output, DW 4-6 */
+    if (gen7_mfd_context->post_deblocking_output.valid)
+        OUT_BCS_RELOC(batch, gen7_mfd_context->post_deblocking_output.bo,
+                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                      0);
+    else
+        OUT_BCS_BATCH(batch, 0);
+
+	OUT_BCS_BATCH(batch, 0);
+	OUT_BCS_BATCH(batch, 0);
+
+	/* uncompressed video & stream out, DW 7-12 */
+    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
+    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
+	OUT_BCS_BATCH(batch, 0);
+	OUT_BCS_BATCH(batch, 0);
+	OUT_BCS_BATCH(batch, 0);
+	OUT_BCS_BATCH(batch, 0);
+
+	/* intra row-store scratch, DW 13-15 */
+    if (gen7_mfd_context->intra_row_store_scratch_buffer.valid)
+        OUT_BCS_RELOC(batch, gen7_mfd_context->intra_row_store_scratch_buffer.bo,
+                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                      0);
+    else
+        OUT_BCS_BATCH(batch, 0);
+
+	OUT_BCS_BATCH(batch, 0);
+	OUT_BCS_BATCH(batch, 0);
+	/* deblocking filter row-store scratch, DW 16-18 */
+    if (gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid)
+        OUT_BCS_RELOC(batch, gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
+                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                      0);
+    else
+        OUT_BCS_BATCH(batch, 0);
+	OUT_BCS_BATCH(batch, 0);
+	OUT_BCS_BATCH(batch, 0);
+
+    /* DW 19..50: two dwords per reference surface (address, then 0) */
+    for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
+        struct object_surface *obj_surface;
+
+        if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
+            obj_surface = SURFACE(gen7_mfd_context->reference_surface[i].surface_id);
+            assert(obj_surface && obj_surface->bo);
+
+            OUT_BCS_RELOC(batch, obj_surface->bo,
+                          I915_GEM_DOMAIN_INSTRUCTION, 0,
+                          0);
+        } else {
+            OUT_BCS_BATCH(batch, 0);
+        }
+            OUT_BCS_BATCH(batch, 0);
+    }
+	/* reference property, DW 51 */
+    OUT_BCS_BATCH(batch, 0);  
+	
+	/* Macroblock status & ILDB, DW 52-57 */
+        OUT_BCS_BATCH(batch, 0);
+	OUT_BCS_BATCH(batch, 0);
+	OUT_BCS_BATCH(batch, 0);
+        OUT_BCS_BATCH(batch, 0);
+	OUT_BCS_BATCH(batch, 0);
+	OUT_BCS_BATCH(batch, 0);
+
+	/* the second Macroblock status, DW 58-60 */	
+        OUT_BCS_BATCH(batch, 0);
+	OUT_BCS_BATCH(batch, 0);
+	OUT_BCS_BATCH(batch, 0);
+    ADVANCE_BCS_BATCH(batch);
+}
+
+/*
+ * Emit MFX_PIPE_BUF_ADDR_STATE for decoding.
+ *
+ * On B+ stepping devices the 61-dword layout is emitted by
+ * gen75_mfd_pipe_buf_addr_state_bplus(); otherwise the 25-dword layout
+ * below is used.  Buffers that are not valid in the decoder context are
+ * programmed as 0.
+ */
+static void
+gen75_mfd_pipe_buf_addr_state(VADriverContextP ctx,
+                             struct decode_state *decode_state,
+                             int standard_select,
+                             struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    int i;
+    if (IS_STEPPING_BPLUS(i965)) {
+	gen75_mfd_pipe_buf_addr_state_bplus(ctx, decode_state,
+			standard_select, gen7_mfd_context);
+	return;
+    }
+
+    BEGIN_BCS_BATCH(batch, 25);
+    OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (25 - 2));
+    /* DW 1: pre-deblocking output */
+    if (gen7_mfd_context->pre_deblocking_output.valid)
+        OUT_BCS_RELOC(batch, gen7_mfd_context->pre_deblocking_output.bo,
+                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                      0);
+    else
+        OUT_BCS_BATCH(batch, 0);
+
+    /* DW 2: post-deblocking output */
+    if (gen7_mfd_context->post_deblocking_output.valid)
+        OUT_BCS_RELOC(batch, gen7_mfd_context->post_deblocking_output.bo,
+                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                      0);
+    else
+        OUT_BCS_BATCH(batch, 0);
+
+    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
+    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
+
+    /* DW 5: intra row-store scratch */
+    if (gen7_mfd_context->intra_row_store_scratch_buffer.valid)
+        OUT_BCS_RELOC(batch, gen7_mfd_context->intra_row_store_scratch_buffer.bo,
+                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                      0);
+    else
+        OUT_BCS_BATCH(batch, 0);
+
+    /* DW 6: deblocking filter row-store scratch */
+    if (gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid)
+        OUT_BCS_RELOC(batch, gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
+                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                      0);
+    else
+        OUT_BCS_BATCH(batch, 0);
+
+    /* DW 7..22: one dword per reference surface */
+    for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
+        struct object_surface *obj_surface;
+
+        if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
+            obj_surface = SURFACE(gen7_mfd_context->reference_surface[i].surface_id);
+            assert(obj_surface && obj_surface->bo);
+
+            OUT_BCS_RELOC(batch, obj_surface->bo,
+                          I915_GEM_DOMAIN_INSTRUCTION, 0,
+                          0);
+        } else {
+            OUT_BCS_BATCH(batch, 0);
+        }
+    }
+
+    OUT_BCS_BATCH(batch, 0);   /* ignore DW23 for decoding */
+    OUT_BCS_BATCH(batch, 0);   /* ignore DW24 for decoding */
+    ADVANCE_BCS_BATCH(batch);
+}
+
+/*
+ * B+ stepping layout of MFX_IND_OBJ_BASE_ADDR_STATE (26 dwords).
+ *
+ * Only the indirect bitstream object (the slice data buffer) and its
+ * upper bound are programmed; every other field is written as zero.
+ */
+static void
+gen75_mfd_ind_obj_base_addr_state_bplus(VADriverContextP ctx,
+                                 dri_bo *slice_data_bo,
+                                 int standard_select,
+                                 struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    int dw;
+
+    BEGIN_BCS_BATCH(batch, 26);
+    OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
+
+    /* DW 1-3: MFX Indirect Bitstream Object Base Address */
+    OUT_BCS_RELOC(batch, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+
+    /* DW 4-5: upper bound */
+    OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
+    OUT_BCS_BATCH(batch, 0);
+
+    /*
+     * DW 6-25, all zero:
+     *   6-10  MFX indirect MV
+     *   11-15 MFX IT_COFF
+     *   16-20 MFX IT_DBLK
+     *   21-25 MFX PAK_BSE object for encoder
+     */
+    for (dw = 6; dw <= 25; dw++)
+        OUT_BCS_BATCH(batch, 0);
+
+    ADVANCE_BCS_BATCH(batch);
+}
+ 
+/*
+ * Emit MFX_IND_OBJ_BASE_ADDR_STATE (11 dwords), or defer to the _bplus
+ * variant on B+ steppings.  The slice data buffer is the indirect
+ * bitstream object; all remaining fields are zero (ignored for VLD mode).
+ */
+static void
+gen75_mfd_ind_obj_base_addr_state(VADriverContextP ctx,
+                                 dri_bo *slice_data_bo,
+                                 int standard_select,
+                                 struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    int dw;
+
+    if (IS_STEPPING_BPLUS(i965)) {
+        gen75_mfd_ind_obj_base_addr_state_bplus(ctx, slice_data_bo,
+                                                standard_select, gen7_mfd_context);
+        return;
+    }
+
+    BEGIN_BCS_BATCH(batch, 11);
+    OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
+
+    /* DW 1: MFX Indirect Bitstream Object Base Address */
+    OUT_BCS_RELOC(batch, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+    /* DW 2: upper bound -- must set, up to 2G */
+    OUT_BCS_BATCH(batch, 0x80000000);
+
+    /* DW 3..10: ignore for VLD mode */
+    for (dw = 3; dw <= 10; dw++)
+        OUT_BCS_BATCH(batch, 0);
+
+    ADVANCE_BCS_BATCH(batch);
+}
+
+
+/*
+ * B+ stepping layout of MFX_BSP_BUF_BASE_ADDR_STATE (10 dwords).
+ *
+ * Three buffers are described, each occupying three dwords: the address
+ * (a relocation if the buffer is valid, otherwise zero) followed by two
+ * zero dwords.
+ */
+static void
+gen75_mfd_bsp_buf_base_addr_state_bplus(VADriverContextP ctx,
+                                 struct decode_state *decode_state,
+                                 int standard_select,
+                                 struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+
+    BEGIN_BCS_BATCH(batch, 10);
+    OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
+
+    /* BSD/MPC Row Store Scratch buffer 1-3 */
+    if (!gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid) {
+        OUT_BCS_BATCH(batch, 0);
+    } else {
+        OUT_BCS_RELOC(batch, gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
+                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                      0);
+    }
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+
+    /* MPR Row Store Scratch buffer 4-6 */
+    if (!gen7_mfd_context->mpr_row_store_scratch_buffer.valid) {
+        OUT_BCS_BATCH(batch, 0);
+    } else {
+        OUT_BCS_RELOC(batch, gen7_mfd_context->mpr_row_store_scratch_buffer.bo,
+                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                      0);
+    }
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+
+    /* Bitplane 7-9 (read only, so no write domain) */
+    if (!gen7_mfd_context->bitplane_read_buffer.valid) {
+        OUT_BCS_BATCH(batch, 0);
+    } else {
+        OUT_BCS_RELOC(batch, gen7_mfd_context->bitplane_read_buffer.bo,
+                      I915_GEM_DOMAIN_INSTRUCTION, 0,
+                      0);
+    }
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+
+    ADVANCE_BCS_BATCH(batch);
+}
+
+/*
+ * Emit MFX_BSP_BUF_BASE_ADDR_STATE (4 dwords), or defer to the _bplus
+ * variant on B+ steppings.  Each of the three buffer slots is a
+ * relocation when the buffer is valid and zero otherwise.
+ */
+static void
+gen75_mfd_bsp_buf_base_addr_state(VADriverContextP ctx,
+                                 struct decode_state *decode_state,
+                                 int standard_select,
+                                 struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+
+    if (IS_STEPPING_BPLUS(i965)) {
+        gen75_mfd_bsp_buf_base_addr_state_bplus(ctx, decode_state,
+                                                standard_select, gen7_mfd_context);
+        return;
+    }
+
+    BEGIN_BCS_BATCH(batch, 4);
+    OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));
+
+    /* DW 1: BSD/MPC row store scratch buffer */
+    if (!gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid) {
+        OUT_BCS_BATCH(batch, 0);
+    } else {
+        OUT_BCS_RELOC(batch, gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
+                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                      0);
+    }
+
+    /* DW 2: MPR row store scratch buffer */
+    if (!gen7_mfd_context->mpr_row_store_scratch_buffer.valid) {
+        OUT_BCS_BATCH(batch, 0);
+    } else {
+        OUT_BCS_RELOC(batch, gen7_mfd_context->mpr_row_store_scratch_buffer.bo,
+                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                      0);
+    }
+
+    /* DW 3: bitplane read buffer (read only, so no write domain) */
+    if (!gen7_mfd_context->bitplane_read_buffer.valid) {
+        OUT_BCS_BATCH(batch, 0);
+    } else {
+        OUT_BCS_RELOC(batch, gen7_mfd_context->bitplane_read_buffer.bo,
+                      I915_GEM_DOMAIN_INSTRUCTION, 0,
+                      0);
+    }
+
+    ADVANCE_BCS_BATCH(batch);
+}
+
+/*
+ * Emit one MFX_QM_STATE command (18 dwords) carrying a quantiser matrix.
+ *
+ * qm_type   - matrix selector written to DW1 (one of the MFX_QM_* values).
+ * qm        - source matrix bytes.
+ * qm_length - number of valid bytes at qm; must not exceed 64.
+ *
+ * The command payload is always 64 bytes.  Callers may supply fewer
+ * (the AVC 4x4 matrices pass 3 * 16 bytes), so the staging buffer is
+ * zeroed first: the unused tail is then well defined instead of being
+ * whatever happened to be on the stack.
+ */
+static void
+gen75_mfd_qm_state(VADriverContextP ctx,
+                  int qm_type,
+                  unsigned char *qm,
+                  int qm_length,
+                  struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    unsigned int qm_buffer[16];
+
+    assert(qm_length <= 16 * 4);
+    memset(qm_buffer, 0, sizeof(qm_buffer));
+    memcpy(qm_buffer, qm, qm_length);
+
+    BEGIN_BCS_BATCH(batch, 18);
+    OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
+    OUT_BCS_BATCH(batch, qm_type << 0);
+    intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
+    ADVANCE_BCS_BATCH(batch);
+}
+
+/*
+ * Emit the 16-dword MFX_AVC_IMG_STATE command for the current H.264
+ * picture: size in macroblocks, chroma QP offsets, weighted prediction
+ * modes, picture structure and the sequence/picture coding flags.
+ * Only DW 1..4 carry data; DW 5..15 are written as zero.
+ */
+static void
+gen75_mfd_avc_img_state(VADriverContextP ctx,
+                       struct decode_state *decode_state,
+                       struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    VAPictureParameterBufferH264 *pic_param;
+    unsigned int width_in_mbs, height_in_mbs;
+    int img_struct = 0;     /* stays 0 unless a field flag is set on CurrPic */
+    int mbaff_frame_flag;
+    int dw;
+
+    assert(decode_state->pic_param && decode_state->pic_param->buffer);
+    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
+    assert(!(pic_param->CurrPic.flags & VA_PICTURE_H264_INVALID));
+
+    /* top field takes precedence if both flags happen to be set */
+    if (pic_param->CurrPic.flags & VA_PICTURE_H264_TOP_FIELD)
+        img_struct = 1;
+    else if (pic_param->CurrPic.flags & VA_PICTURE_H264_BOTTOM_FIELD)
+        img_struct = 3;
+
+    /* bit 0 of img_struct must agree with field_pic_flag */
+    if ((img_struct & 0x1) != 0x1) {
+        assert(pic_param->pic_fields.bits.field_pic_flag == 0x0);
+    } else {
+        assert(pic_param->pic_fields.bits.field_pic_flag == 0x1);
+    }
+
+    if (!pic_param->seq_fields.bits.frame_mbs_only_flag) {
+        assert(pic_param->seq_fields.bits.direct_8x8_inference_flag == 1); /* see H.264 spec */
+    } else {
+        /* a frame containing only frame macroblocks */
+        assert(pic_param->seq_fields.bits.mb_adaptive_frame_field_flag == 0);
+        assert(pic_param->pic_fields.bits.field_pic_flag == 0);
+    }
+
+    /* MFX unit doesn't support 4:2:2 and 4:4:4 picture */
+    assert(pic_param->seq_fields.bits.chroma_format_idc == 0 || /* monochrome picture */
+           pic_param->seq_fields.bits.chroma_format_idc == 1);  /* 4:2:0 */
+    assert(pic_param->seq_fields.bits.residual_colour_transform_flag == 0); /* only available for 4:4:4 */
+
+    /* MBAFF applies to frame pictures of an MBAFF-enabled sequence only */
+    mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
+                        !pic_param->pic_fields.bits.field_pic_flag);
+
+    width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
+    height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
+
+    BEGIN_BCS_BATCH(batch, 16);
+    OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
+
+    /* DW 1: picture size in macroblocks */
+    OUT_BCS_BATCH(batch, width_in_mbs * height_in_mbs);
+
+    /* DW 2: height - 1 and width - 1, in macroblocks */
+    OUT_BCS_BATCH(batch,
+                  ((height_in_mbs - 1) << 16) |
+                  ((width_in_mbs - 1) << 0));
+
+    /* DW 3: chroma QP offsets, weighted prediction, picture structure */
+    OUT_BCS_BATCH(batch,
+                  ((pic_param->second_chroma_qp_index_offset & 0x1f) << 24) |
+                  ((pic_param->chroma_qp_index_offset & 0x1f) << 16) |
+                  (0 << 14) | /* Max-bit conformance Intra flag ??? FIXME */
+                  (0 << 13) | /* Max Macroblock size conformance Inter flag ??? FIXME */
+                  (pic_param->pic_fields.bits.weighted_pred_flag << 12) | /* differ from GEN6 */
+                  (pic_param->pic_fields.bits.weighted_bipred_idc << 10) |
+                  (img_struct << 8));
+
+    /* DW 4: sequence / picture coding flags */
+    OUT_BCS_BATCH(batch,
+                  (pic_param->seq_fields.bits.chroma_format_idc << 10) |
+                  (pic_param->pic_fields.bits.entropy_coding_mode_flag << 7) |
+                  ((!pic_param->pic_fields.bits.reference_pic_flag) << 6) |
+                  (pic_param->pic_fields.bits.constrained_intra_pred_flag << 5) |
+                  (pic_param->seq_fields.bits.direct_8x8_inference_flag << 4) |
+                  (pic_param->pic_fields.bits.transform_8x8_mode_flag << 3) |
+                  (pic_param->seq_fields.bits.frame_mbs_only_flag << 2) |
+                  (mbaff_frame_flag << 1) |
+                  (pic_param->pic_fields.bits.field_pic_flag << 0));
+
+    /* DW 5..15: zero */
+    for (dw = 5; dw <= 15; dw++)
+        OUT_BCS_BATCH(batch, 0);
+
+    ADVANCE_BCS_BATCH(batch);
+}
+
+/*
+ * Program the H.264 scaling lists through MFX_QM_STATE.
+ *
+ * The application-supplied IQ matrix buffer is used when present;
+ * otherwise the matrix stored in the decoder context is used.  The 4x4
+ * lists are sent as two groups of three (lists 0-2 as "intra", 3-5 as
+ * "inter"); the two 8x8 lists are only sent when the picture enables
+ * the 8x8 transform.
+ */
+static void
+gen75_mfd_avc_qm_state(VADriverContextP ctx,
+                      struct decode_state *decode_state,
+                      struct gen7_mfd_context *gen7_mfd_context)
+{
+    VAPictureParameterBufferH264 *pic_param;
+    VAIQMatrixBufferH264 *iq_matrix = &gen7_mfd_context->iq_matrix.h264;
+
+    /* prefer the matrix supplied with this picture, if any */
+    if (decode_state->iq_matrix && decode_state->iq_matrix->buffer)
+        iq_matrix = (VAIQMatrixBufferH264 *)decode_state->iq_matrix->buffer;
+
+    assert(decode_state->pic_param && decode_state->pic_param->buffer);
+    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
+
+    gen75_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX,
+                       &iq_matrix->ScalingList4x4[0][0], 3 * 16,
+                       gen7_mfd_context);
+    gen75_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX,
+                       &iq_matrix->ScalingList4x4[3][0], 3 * 16,
+                       gen7_mfd_context);
+
+    if (!pic_param->pic_fields.bits.transform_8x8_mode_flag)
+        return;
+
+    gen75_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX,
+                       &iq_matrix->ScalingList8x8[0][0], 64,
+                       gen7_mfd_context);
+    gen75_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX,
+                       &iq_matrix->ScalingList8x8[1][0], 64,
+                       gen7_mfd_context);
+}
+
+/*
+ * Emit MFD_AVC_PICID_STATE (10 dwords) with Picture ID Remapping
+ * disabled; the remaining eight dwords are zero.
+ */
+static void
+gen75_mfd_avc_picid_state(VADriverContextP ctx,
+                      struct decode_state *decode_state,
+                      struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    int dw;
+
+    BEGIN_BCS_BATCH(batch, 10);
+    OUT_BCS_BATCH(batch, MFD_AVC_PICID_STATE | (10 - 2));
+    OUT_BCS_BATCH(batch, 1); /* disable Picture ID Remapping */
+
+    /* DW 2..9: zero */
+    for (dw = 2; dw <= 9; dw++)
+        OUT_BCS_BATCH(batch, 0);
+
+    ADVANCE_BCS_BATCH(batch);
+}
+
+
+/*
+ * B+ stepping layout of MFX_AVC_DIRECTMODE_STATE (71 dwords).
+ *
+ * Dword budget: 1 header + 16 reference slots * 2 + 1 zero + 1 current
+ * picture relocation + 2 zeros + 16 reference POC pairs + 1 current POC
+ * pair = 71.
+ *
+ * Unlike the non-B+ variant, only dmv_top is relocated for each surface;
+ * the second dword of every slot is zero.
+ *
+ * slice_param is not used by this function.
+ */
+static void
+gen75_mfd_avc_directmode_state_bplus(VADriverContextP ctx,
+                              VAPictureParameterBufferH264 *pic_param,
+                              VASliceParameterBufferH264 *slice_param,
+                              struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx); /* used implicitly by SURFACE() */
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    struct object_surface *obj_surface;
+    GenAvcSurface *gen7_avc_surface;
+    VAPictureH264 *va_pic;
+    int i, j;
+
+    BEGIN_BCS_BATCH(batch, 71);
+    OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
+
+    /* reference surfaces 0..15 */
+    for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
+        if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
+            obj_surface = SURFACE(gen7_mfd_context->reference_surface[i].surface_id);
+            assert(obj_surface);
+            gen7_avc_surface = obj_surface->private_data;
+
+            /* a reference without AVC private data gets an empty slot */
+            if (gen7_avc_surface == NULL) {
+                OUT_BCS_BATCH(batch, 0);
+                OUT_BCS_BATCH(batch, 0);
+            } else {
+                OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
+                              I915_GEM_DOMAIN_INSTRUCTION, 0,
+                              0);
+                OUT_BCS_BATCH(batch, 0);
+            }
+        } else {
+            OUT_BCS_BATCH(batch, 0);
+            OUT_BCS_BATCH(batch, 0);
+        }
+    }
+	OUT_BCS_BATCH(batch, 0);
+
+    /* the current decoding frame/field */
+    va_pic = &pic_param->CurrPic;
+    assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
+    obj_surface = SURFACE(va_pic->picture_id);
+    assert(obj_surface && obj_surface->bo && obj_surface->private_data);
+    gen7_avc_surface = obj_surface->private_data;
+
+    /* relocated with a write domain, unlike the read-only references above */
+    OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
+                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                  0);
+
+	OUT_BCS_BATCH(batch, 0);
+	OUT_BCS_BATCH(batch, 0);
+
+    /* POC List */
+    for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
+        if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
+            int found = 0;
+            /* look up the ReferenceFrames entry that matches this slot */
+            for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) {
+                va_pic = &pic_param->ReferenceFrames[j];
+                
+                if (va_pic->flags & VA_PICTURE_H264_INVALID)
+                    continue;
+
+                if (va_pic->picture_id == gen7_mfd_context->reference_surface[i].surface_id) {
+                    found = 1;
+                    break;
+                }
+            }
+
+            /*
+             * NOTE(review): when asserts are compiled out and no match was
+             * found, va_pic is left at the last ReferenceFrames entry and
+             * its POC values are emitted below.
+             */
+            assert(found == 1);
+            assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
+            
+            OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
+            OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
+        } else {
+            OUT_BCS_BATCH(batch, 0);
+            OUT_BCS_BATCH(batch, 0);
+        }
+    }
+
+    /* POC pair of the picture being decoded */
+    va_pic = &pic_param->CurrPic;
+    OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
+    OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
+
+    ADVANCE_BCS_BATCH(batch);
+}
+
+/*
+ * Emit MFX_AVC_DIRECTMODE_STATE (69 dwords), or defer to the _bplus
+ * variant on B+ steppings.
+ *
+ * Dword budget: 1 header + 16 reference slots * 2 + 2 for the current
+ * picture + 16 reference POC pairs + 1 current POC pair = 69.
+ *
+ * Each surface slot holds two relocations: dmv_top, then dmv_bottom when
+ * the surface has dmv_bottom_flag == 1, otherwise dmv_top a second time.
+ *
+ * slice_param is only forwarded to the _bplus variant.
+ */
+static void
+gen75_mfd_avc_directmode_state(VADriverContextP ctx,
+                              VAPictureParameterBufferH264 *pic_param,
+                              VASliceParameterBufferH264 *slice_param,
+                              struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    struct object_surface *obj_surface;
+    GenAvcSurface *gen7_avc_surface;
+    VAPictureH264 *va_pic;
+    int i, j;
+
+    if (IS_STEPPING_BPLUS(i965)) {
+	gen75_mfd_avc_directmode_state_bplus(ctx, pic_param, slice_param,
+		gen7_mfd_context);
+	return;
+    }
+
+    BEGIN_BCS_BATCH(batch, 69);
+    OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));
+
+    /* reference surfaces 0..15 */
+    for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
+        if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
+            obj_surface = SURFACE(gen7_mfd_context->reference_surface[i].surface_id);
+            assert(obj_surface);
+            gen7_avc_surface = obj_surface->private_data;
+
+            /* a reference without AVC private data gets an empty slot */
+            if (gen7_avc_surface == NULL) {
+                OUT_BCS_BATCH(batch, 0);
+                OUT_BCS_BATCH(batch, 0);
+            } else {
+                OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
+                              I915_GEM_DOMAIN_INSTRUCTION, 0,
+                              0);
+
+                /* second dword: separate bottom buffer if present, else dmv_top again */
+                if (gen7_avc_surface->dmv_bottom_flag == 1)
+                    OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_bottom,
+                                  I915_GEM_DOMAIN_INSTRUCTION, 0,
+                                  0);
+                else
+                    OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
+                                  I915_GEM_DOMAIN_INSTRUCTION, 0,
+                                  0);
+            }
+        } else {
+            OUT_BCS_BATCH(batch, 0);
+            OUT_BCS_BATCH(batch, 0);
+        }
+    }
+
+    /* the current decoding frame/field */
+    va_pic = &pic_param->CurrPic;
+    assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
+    obj_surface = SURFACE(va_pic->picture_id);
+    assert(obj_surface && obj_surface->bo && obj_surface->private_data);
+    gen7_avc_surface = obj_surface->private_data;
+
+    /* relocated with a write domain, unlike the read-only references above */
+    OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
+                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                  0);
+
+    if (gen7_avc_surface->dmv_bottom_flag == 1)
+        OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_bottom,
+                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                      0);
+    else
+        OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
+                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                      0);
+
+    /* POC List */
+    for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
+        if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
+            int found = 0;
+            /* look up the ReferenceFrames entry that matches this slot */
+            for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) {
+                va_pic = &pic_param->ReferenceFrames[j];
+                
+                if (va_pic->flags & VA_PICTURE_H264_INVALID)
+                    continue;
+
+                if (va_pic->picture_id == gen7_mfd_context->reference_surface[i].surface_id) {
+                    found = 1;
+                    break;
+                }
+            }
+
+            /*
+             * NOTE(review): when asserts are compiled out and no match was
+             * found, va_pic is left at the last ReferenceFrames entry and
+             * its POC values are emitted below.
+             */
+            assert(found == 1);
+            assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
+            
+            OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
+            OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
+        } else {
+            OUT_BCS_BATCH(batch, 0);
+            OUT_BCS_BATCH(batch, 0);
+        }
+    }
+
+    /* POC pair of the picture being decoded */
+    va_pic = &pic_param->CurrPic;
+    OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
+    OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
+
+    ADVANCE_BCS_BATCH(batch);
+}
+
+/*
+ * Emit MFX_AVC_SLICE_STATE (11 dwords) for one slice.
+ *
+ * SI and SP slices are programmed as I and P respectively.  The start
+ * position of the following slice is taken from next_slice_param; when
+ * that is NULL this is the last slice, and the "next" position is the
+ * row just past the picture (half the frame height for field pictures).
+ */
+static void
+gen75_mfd_avc_slice_state(VADriverContextP ctx,
+                         VAPictureParameterBufferH264 *pic_param,
+                         VASliceParameterBufferH264 *slice_param,
+                         VASliceParameterBufferH264 *next_slice_param,
+                         struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
+    int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
+    int mbaff_picture = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
+                         !pic_param->pic_fields.bits.field_pic_flag);
+    int is_last_slice = (next_slice_param == NULL);
+    int first_mb_in_slice;
+    int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
+    int num_ref_idx_l0 = 0, num_ref_idx_l1 = 0;
+    int slice_type, slice_qp;
+
+    /* collapse the five VA slice types onto I / P / B */
+    switch (slice_param->slice_type) {
+    case SLICE_TYPE_I:
+    case SLICE_TYPE_SI:
+        slice_type = SLICE_TYPE_I;
+        break;
+
+    case SLICE_TYPE_P:
+    case SLICE_TYPE_SP:
+        slice_type = SLICE_TYPE_P;
+        break;
+
+    default:
+        assert(slice_param->slice_type == SLICE_TYPE_B);
+        slice_type = SLICE_TYPE_B;
+        break;
+    }
+
+    /* active reference counts; lists unused by the slice type stay at 0 */
+    if (slice_type == SLICE_TYPE_B) {
+        num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
+        num_ref_idx_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
+    } else if (slice_type == SLICE_TYPE_P) {
+        assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
+        num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
+    } else {
+        assert(slice_param->num_ref_idx_l0_active_minus1 == 0);
+        assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
+    }
+
+    /* the first-MB index is doubled for MBAFF pictures */
+    first_mb_in_slice = slice_param->first_mb_in_slice << mbaff_picture;
+    slice_hor_pos = first_mb_in_slice % width_in_mbs;
+    slice_ver_pos = first_mb_in_slice / width_in_mbs;
+
+    if (is_last_slice) {
+        next_slice_hor_pos = 0;
+        next_slice_ver_pos = height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag);
+    } else {
+        int first_mb_in_next_slice = next_slice_param->first_mb_in_slice << mbaff_picture;
+
+        next_slice_hor_pos = first_mb_in_next_slice % width_in_mbs;
+        next_slice_ver_pos = first_mb_in_next_slice / width_in_mbs;
+    }
+
+    slice_qp = pic_param->pic_init_qp_minus26 + 26 + slice_param->slice_qp_delta;
+
+    BEGIN_BCS_BATCH(batch, 11); /* FIXME: is it 10??? */
+    OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
+
+    /* DW 1: slice type */
+    OUT_BCS_BATCH(batch, slice_type);
+
+    /* DW 2: reference counts and weight denominators */
+    OUT_BCS_BATCH(batch,
+                  (num_ref_idx_l1 << 24) |
+                  (num_ref_idx_l0 << 16) |
+                  (slice_param->chroma_log2_weight_denom << 8) |
+                  (slice_param->luma_log2_weight_denom << 0));
+
+    /* DW 3: direct mode, deblocking, CABAC init, QP and filter offsets */
+    OUT_BCS_BATCH(batch,
+                  (slice_param->direct_spatial_mv_pred_flag << 29) |
+                  (slice_param->disable_deblocking_filter_idc << 27) |
+                  (slice_param->cabac_init_idc << 24) |
+                  (slice_qp << 16) |
+                  ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
+                  ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
+
+    /* DW 4: start of this slice */
+    OUT_BCS_BATCH(batch,
+                  (slice_ver_pos << 24) |
+                  (slice_hor_pos << 16) |
+                  (first_mb_in_slice << 0));
+
+    /* DW 5: start of the next slice */
+    OUT_BCS_BATCH(batch,
+                  (next_slice_ver_pos << 16) |
+                  (next_slice_hor_pos << 0));
+
+    /* DW 6: last slice flag */
+    OUT_BCS_BATCH(batch, is_last_slice << 19);
+
+    /* DW 7..10: zero */
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+    ADVANCE_BCS_BATCH(batch);
+}
+
+/*
+ * Thin wrapper: forwards the slice parameters and this context's
+ * reference surface table to the shared gen6_send_avc_ref_idx_state()
+ * helper.  ctx and pic_param are not used.
+ */
+static inline void
+gen75_mfd_avc_ref_idx_state(VADriverContextP ctx,
+                           VAPictureParameterBufferH264 *pic_param,
+                           VASliceParameterBufferH264 *slice_param,
+                           struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+
+    gen6_send_avc_ref_idx_state(batch,
+                                slice_param,
+                                gen7_mfd_context->reference_surface);
+}
+
+/*
+ * Emit the explicit weighted-prediction tables via
+ * MFX_AVC_WEIGHTOFFSET_STATE (98 dwords each).
+ *
+ * One table (list 0) is sent for P/SP slices when weighted_pred_flag
+ * is 1; two tables (list 0, then list 1) are sent for B slices when
+ * weighted_bipred_idc is 1.  Otherwise nothing is emitted.
+ *
+ * Each table has 32 entries of six shorts:
+ *   luma weight, luma offset,
+ *   chroma[0] weight, chroma[0] offset,
+ *   chroma[1] weight, chroma[1] offset.
+ */
+static void
+gen75_mfd_avc_weightoffset_state(VADriverContextP ctx,
+                                VAPictureParameterBufferH264 *pic_param,
+                                VASliceParameterBufferH264 *slice_param,
+                                struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    short weightoffsets[32 * 6];
+    int num_weight_offset_table = 0;
+    int table, entry;
+
+    if ((slice_param->slice_type == SLICE_TYPE_P ||
+         slice_param->slice_type == SLICE_TYPE_SP) &&
+        (pic_param->pic_fields.bits.weighted_pred_flag == 1))
+        num_weight_offset_table = 1;
+
+    if ((slice_param->slice_type == SLICE_TYPE_B) &&
+        (pic_param->pic_fields.bits.weighted_bipred_idc == 1))
+        num_weight_offset_table = 2;
+
+    for (table = 0; table < num_weight_offset_table; table++) {
+        BEGIN_BCS_BATCH(batch, 98);
+        OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
+        OUT_BCS_BATCH(batch, table); /* 0: list 0 table, 1: list 1 table */
+
+        for (entry = 0; entry < 32; entry++) {
+            short *dst = &weightoffsets[entry * 6];
+
+            if (table == 0) {
+                dst[0] = slice_param->luma_weight_l0[entry];
+                dst[1] = slice_param->luma_offset_l0[entry];
+                dst[2] = slice_param->chroma_weight_l0[entry][0];
+                dst[3] = slice_param->chroma_offset_l0[entry][0];
+                dst[4] = slice_param->chroma_weight_l0[entry][1];
+                dst[5] = slice_param->chroma_offset_l0[entry][1];
+            } else {
+                dst[0] = slice_param->luma_weight_l1[entry];
+                dst[1] = slice_param->luma_offset_l1[entry];
+                dst[2] = slice_param->chroma_weight_l1[entry][0];
+                dst[3] = slice_param->chroma_offset_l1[entry][0];
+                dst[4] = slice_param->chroma_weight_l1[entry][1];
+                dst[5] = slice_param->chroma_offset_l1[entry][1];
+            }
+        }
+
+        intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
+        ADVANCE_BCS_BATCH(batch);
+    }
+}
+
+/*
+ * Emit MFD_AVC_BSD_OBJECT (6 dwords) for one slice.
+ *
+ * The bit offset of the first macroblock, as returned by
+ * avc_get_first_mb_bit_offset(), is split into a byte part (bits 16 and
+ * up of DW4) and a bit-within-byte part (bits 0-2 of DW4).
+ * next_slice_param == NULL marks the last slice.
+ */
+static void
+gen75_mfd_avc_bsd_object(VADriverContextP ctx,
+                        VAPictureParameterBufferH264 *pic_param,
+                        VASliceParameterBufferH264 *slice_param,
+                        dri_bo *slice_data_bo,
+                        VASliceParameterBufferH264 *next_slice_param,
+                        struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    unsigned int slice_data_bit_offset;
+    unsigned int first_mb_byte, first_mb_bit;
+    int is_last_slice = (next_slice_param == NULL);
+
+    slice_data_bit_offset = avc_get_first_mb_bit_offset(
+        slice_data_bo,
+        slice_param,
+        pic_param->pic_fields.bits.entropy_coding_mode_flag
+    );
+    first_mb_byte = slice_data_bit_offset >> 3;
+    first_mb_bit = slice_data_bit_offset & 0x7;
+
+    /* the input bitsteam format on GEN7 differs from GEN6 */
+    BEGIN_BCS_BATCH(batch, 6);
+    OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
+
+    /* DW 1: number of slice data bytes after slice_data_offset */
+    OUT_BCS_BATCH(batch,
+                  (slice_param->slice_data_size - slice_param->slice_data_offset));
+    /* DW 2: offset of the slice data */
+    OUT_BCS_BATCH(batch, slice_param->slice_data_offset);
+    /* DW 3: every field left at zero */
+    OUT_BCS_BATCH(batch, 0);
+    /* DW 4: first-MB byte offset, LastSlice flag, first-MB bit offset */
+    OUT_BCS_BATCH(batch,
+                  (first_mb_byte << 16) |
+                  (is_last_slice << 3) |
+                  first_mb_bit);
+    /* DW 5: zero */
+    OUT_BCS_BATCH(batch, 0);
+    ADVANCE_BCS_BATCH(batch);
+}
+
+/*
+ * One-time AVC setup for a decoder context: fill the context's H.264
+ * IQ matrix with the default (flat) scaling lists.  That matrix is the
+ * fallback used when a picture arrives without its own IQ matrix buffer.
+ */
+static inline void
+gen75_mfd_avc_context_init(
+    VADriverContextP         ctx,
+    struct gen7_mfd_context *gen7_mfd_context
+)
+{
+    VAIQMatrixBufferH264 *default_iq_matrix = &gen7_mfd_context->iq_matrix.h264;
+
+    /* Initialize flat scaling lists */
+    avc_gen_default_iq_matrix(default_iq_matrix);
+}
+
+/*
+ * Per-picture preparation for H.264 decoding.
+ *
+ *  1. Scan the slice parameters to find out whether any slice leaves the
+ *     in-loop deblocking filter enabled (disable_deblocking_filter_idc != 1).
+ *  2. Update the frame store index and validate the picture size.
+ *  3. Make sure the render target has an NV12 buffer object and AVC
+ *     private data.
+ *  4. Point both the pre- and post-deblocking outputs at the render
+ *     target; exactly one of them is flagged valid, depending on (1).
+ *  5. Replace the four row-store scratch buffers with freshly allocated
+ *     ones sized from the picture width, and mark the bitplane buffer
+ *     as unused.
+ */
+static void
+gen75_mfd_avc_decode_init(VADriverContextP ctx,
+                         struct decode_state *decode_state,
+                         struct gen7_mfd_context *gen7_mfd_context)
+{
+    VAPictureParameterBufferH264 *pic_param;
+    VASliceParameterBufferH264 *slice_param;
+    VAPictureH264 *va_pic;
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct object_surface *obj_surface;
+    dri_bo *bo;
+    int i, j, enable_avc_ildb = 0;
+    unsigned int width_in_mbs, height_in_mbs;
+
+    /* stop scanning as soon as one slice with deblocking enabled is found */
+    for (j = 0; j < decode_state->num_slice_params && enable_avc_ildb == 0; j++) {
+        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
+        slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
+
+        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
+            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
+            assert((slice_param->slice_type == SLICE_TYPE_I) ||
+                   (slice_param->slice_type == SLICE_TYPE_SI) ||
+                   (slice_param->slice_type == SLICE_TYPE_P) ||
+                   (slice_param->slice_type == SLICE_TYPE_SP) ||
+                   (slice_param->slice_type == SLICE_TYPE_B));
+
+            if (slice_param->disable_deblocking_filter_idc != 1) {
+                enable_avc_ildb = 1;
+                break;
+            }
+
+            slice_param++;
+        }
+    }
+
+    assert(decode_state->pic_param && decode_state->pic_param->buffer);
+    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
+    gen75_mfd_avc_frame_store_index(ctx, pic_param, gen7_mfd_context);
+    width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
+    height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
+    assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
+    assert(height_in_mbs > 0 && height_in_mbs <= 256);
+
+    /* Current decoded picture */
+    va_pic = &pic_param->CurrPic;
+    assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
+    obj_surface = SURFACE(va_pic->picture_id);
+    assert(obj_surface);
+    /* reset the reference/display bits, then mark as referenced if the picture says so */
+    obj_surface->flags &= ~SURFACE_REF_DIS_MASK;
+    obj_surface->flags |= (pic_param->pic_fields.bits.reference_pic_flag ? SURFACE_REFERENCED : 0);
+    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
+    gen75_mfd_init_avc_surface(ctx, pic_param, obj_surface);
+
+    /*
+     * Both outputs reference the render target's bo; only the one matching
+     * the deblocking decision is flagged valid.  The old bo is released
+     * before the new one is referenced.
+     */
+    dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
+    gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
+    dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
+    gen7_mfd_context->post_deblocking_output.valid = enable_avc_ildb;
+
+    dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
+    gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
+    dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
+    gen7_mfd_context->pre_deblocking_output.valid = !enable_avc_ildb;
+
+    /*
+     * Scratch buffers: each one is dropped and reallocated for every
+     * picture, with a size proportional to the picture width in MBs.
+     * NOTE(review): allocation failure is only caught by assert(); with
+     * asserts compiled out a NULL bo would be stored and flagged valid.
+     */
+    dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
+    bo = dri_bo_alloc(i965->intel.bufmgr,
+                      "intra row store",
+                      width_in_mbs * 64,
+                      0x1000);
+    assert(bo);
+    gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
+    gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
+
+    dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
+    bo = dri_bo_alloc(i965->intel.bufmgr,
+                      "deblocking filter row store",
+                      width_in_mbs * 64 * 4,
+                      0x1000);
+    assert(bo);
+    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
+    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
+
+    dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
+    bo = dri_bo_alloc(i965->intel.bufmgr,
+                      "bsd mpc row store",
+                      width_in_mbs * 64 * 2,
+                      0x1000);
+    assert(bo);
+    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
+    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
+
+    dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
+    bo = dri_bo_alloc(i965->intel.bufmgr,
+                      "mpr row store",
+                      width_in_mbs * 64 * 2,
+                      0x1000);
+    assert(bo);
+    gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
+    gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;
+
+    /* this function never sets up a bitplane buffer */
+    gen7_mfd_context->bitplane_read_buffer.valid = 0;
+}
+
+/*
+ * Decode one H.264 picture through the BCS batch buffer.
+ *
+ * Runs gen75_mfd_avc_decode_init(), emits the picture-level MFX commands
+ * inside an atomic batch section, then walks every element of every slice
+ * parameter buffer and emits the per-slice commands followed by the BSD
+ * object.  Each slice is also given a pointer to the slice that follows it
+ * (NULL for the very last slice of the picture).  The batch is flushed
+ * before returning.
+ */
+static void
+gen75_mfd_avc_decode_picture(VADriverContextP ctx,
+                            struct decode_state *decode_state,
+                            struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    VAPictureParameterBufferH264 *pic_param;
+    VASliceParameterBufferH264 *slice_param, *following_slice, *next_group_head;
+    dri_bo *slice_data_bo;
+    int i, j;
+
+    assert(decode_state->pic_param && decode_state->pic_param->buffer);
+    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
+    gen75_mfd_avc_decode_init(ctx, decode_state, gen7_mfd_context);
+
+    /* Picture-level state */
+    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
+    intel_batchbuffer_emit_mi_flush(batch);
+    gen75_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
+    gen75_mfd_surface_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
+    gen75_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
+    gen75_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
+    gen75_mfd_avc_qm_state(ctx, decode_state, gen7_mfd_context);
+    gen75_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context);
+    gen75_mfd_avc_picid_state(ctx, decode_state, gen7_mfd_context);
+
+    /* Slice-level state, one slice parameter buffer at a time */
+    for (j = 0; j < decode_state->num_slice_params; j++) {
+        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
+        slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
+        slice_data_bo = decode_state->slice_datas[j]->bo;
+        gen75_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_AVC, gen7_mfd_context);
+
+        /* First slice of the following buffer, or NULL after the last buffer */
+        next_group_head = NULL;
+        if (j != decode_state->num_slice_params - 1)
+            next_group_head = (VASliceParameterBufferH264 *)decode_state->slice_params[j + 1]->buffer;
+
+        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++, slice_param++) {
+            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
+            assert((slice_param->slice_type == SLICE_TYPE_I) ||
+                   (slice_param->slice_type == SLICE_TYPE_SI) ||
+                   (slice_param->slice_type == SLICE_TYPE_P) ||
+                   (slice_param->slice_type == SLICE_TYPE_SP) ||
+                   (slice_param->slice_type == SLICE_TYPE_B));
+
+            /* The last element of a buffer is followed by the next buffer's head */
+            following_slice = (i < decode_state->slice_params[j]->num_elements - 1) ?
+                slice_param + 1 : next_group_head;
+
+            gen75_mfd_avc_directmode_state(ctx, pic_param, slice_param, gen7_mfd_context);
+            gen75_mfd_avc_ref_idx_state(ctx, pic_param, slice_param, gen7_mfd_context);
+            gen75_mfd_avc_weightoffset_state(ctx, pic_param, slice_param, gen7_mfd_context);
+            gen75_mfd_avc_slice_state(ctx, pic_param, slice_param, following_slice, gen7_mfd_context);
+            gen75_mfd_avc_bsd_object(ctx, pic_param, slice_param, slice_data_bo, following_slice, gen7_mfd_context);
+        }
+    }
+
+    intel_batchbuffer_end_atomic(batch);
+    intel_batchbuffer_flush(batch);
+}
+
+/*
+ * Prepare the decoder context for an MPEG-2 picture.
+ *
+ * Hands the context's reference surface table to
+ * mpeg2_set_reference_surfaces(), makes sure the current render target has
+ * an NV12 / YUV 4:2:0 buffer object, uses that buffer as the pre-deblocking
+ * output and allocates a fresh "bsd mpc row store" scratch buffer sized
+ * from the picture width in macroblocks.  Every other optional buffer of
+ * the context is flagged invalid.
+ */
+static void
+gen75_mfd_mpeg2_decode_init(VADriverContextP ctx,
+                           struct decode_state *decode_state,
+                           struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    VAPictureParameterBufferMPEG2 *pic_param;
+    struct object_surface *obj_surface;
+    unsigned int width_in_mbs;
+    dri_bo *bo;
+
+    assert(decode_state->pic_param && decode_state->pic_param->buffer);
+    pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
+    width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
+
+    mpeg2_set_reference_surfaces(ctx,
+                                 gen7_mfd_context->reference_surface,
+                                 decode_state,
+                                 pic_param);
+
+    /* Buffers that are left unused on this path */
+    gen7_mfd_context->post_deblocking_output.valid = 0;
+    gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
+    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
+    gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
+    gen7_mfd_context->bitplane_read_buffer.valid = 0;
+
+    /* Current decoded picture */
+    obj_surface = SURFACE(decode_state->current_render_target);
+    assert(obj_surface);
+    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
+
+    /* The render target doubles as the pre-deblocking output; swap the reference */
+    dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
+    gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
+    dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
+    gen7_mfd_context->pre_deblocking_output.valid = 1;
+
+    /* Replace the row store scratch buffer: 96 bytes per macroblock column */
+    dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
+    bo = dri_bo_alloc(i965->intel.bufmgr, "bsd mpc row store", width_in_mbs * 96, 0x1000);
+    assert(bo);
+    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
+    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
+}
+
+/*
+ * Emit the 13-dword MFX_MPEG2_PIC_STATE command for the current picture.
+ *
+ * DW1 carries the four f_code nibbles and the picture coding extension
+ * flags, DW2 the picture coding type, DW3 the picture size in macroblocks
+ * (minus one) together with the slice concealment disable bit.  The
+ * remaining nine dwords are written as zero.
+ */
+static void
+gen75_mfd_mpeg2_pic_state(VADriverContextP ctx,
+                         struct decode_state *decode_state,
+                         struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    VAPictureParameterBufferMPEG2 *pic_param;
+    /* XXX: disable concealment for now */
+    const unsigned int slice_concealment_disable_bit = 1;
+    int width_in_mbs, height_in_mbs;
+    int pad;
+
+    assert(decode_state->pic_param && decode_state->pic_param->buffer);
+    pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
+
+    width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
+    height_in_mbs = ALIGN(pic_param->vertical_size, 16) / 16;
+
+    BEGIN_BCS_BATCH(batch, 13);
+    OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
+
+    /* DW1 */
+    OUT_BCS_BATCH(batch,
+                  ((pic_param->f_code & 0xf) << 28) |         /* f_code[1][1] */
+                  (((pic_param->f_code >> 4) & 0xf) << 24) |  /* f_code[1][0] */
+                  (((pic_param->f_code >> 8) & 0xf) << 20) |  /* f_code[0][1] */
+                  (((pic_param->f_code >> 12) & 0xf) << 16) | /* f_code[0][0] */
+                  (pic_param->picture_coding_extension.bits.intra_dc_precision << 14) |
+                  (pic_param->picture_coding_extension.bits.picture_structure << 12) |
+                  (pic_param->picture_coding_extension.bits.top_field_first << 11) |
+                  (pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10) |
+                  (pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9) |
+                  (pic_param->picture_coding_extension.bits.q_scale_type << 8) |
+                  (pic_param->picture_coding_extension.bits.intra_vlc_format << 7) |
+                  (pic_param->picture_coding_extension.bits.alternate_scan << 6));
+
+    /* DW2 */
+    OUT_BCS_BATCH(batch, pic_param->picture_coding_type << 9);
+
+    /* DW3 */
+    OUT_BCS_BATCH(batch,
+                  (slice_concealment_disable_bit << 31) |
+                  ((height_in_mbs - 1) << 16) |
+                  (width_in_mbs - 1));
+
+    /* DW4..DW12 are all zero */
+    for (pad = 0; pad < 9; pad++) {
+        OUT_BCS_BATCH(batch, 0);
+    }
+
+    ADVANCE_BCS_BATCH(batch);
+}
+
+/*
+ * Refresh the context's cached MPEG-2 quantiser matrices from the
+ * application's IQ matrix buffer (when one was supplied) and send the
+ * cached matrices to the hardware.
+ *
+ * A cached matrix is overwritten when the application flags a new matrix;
+ * its load flag is additionally taken from the application while the
+ * cached flag still holds -1.  Incoming coefficients are stored at the
+ * positions given by zigzag_direct[].  The intra matrix is committed
+ * first, then the non-intra matrix, each only if its cached load flag is
+ * non-zero.
+ */
+static void
+gen75_mfd_mpeg2_qm_state(VADriverContextP ctx,
+                        struct decode_state *decode_state,
+                        struct gen7_mfd_context *gen7_mfd_context)
+{
+    VAIQMatrixBufferMPEG2 * const gen_iq_matrix = &gen7_mfd_context->iq_matrix.mpeg2;
+    int j;
+
+    /* Update internal QM state */
+    if (decode_state->iq_matrix && decode_state->iq_matrix->buffer) {
+        VAIQMatrixBufferMPEG2 * const iq_matrix =
+            (VAIQMatrixBufferMPEG2 *)decode_state->iq_matrix->buffer;
+
+        if (iq_matrix->load_intra_quantiser_matrix) {
+            gen_iq_matrix->load_intra_quantiser_matrix =
+                iq_matrix->load_intra_quantiser_matrix;
+            for (j = 0; j < 64; j++)
+                gen_iq_matrix->intra_quantiser_matrix[zigzag_direct[j]] =
+                    iq_matrix->intra_quantiser_matrix[j];
+        } else if (gen_iq_matrix->load_intra_quantiser_matrix == -1) {
+            /* Nothing new to load, but the cached flag must leave -1 */
+            gen_iq_matrix->load_intra_quantiser_matrix =
+                iq_matrix->load_intra_quantiser_matrix;
+        }
+
+        if (iq_matrix->load_non_intra_quantiser_matrix) {
+            gen_iq_matrix->load_non_intra_quantiser_matrix =
+                iq_matrix->load_non_intra_quantiser_matrix;
+            for (j = 0; j < 64; j++)
+                gen_iq_matrix->non_intra_quantiser_matrix[zigzag_direct[j]] =
+                    iq_matrix->non_intra_quantiser_matrix[j];
+        } else if (gen_iq_matrix->load_non_intra_quantiser_matrix == -1) {
+            gen_iq_matrix->load_non_intra_quantiser_matrix =
+                iq_matrix->load_non_intra_quantiser_matrix;
+        }
+    }
+
+    /* Commit QM state to HW: intra first, then non-intra */
+    if (gen_iq_matrix->load_intra_quantiser_matrix)
+        gen75_mfd_qm_state(ctx,
+                           MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX,
+                           gen_iq_matrix->intra_quantiser_matrix,
+                           64,
+                           gen7_mfd_context);
+
+    if (gen_iq_matrix->load_non_intra_quantiser_matrix)
+        gen75_mfd_qm_state(ctx,
+                           MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX,
+                           gen_iq_matrix->non_intra_quantiser_matrix,
+                           64,
+                           gen7_mfd_context);
+}
+
+/*
+ * Emit the 5-dword MFD_MPEG2_BSD_OBJECT command for one slice.
+ *
+ * The slice covers the macroblocks from its own start position up to the
+ * start of next_slice_param, or up to the end of the picture (field height
+ * for field pictures) when next_slice_param is NULL.  For field pictures
+ * the vertical slice positions are halved when the context's
+ * wa_mpeg2_slice_vertical_position workaround value is positive.
+ */
+static void
+gen75_mfd_mpeg2_bsd_object(VADriverContextP ctx,
+                          VAPictureParameterBufferMPEG2 *pic_param,
+                          VASliceParameterBufferMPEG2 *slice_param,
+                          VASliceParameterBufferMPEG2 *next_slice_param,
+                          struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    unsigned int width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
+    const int is_last_slice = (next_slice_param == NULL);
+    const int is_field_pic =
+        (pic_param->picture_coding_extension.bits.picture_structure == MPEG_TOP_FIELD ||
+         pic_param->picture_coding_extension.bits.picture_structure == MPEG_BOTTOM_FIELD);
+    const int is_field_pic_wa =
+        is_field_pic && gen7_mfd_context->wa_mpeg2_slice_vertical_position > 0;
+    int vpos0, hpos0, vpos1, hpos1, mb_count;
+
+    /* Start of this slice */
+    hpos0 = slice_param->slice_horizontal_position;
+    vpos0 = slice_param->slice_vertical_position / (1 + is_field_pic_wa);
+
+    /* Start of the next slice, or the first row past the end of the picture */
+    if (next_slice_param != NULL) {
+        hpos1 = next_slice_param->slice_horizontal_position;
+        vpos1 = next_slice_param->slice_vertical_position / (1 + is_field_pic_wa);
+    } else {
+        hpos1 = 0;
+        vpos1 = ALIGN(pic_param->vertical_size, 16) / 16 / (1 + is_field_pic);
+    }
+
+    mb_count = (vpos1 * width_in_mbs + hpos1) - (vpos0 * width_in_mbs + hpos0);
+
+    BEGIN_BCS_BATCH(batch, 5);
+    OUT_BCS_BATCH(batch, MFD_MPEG2_BSD_OBJECT | (5 - 2));
+    /* Size and offset of the slice data, skipping the whole bytes before the first macroblock */
+    OUT_BCS_BATCH(batch,
+                  slice_param->slice_data_size - (slice_param->macroblock_offset >> 3));
+    OUT_BCS_BATCH(batch,
+                  slice_param->slice_data_offset + (slice_param->macroblock_offset >> 3));
+    OUT_BCS_BATCH(batch,
+                  (hpos0 << 24) |
+                  (vpos0 << 16) |
+                  (mb_count << 8) |
+                  (is_last_slice << 5) |
+                  (is_last_slice << 3) |
+                  (slice_param->macroblock_offset & 0x7)); /* remaining bit offset */
+    OUT_BCS_BATCH(batch,
+                  (slice_param->quantiser_scale_code << 24) |
+                  (vpos1 << 8 | hpos1));
+    ADVANCE_BCS_BATCH(batch);
+}
+
+/*
+ * Decode one MPEG-2 picture through the BCS batch buffer.
+ *
+ * Runs gen75_mfd_mpeg2_decode_init(), emits the picture-level MFX commands
+ * inside an atomic batch section, resolves the slice vertical position
+ * workaround once (while the cached value is still negative), and then
+ * emits one BSD object per slice element.  Each slice is given a pointer
+ * to the slice that follows it (NULL for the last slice of the picture).
+ * The batch is flushed before returning.
+ */
+static void
+gen75_mfd_mpeg2_decode_picture(VADriverContextP ctx,
+                              struct decode_state *decode_state,
+                              struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    VAPictureParameterBufferMPEG2 *pic_param;
+    VASliceParameterBufferMPEG2 *slice_param, *following_slice, *next_group_head;
+    dri_bo *slice_data_bo;
+    int i, j;
+
+    assert(decode_state->pic_param && decode_state->pic_param->buffer);
+    pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
+
+    gen75_mfd_mpeg2_decode_init(ctx, decode_state, gen7_mfd_context);
+
+    /* Picture-level state */
+    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
+    intel_batchbuffer_emit_mi_flush(batch);
+    gen75_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
+    gen75_mfd_surface_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
+    gen75_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
+    gen75_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
+    gen75_mfd_mpeg2_pic_state(ctx, decode_state, gen7_mfd_context);
+    gen75_mfd_mpeg2_qm_state(ctx, decode_state, gen7_mfd_context);
+
+    /* A negative value means the workaround has not been evaluated yet */
+    if (gen7_mfd_context->wa_mpeg2_slice_vertical_position < 0) {
+        gen7_mfd_context->wa_mpeg2_slice_vertical_position =
+            mpeg2_wa_slice_vertical_position(decode_state, pic_param);
+    }
+
+    /* Slice-level commands, one slice parameter buffer at a time */
+    for (j = 0; j < decode_state->num_slice_params; j++) {
+        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
+        slice_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j]->buffer;
+        slice_data_bo = decode_state->slice_datas[j]->bo;
+        gen75_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_MPEG2, gen7_mfd_context);
+
+        /* First slice of the following buffer, or NULL after the last buffer */
+        next_group_head = NULL;
+        if (j != decode_state->num_slice_params - 1)
+            next_group_head = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j + 1]->buffer;
+
+        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++, slice_param++) {
+            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
+
+            /* The last element of a buffer is followed by the next buffer's head */
+            following_slice = (i < decode_state->slice_params[j]->num_elements - 1) ?
+                slice_param + 1 : next_group_head;
+
+            gen75_mfd_mpeg2_bsd_object(ctx, pic_param, slice_param, following_slice, gen7_mfd_context);
+        }
+    }
+
+    intel_batchbuffer_end_atomic(batch);
+    intel_batchbuffer_flush(batch);
+}
+
+static const int va_to_gen7_vc1_pic_type[5] = { /* indexed by the VA picture_type field */
+    GEN7_VC1_I_PICTURE,
+    GEN7_VC1_P_PICTURE,
+    GEN7_VC1_B_PICTURE,
+    GEN7_VC1_BI_PICTURE,
+    GEN7_VC1_P_PICTURE,
+};
+
+static const int va_to_gen7_vc1_mv[4] = { /* indexed by the VA mv_mode / mv_mode2 field */
+    1, /* 1-MV */
+    2, /* 1-MV half-pel */
+    3, /* 1-MV half-pel bilinear */
+    0, /* Mixed MV */
+};
+
+static const int b_picture_scale_factor[21] = { /* indexed by b_picture_fraction */
+    128, 85,  170, 64,  192,
+    51,  102, 153, 204, 43,
+    215, 37,  74,  111, 148,
+    185, 222, 32,  96,  160, 
+    224,
+};
+
+static const int va_to_gen7_vc1_condover[3] = { /* indexed by conditional_overlap_flag */
+    0,
+    2,
+    3
+};
+
+static const int va_to_gen7_vc1_profile[4] = { /* indexed by the VA sequence profile field */
+    GEN7_VC1_SIMPLE_PROFILE,
+    GEN7_VC1_MAIN_PROFILE,
+    GEN7_VC1_RESERVED_PROFILE,
+    GEN7_VC1_ADVANCED_PROFILE
+};
+
+/*
+ * Release the VC-1 private data hanging off a surface: drop the reference
+ * on its direct-MV buffer object, free the structure and clear the caller's
+ * pointer.  Does nothing when *data is already NULL.
+ */
+static void
+gen75_mfd_free_vc1_surface(void **data)
+{
+    struct gen7_vc1_surface *vc1_surface = *data;
+
+    if (vc1_surface) {
+        dri_bo_unreference(vc1_surface->dmv);
+        free(vc1_surface);
+        *data = NULL;
+    }
+}
+
+/*
+ * Attach VC-1 private data to a surface, allocating it on first use.
+ *
+ * Installs gen75_mfd_free_vc1_surface() as the surface's private-data
+ * destructor, records the picture type of the picture being decoded into
+ * the private data and, if not done yet, allocates the "direct mv w/r
+ * buffer" (64 bytes per macroblock of the coded picture size).
+ *
+ * If the private data cannot be allocated the function returns with
+ * obj_surface->private_data left NULL instead of dereferencing a NULL
+ * pointer.
+ * NOTE(review): callers outside this block are assumed to tolerate a NULL
+ * private_data (gen75_mfd_vc1_pic_state() does) -- confirm for the others.
+ */
+static void
+gen75_mfd_init_vc1_surface(VADriverContextP ctx,
+                          VAPictureParameterBufferVC1 *pic_param,
+                          struct object_surface *obj_surface)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct gen7_vc1_surface *gen7_vc1_surface = obj_surface->private_data;
+    int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
+    int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
+
+    obj_surface->free_private_data = gen75_mfd_free_vc1_surface;
+
+    if (!gen7_vc1_surface) {
+        /* calloc() so that ->dmv starts out NULL */
+        gen7_vc1_surface = calloc(1, sizeof(struct gen7_vc1_surface));
+        assert(gen7_vc1_surface);
+
+        if (!gen7_vc1_surface)
+            return;
+
+        assert((obj_surface->size & 0x3f) == 0);
+        obj_surface->private_data = gen7_vc1_surface;
+    }
+
+    gen7_vc1_surface->picture_type = pic_param->picture_fields.bits.picture_type;
+
+    if (gen7_vc1_surface->dmv == NULL) {
+        gen7_vc1_surface->dmv = dri_bo_alloc(i965->intel.bufmgr,
+                                             "direct mv w/r buffer",
+                                             width_in_mbs * height_in_mbs * 64,
+                                             0x1000);
+        /* Same allocation check as the other buffer objects in this file */
+        assert(gen7_vc1_surface->dmv);
+    }
+}
+
+/*
+ * Prepare the decoder context for a VC-1 picture.
+ *
+ *  - Resolves the forward and backward reference surfaces into slots 0 and
+ *    1 of the reference table.  A surface without a buffer object is not
+ *    usable: slot 0 then becomes VA_INVALID_ID and slot 1 falls back to
+ *    whatever slot 0 resolved to, so slot 1 never names a surface that was
+ *    just found to have no buffer object.  The remaining slots repeat
+ *    slots 0 and 1 alternately.
+ *  - Makes sure the render target has an NV12 / YUV 4:2:0 buffer object and
+ *    VC-1 private data, and uses it as post- or pre-deblocking output
+ *    depending on the loopfilter flag.
+ *  - Allocates fresh intra, deblocking filter and BSD/MPC row store
+ *    scratch buffers sized from the coded width in macroblocks.
+ *  - When any bitplane is present, repacks the application's bitplane
+ *    buffer into a newly allocated buffer object (see below).
+ */
+static void
+gen75_mfd_vc1_decode_init(VADriverContextP ctx,
+                         struct decode_state *decode_state,
+                         struct gen7_mfd_context *gen7_mfd_context)
+{
+    VAPictureParameterBufferVC1 *pic_param;
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct object_surface *obj_surface;
+    int i;
+    dri_bo *bo;
+    int width_in_mbs;
+
+    assert(decode_state->pic_param && decode_state->pic_param->buffer);
+    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
+    width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
+
+    /* reference picture */
+    obj_surface = SURFACE(pic_param->forward_reference_picture);
+
+    if (obj_surface && obj_surface->bo)
+        gen7_mfd_context->reference_surface[0].surface_id = pic_param->forward_reference_picture;
+    else
+        gen7_mfd_context->reference_surface[0].surface_id = VA_INVALID_ID;
+
+    obj_surface = SURFACE(pic_param->backward_reference_picture);
+
+    if (obj_surface && obj_surface->bo)
+        gen7_mfd_context->reference_surface[1].surface_id = pic_param->backward_reference_picture;
+    else
+        /* Fall back to the *validated* forward reference, not the raw id */
+        gen7_mfd_context->reference_surface[1].surface_id = gen7_mfd_context->reference_surface[0].surface_id;
+
+    /* must do so !!! */
+    for (i = 2; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++)
+        gen7_mfd_context->reference_surface[i].surface_id = gen7_mfd_context->reference_surface[i % 2].surface_id;
+
+    /* Current decoded picture */
+    obj_surface = SURFACE(decode_state->current_render_target);
+    assert(obj_surface);
+    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
+    gen75_mfd_init_vc1_surface(ctx, pic_param, obj_surface);
+
+    /* Both outputs point at the render target; only one of them is valid */
+    dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
+    gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
+    dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
+    gen7_mfd_context->post_deblocking_output.valid = pic_param->entrypoint_fields.bits.loopfilter;
+
+    dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
+    gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
+    dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
+    gen7_mfd_context->pre_deblocking_output.valid = !pic_param->entrypoint_fields.bits.loopfilter;
+
+    dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
+    bo = dri_bo_alloc(i965->intel.bufmgr,
+                      "intra row store",
+                      width_in_mbs * 64,
+                      0x1000);
+    assert(bo);
+    gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
+    gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
+
+    dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
+    bo = dri_bo_alloc(i965->intel.bufmgr,
+                      "deblocking filter row store",
+                      width_in_mbs * 6 * 64,
+                      0x1000);
+    assert(bo);
+    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
+    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
+
+    dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
+    bo = dri_bo_alloc(i965->intel.bufmgr,
+                      "bsd mpc row store",
+                      width_in_mbs * 96,
+                      0x1000);
+    assert(bo);
+    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
+    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
+
+    gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
+
+    gen7_mfd_context->bitplane_read_buffer.valid = !!pic_param->bitplane_present.value;
+    dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
+
+    if (gen7_mfd_context->bitplane_read_buffer.valid) {
+        int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
+        int bitplane_width = ALIGN(width_in_mbs, 2) / 2; /* bytes per macroblock row */
+        int src_w, src_h;
+        uint8_t *src = NULL, *dst = NULL;
+
+        /* Check the buffer store itself before looking inside it */
+        assert(decode_state->bit_plane && decode_state->bit_plane->buffer);
+        src = decode_state->bit_plane->buffer;
+
+        bo = dri_bo_alloc(i965->intel.bufmgr,
+                          "VC-1 Bitplane",
+                          bitplane_width * height_in_mbs,
+                          0x1000);
+        assert(bo);
+        gen7_mfd_context->bitplane_read_buffer.bo = bo;
+
+        dri_bo_map(bo, True);
+        assert(bo->virtual);
+        dst = bo->virtual;
+
+        /*
+         * The source holds one nibble per macroblock, packed contiguously
+         * over the whole picture with the even-indexed macroblock in the
+         * high nibble.  The destination starts every macroblock row on a
+         * byte boundary, with the first macroblock of each pair in the low
+         * nibble.
+         */
+        for (src_h = 0; src_h < height_in_mbs; src_h++) {
+            for(src_w = 0; src_w < width_in_mbs; src_w++) {
+                int src_index, dst_index;
+                int src_shift;
+                uint8_t src_value;
+
+                src_index = (src_h * width_in_mbs + src_w) / 2;
+                src_shift = !((src_h * width_in_mbs + src_w) & 1) * 4;
+                src_value = ((src[src_index] >> src_shift) & 0xf);
+
+                /* Shift the previous nibble down and insert the new one on top */
+                dst_index = src_w / 2;
+                dst[dst_index] = ((dst[dst_index] >> 4) | (src_value << 4));
+            }
+
+            /* Odd width: the last byte holds a single nibble, move it to the low half */
+            if (src_w & 1)
+                dst[src_w / 2] >>= 4;
+
+            dst += bitplane_width;
+        }
+
+        dri_bo_unmap(bo);
+    } else
+        gen7_mfd_context->bitplane_read_buffer.bo = NULL;
+}
+
+/*
+ * Emit the 6-dword MFD_VC1_LONG_PIC_STATE command for the current picture.
+ *
+ * Most fields are copied straight from the VA picture parameters.  The
+ * values derived here are:
+ *  - alt_pquant_config / alt_pquant_edge_mask / alt_pq, from the dquant,
+ *    dq_frame, dq_profile, dq_db_edge, dq_sb_edge and dq_binary_level
+ *    fields;
+ *  - unified_mv_mode and interpolation_mode, from mv_mode (or mv_mode2 when
+ *    mv_mode is intensity compensation);
+ *  - picture_type, translated through va_to_gen7_vc1_pic_type[], with
+ *    I pictures of the advanced profile reported as BI;
+ *  - dmv_surface_valid, set for B pictures whose backward reference has
+ *    private data and is neither an I nor a BI picture;
+ *  - fcm, scale_factor, overlap and trans_ac_y.
+ *
+ * Interlaced frame coding modes are not supported and hit assert(0).
+ */
+static void
+gen75_mfd_vc1_pic_state(VADriverContextP ctx,
+                       struct decode_state *decode_state,
+                       struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    VAPictureParameterBufferVC1 *pic_param;
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct object_surface *obj_surface;
+    int alt_pquant_config = 0, alt_pquant_edge_mask = 0, alt_pq;
+    int dquant, dquantfrm, dqprofile, dqdbedge, dqsbedge, dqbilevel;
+    int unified_mv_mode;
+    int ref_field_pic_polarity = 0;
+    int scale_factor = 0;
+    int trans_ac_y = 0;
+    int dmv_surface_valid = 0;
+    int brfd = 0;
+    int fcm = 0;
+    int picture_type;
+    int profile;
+    int overlap;
+    int interpolation_mode = 0;
+
+    assert(decode_state->pic_param && decode_state->pic_param->buffer);
+    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
+
+    profile = va_to_gen7_vc1_profile[pic_param->sequence_fields.bits.profile];
+    dquant = pic_param->pic_quantizer_fields.bits.dquant;
+    dquantfrm = pic_param->pic_quantizer_fields.bits.dq_frame;
+    dqprofile = pic_param->pic_quantizer_fields.bits.dq_profile;
+    dqdbedge = pic_param->pic_quantizer_fields.bits.dq_db_edge;
+    dqsbedge = pic_param->pic_quantizer_fields.bits.dq_sb_edge;
+    dqbilevel = pic_param->pic_quantizer_fields.bits.dq_binary_level;
+    alt_pq = pic_param->pic_quantizer_fields.bits.alt_pic_quantizer;
+
+    if (dquant == 0) {
+        alt_pquant_config = 0;
+        alt_pquant_edge_mask = 0;
+    } else if (dquant == 2) {
+        alt_pquant_config = 1;
+        alt_pquant_edge_mask = 0xf;
+    } else {
+        assert(dquant == 1);
+        if (dquantfrm == 0) {
+            alt_pquant_config = 0;
+            alt_pquant_edge_mask = 0;
+            alt_pq = 0;
+        } else {
+            assert(dquantfrm == 1);
+            alt_pquant_config = 1;
+
+            switch (dqprofile) {
+            case 3:
+                if (dqbilevel == 0) {
+                    alt_pquant_config = 2;
+                    alt_pquant_edge_mask = 0;
+                } else {
+                    assert(dqbilevel == 1);
+                    alt_pquant_config = 3;
+                    alt_pquant_edge_mask = 0;
+                }
+                break;
+
+            case 0:
+                alt_pquant_edge_mask = 0xf;
+                break;
+
+            case 1:
+                if (dqdbedge == 3)
+                    alt_pquant_edge_mask = 0x9;
+                else
+                    alt_pquant_edge_mask = (0x3 << dqdbedge);
+
+                break;
+
+            case 2:
+                alt_pquant_edge_mask = (0x1 << dqsbedge);
+                break;
+
+            default:
+                assert(0);
+            }
+        }
+    }
+
+    /* With intensity compensation the real MV mode is carried in mv_mode2 */
+    if (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation) {
+        assert(pic_param->mv_fields.bits.mv_mode2 < 4);
+        unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode2];
+    } else {
+        assert(pic_param->mv_fields.bits.mv_mode < 4);
+        unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode];
+    }
+
+    if (pic_param->sequence_fields.bits.interlace == 1 &&
+        pic_param->picture_fields.bits.frame_coding_mode != 0) { /* frame-interlace or field-interlace */
+        /* FIXME: calculate reference field picture polarity */
+        assert(0);
+        ref_field_pic_polarity = 0;
+    }
+
+    if (pic_param->b_picture_fraction < 21)
+        scale_factor = b_picture_scale_factor[pic_param->b_picture_fraction];
+
+    /* Guard the table lookup the same way the mv_mode lookups are guarded */
+    assert(pic_param->picture_fields.bits.picture_type < ARRAY_ELEMS(va_to_gen7_vc1_pic_type));
+    picture_type = va_to_gen7_vc1_pic_type[pic_param->picture_fields.bits.picture_type];
+
+    if (profile == GEN7_VC1_ADVANCED_PROFILE &&
+        picture_type == GEN7_VC1_I_PICTURE)
+        picture_type = GEN7_VC1_BI_PICTURE;
+
+    if (picture_type == GEN7_VC1_I_PICTURE || picture_type == GEN7_VC1_BI_PICTURE) /* I picture */
+        trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx2;
+    else
+        trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx1;
+
+
+    if (picture_type == GEN7_VC1_B_PICTURE) {
+        struct gen7_vc1_surface *gen7_vc1_surface = NULL;
+
+        obj_surface = SURFACE(pic_param->backward_reference_picture);
+        assert(obj_surface);
+        gen7_vc1_surface = obj_surface->private_data;
+
+        /* No usable direct MVs if the backward reference is intra-only or unknown */
+        if (!gen7_vc1_surface ||
+            (va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_I_PICTURE ||
+             va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_BI_PICTURE))
+            dmv_surface_valid = 0;
+        else
+            dmv_surface_valid = 1;
+    }
+
+    assert(pic_param->picture_fields.bits.frame_coding_mode < 3);
+
+    if (pic_param->picture_fields.bits.frame_coding_mode < 2)
+        fcm = pic_param->picture_fields.bits.frame_coding_mode;
+    else {
+        if (pic_param->picture_fields.bits.top_field_first)
+            fcm = 2;
+        else
+            fcm = 3;
+    }
+
+    /*
+     * Compare the translated picture type, as the rest of this function
+     * does, rather than the raw VA value against a GEN7_* constant.
+     * NOTE(review): brfd is computed here but never written into the
+     * command below (see the FIXME on the second reference_distance
+     * field) -- confirm which field should carry it.
+     */
+    if (picture_type == GEN7_VC1_B_PICTURE) { /* B picture */
+        brfd = pic_param->reference_fields.bits.reference_distance;
+        brfd = (scale_factor * brfd) >> 8;
+        brfd = pic_param->reference_fields.bits.reference_distance - brfd - 1;
+
+        if (brfd < 0)
+            brfd = 0;
+    }
+
+    overlap = pic_param->sequence_fields.bits.overlap;
+    if (profile != GEN7_VC1_ADVANCED_PROFILE && pic_param->pic_quantizer_fields.bits.pic_quantizer_scale < 9)
+        overlap = 0;
+
+    assert(pic_param->conditional_overlap_flag < 3);
+    assert(pic_param->mv_fields.bits.mv_table < 4); /* FIXME: interlace mode */
+
+    if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPelBilinear ||
+        (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
+         pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPelBilinear))
+        interpolation_mode = 9; /* Half-pel bilinear */
+    else if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPel ||
+             (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
+              pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPel))
+        interpolation_mode = 1; /* Half-pel bicubic */
+    else
+        interpolation_mode = 0; /* Quarter-pel bicubic */
+
+    BEGIN_BCS_BATCH(batch, 6);
+    OUT_BCS_BATCH(batch, MFD_VC1_LONG_PIC_STATE | (6 - 2));
+    /* DW1: picture size in macroblocks, minus one */
+    OUT_BCS_BATCH(batch,
+                  (((ALIGN(pic_param->coded_height, 16) / 16) - 1) << 16) |
+                  ((ALIGN(pic_param->coded_width, 16) / 16) - 1));
+    /* DW2: the top byte is the macroblock width rounded up to pairs, halved, minus one */
+    OUT_BCS_BATCH(batch,
+                  ((ALIGN(pic_param->coded_width, 16) / 16 + 1) / 2 - 1) << 24 |
+                  dmv_surface_valid << 15 |
+                  (pic_param->pic_quantizer_fields.bits.quantizer == 0) << 14 | /* implicit quantizer */
+                  pic_param->rounding_control << 13 |
+                  pic_param->sequence_fields.bits.syncmarker << 12 |
+                  interpolation_mode << 8 |
+                  0 << 7 | /* FIXME: scale up or down ??? */
+                  pic_param->range_reduction_frame << 6 |
+                  pic_param->entrypoint_fields.bits.loopfilter << 5 |
+                  overlap << 4 |
+                  !pic_param->picture_fields.bits.is_first_field << 3 |
+                  (pic_param->sequence_fields.bits.profile == 3) << 0);
+    /* DW3 */
+    OUT_BCS_BATCH(batch,
+                  va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] << 29 |
+                  picture_type << 26 |
+                  fcm << 24 |
+                  alt_pq << 16 |
+                  pic_param->pic_quantizer_fields.bits.pic_quantizer_scale << 8 |
+                  scale_factor << 0);
+    /* DW4 */
+    OUT_BCS_BATCH(batch,
+                  unified_mv_mode << 28 |
+                  pic_param->mv_fields.bits.four_mv_switch << 27 |
+                  pic_param->fast_uvmc_flag << 26 |
+                  ref_field_pic_polarity << 25 |
+                  pic_param->reference_fields.bits.num_reference_pictures << 24 |
+                  pic_param->reference_fields.bits.reference_distance << 20 |
+                  pic_param->reference_fields.bits.reference_distance << 16 | /* FIXME: ??? */
+                  pic_param->mv_fields.bits.extended_dmv_range << 10 |
+                  pic_param->mv_fields.bits.extended_mv_range << 8 |
+                  alt_pquant_edge_mask << 4 |
+                  alt_pquant_config << 2 |
+                  pic_param->pic_quantizer_fields.bits.half_qp << 1 |
+                  pic_param->pic_quantizer_fields.bits.pic_quantizer_type << 0);
+    /* DW5: bit 31 is set when any bitplane is present; bits 30..24 are the negated per-plane flags */
+    OUT_BCS_BATCH(batch,
+                  !!pic_param->bitplane_present.value << 31 |
+                  !pic_param->bitplane_present.flags.bp_forward_mb << 30 |
+                  !pic_param->bitplane_present.flags.bp_mv_type_mb << 29 |
+                  !pic_param->bitplane_present.flags.bp_skip_mb << 28 |
+                  !pic_param->bitplane_present.flags.bp_direct_mb << 27 |
+                  !pic_param->bitplane_present.flags.bp_overflags << 26 |
+                  !pic_param->bitplane_present.flags.bp_ac_pred << 25 |
+                  !pic_param->bitplane_present.flags.bp_field_tx << 24 |
+                  pic_param->mv_fields.bits.mv_table << 20 |
+                  pic_param->mv_fields.bits.four_mv_block_pattern_table << 18 |
+                  pic_param->mv_fields.bits.two_mv_block_pattern_table << 16 |
+                  pic_param->transform_fields.bits.frame_level_transform_type << 12 |
+                  pic_param->transform_fields.bits.mb_level_transform_type_flag << 11 |
+                  pic_param->mb_mode_table << 8 |
+                  trans_ac_y << 6 |
+                  pic_param->transform_fields.bits.transform_ac_codingset_idx1 << 4 |
+                  pic_param->transform_fields.bits.intra_transform_dc_table << 3 |
+                  pic_param->cbp_table << 0);
+    ADVANCE_BCS_BATCH(batch);
+}
+
+/*
+ * Emit the 6-dword MFX_VC1_PRED_PIPE_STATE command for the current picture.
+ *
+ * ctx:              driver context (not used directly in this function).
+ * decode_state:     must carry a VC-1 picture parameter buffer; asserted.
+ * gen7_mfd_context: supplies the BCS batch buffer the command is written to.
+ *
+ * The two intensity-compensation bit fields (bits 10 and 8 of DW1) are both
+ * driven by whether mv_mode equals VAMvModeIntensityCompensation; the luma
+ * shift/scale values from the picture parameters go to DW2.  The remaining
+ * dwords are written as zero.
+ */
+static void
+gen75_mfd_vc1_pred_pipe_state(VADriverContextP ctx,
+                             struct decode_state *decode_state,
+                             struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    VAPictureParameterBufferVC1 *pic_param;
+    int intensitycomp_single;
+
+    /* The picture parameters are validated and fetched exactly once. */
+    assert(decode_state->pic_param && decode_state->pic_param->buffer);
+    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
+    intensitycomp_single = (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation);
+
+    BEGIN_BCS_BATCH(batch, 6);
+    OUT_BCS_BATCH(batch, MFX_VC1_PRED_PIPE_STATE | (6 - 2));
+    OUT_BCS_BATCH(batch,
+                  0 << 14 | /* FIXME: double ??? */
+                  0 << 12 |
+                  intensitycomp_single << 10 |
+                  intensitycomp_single << 8 |
+                  0 << 4 | /* FIXME: interlace mode */
+                  0);
+    OUT_BCS_BATCH(batch,
+                  pic_param->luma_shift << 16 |
+                  pic_param->luma_scale << 0); /* FIXME: Luma Scaling */
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+    ADVANCE_BCS_BATCH(batch);
+}
+
+/*
+ * Emit the 7-dword form of MFX_VC1_DIRECTMODE_STATE (used by the caller
+ * when IS_STEPPING_BPLUS() is true).
+ *
+ * ctx:              driver context, used to resolve surface IDs.
+ * decode_state:     must carry a VC-1 picture parameter buffer; asserted.
+ * gen7_mfd_context: supplies the BCS batch buffer the command is written to.
+ *
+ * The direct-MV write buffer is taken from the current render target and
+ * the direct-MV read buffer from the backward reference picture.  Each
+ * address slot is three dwords wide: a relocation (or 0 when the surface
+ * has no private data) followed by two zero dwords that are ALWAYS emitted.
+ */
+static void
+gen75_mfd_vc1_directmode_state_bplus(VADriverContextP ctx,
+                              struct decode_state *decode_state,
+                              struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    VAPictureParameterBufferVC1 *pic_param;
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct object_surface *obj_surface;
+    dri_bo *dmv_read_buffer = NULL, *dmv_write_buffer = NULL;
+
+    assert(decode_state->pic_param && decode_state->pic_param->buffer);
+    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
+
+    obj_surface = SURFACE(decode_state->current_render_target);
+
+    if (obj_surface && obj_surface->private_data) {
+        dmv_write_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
+    }
+
+    obj_surface = SURFACE(pic_param->backward_reference_picture);
+
+    if (obj_surface && obj_surface->private_data) {
+        dmv_read_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
+    }
+
+    BEGIN_BCS_BATCH(batch, 7);
+    OUT_BCS_BATCH(batch, MFX_VC1_DIRECTMODE_STATE | (7 - 2));
+
+    /* DW1: direct-MV write buffer address (or 0) */
+    if (dmv_write_buffer) {
+        OUT_BCS_RELOC(batch, dmv_write_buffer,
+                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                      0);
+    } else {
+        OUT_BCS_BATCH(batch, 0);
+    }
+
+    /* DW2..3: unconditional padding of the write-buffer slot */
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+
+    /* DW4: direct-MV read buffer address (or 0) */
+    if (dmv_read_buffer) {
+        OUT_BCS_RELOC(batch, dmv_read_buffer,
+                      I915_GEM_DOMAIN_INSTRUCTION, 0,
+                      0);
+    } else {
+        OUT_BCS_BATCH(batch, 0);
+    }
+
+    /* DW5..6: unconditional padding of the read-buffer slot */
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+
+    ADVANCE_BCS_BATCH(batch);
+}
+
+/*
+ * Emit MFX_VC1_DIRECTMODE_STATE.
+ *
+ * When IS_STEPPING_BPLUS() reports true the work is delegated to
+ * gen75_mfd_vc1_directmode_state_bplus(); otherwise the 3-dword form is
+ * written here: header, direct-MV write buffer, direct-MV read buffer.
+ * A buffer slot is written as 0 when the corresponding surface is missing
+ * or has no private data.
+ */
+static void
+gen75_mfd_vc1_directmode_state(VADriverContextP ctx,
+                              struct decode_state *decode_state,
+                              struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    VAPictureParameterBufferVC1 *pic_param;
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct object_surface *surface;
+    dri_bo *dmv_in = NULL;
+    dri_bo *dmv_out = NULL;
+
+    if (IS_STEPPING_BPLUS(i965)) {
+        gen75_mfd_vc1_directmode_state_bplus(ctx, decode_state, gen7_mfd_context);
+        return;
+    }
+
+    assert(decode_state->pic_param && decode_state->pic_param->buffer);
+    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
+
+    /* Written buffer: belongs to the picture being decoded. */
+    surface = SURFACE(decode_state->current_render_target);
+    if (surface && surface->private_data)
+        dmv_out = ((struct gen7_vc1_surface *)(surface->private_data))->dmv;
+
+    /* Read buffer: belongs to the backward reference picture. */
+    surface = SURFACE(pic_param->backward_reference_picture);
+    if (surface && surface->private_data)
+        dmv_in = ((struct gen7_vc1_surface *)(surface->private_data))->dmv;
+
+    BEGIN_BCS_BATCH(batch, 3);
+    OUT_BCS_BATCH(batch, MFX_VC1_DIRECTMODE_STATE | (3 - 2));
+
+    if (!dmv_out) {
+        OUT_BCS_BATCH(batch, 0);
+    } else {
+        OUT_BCS_RELOC(batch, dmv_out,
+                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                      0);
+    }
+
+    if (!dmv_in) {
+        OUT_BCS_BATCH(batch, 0);
+    } else {
+        OUT_BCS_RELOC(batch, dmv_in,
+                      I915_GEM_DOMAIN_INSTRUCTION, 0,
+                      0);
+    }
+
+    ADVANCE_BCS_BATCH(batch);
+}
+
+/*
+ * Translate a macroblock bit offset into an offset within the raw slice
+ * bytes in buf.
+ *
+ * buf:                      start of the slice data.
+ * in_slice_data_bit_offset: bit offset as supplied in the slice parameters.
+ * profile:                  sequence profile value; only the value 3 triggers
+ *                           the byte scan (presumably VC-1 Advanced profile
+ *                           - confirm against the VA-API definition).
+ *
+ * For any profile other than 3 the input offset is returned unchanged.
+ * For profile 3 the header bytes are walked; every time the four bytes at
+ * the cursor are 00 00 03 followed by a value below 4, two header bytes
+ * are accounted for while the cursor advances by three, so the extra 0x03
+ * byte is skipped.  The result is the final cursor position in bits plus
+ * the sub-byte remainder of the input offset.
+ *
+ * NOTE(review): the scan reads up to three bytes beyond the cursor with no
+ * length argument to bound it.
+ */
+static int
+gen75_mfd_vc1_get_macroblock_bit_offset(uint8_t *buf, int in_slice_data_bit_offset, int profile)
+{
+    const int header_bytes = in_slice_data_bit_offset / 8;
+    int counted = 0;    /* header bytes accounted for so far */
+    int pos = 0;        /* byte cursor in buf */
+
+    if (profile != 3)
+        return in_slice_data_bit_offset;
+
+    while (counted < header_bytes) {
+        if (buf[pos] == 0 && buf[pos + 1] == 0 && buf[pos + 2] == 3 && buf[pos + 3] < 4) {
+            /* 00 00 03 xx: two payload bytes occupy three raw bytes */
+            counted += 2;
+            pos += 3;
+        } else {
+            counted += 1;
+            pos += 1;
+        }
+    }
+
+    return 8 * pos + in_slice_data_bit_offset % 8;
+}
+
+/*
+ * Emit one 5-dword MFD_VC1_BSD_OBJECT command for a slice.
+ *
+ * pic_param:        picture parameters (profile and coded height are read).
+ * slice_param:      the slice being submitted.
+ * next_slice_param: the following slice, or NULL for the last slice; when
+ *                   NULL the next start row is the coded height in
+ *                   16-pixel rows.
+ * slice_data_bo:    buffer object holding the slice bytes; it is mapped
+ *                   only long enough to compute the macroblock bit offset.
+ * gen7_mfd_context: supplies the BCS batch buffer.
+ *
+ * NOTE(review): the return value of dri_bo_map() is not checked.
+ */
+static void
+gen75_mfd_vc1_bsd_object(VADriverContextP ctx,
+                        VAPictureParameterBufferVC1 *pic_param,
+                        VASliceParameterBufferVC1 *slice_param,
+                        VASliceParameterBufferVC1 *next_slice_param,
+                        dri_bo *slice_data_bo,
+                        struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    uint8_t *bytes = NULL;
+    int mb_bit_offset;
+    int next_row;
+
+    /* Resolve the real bit offset of the first macroblock. */
+    dri_bo_map(slice_data_bo, 0);
+    bytes = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
+    mb_bit_offset = gen75_mfd_vc1_get_macroblock_bit_offset(bytes,
+                                                            slice_param->macroblock_offset,
+                                                            pic_param->sequence_fields.bits.profile);
+    dri_bo_unmap(slice_data_bo);
+
+    if (!next_slice_param)
+        next_row = ALIGN(pic_param->coded_height, 16) / 16;
+    else
+        next_row = next_slice_param->slice_vertical_position;
+
+    BEGIN_BCS_BATCH(batch, 5);
+    OUT_BCS_BATCH(batch, MFD_VC1_BSD_OBJECT | (5 - 2));
+    /* data size and start, both adjusted by the whole bytes of the offset */
+    OUT_BCS_BATCH(batch,
+                  slice_param->slice_data_size - (mb_bit_offset >> 3));
+    OUT_BCS_BATCH(batch,
+                  slice_param->slice_data_offset + (mb_bit_offset >> 3));
+    OUT_BCS_BATCH(batch,
+                  slice_param->slice_vertical_position << 16 |
+                  next_row << 0);
+    /* remaining sub-byte part of the offset */
+    OUT_BCS_BATCH(batch,
+                  (mb_bit_offset & 0x7));
+    ADVANCE_BCS_BATCH(batch);
+}
+
+/*
+ * Build and submit the BCS batch that decodes one VC-1 picture.
+ *
+ * After gen75_mfd_vc1_decode_init() the batch is opened atomically, the
+ * pipeline/surface/buffer/picture state commands are emitted in order, and
+ * then one BSD object is emitted per slice.  Slices are grouped in
+ * parameter buffers; the "next slice" of the last element of a group is
+ * the first element of the following group, or NULL at the very end.
+ * The batch is then closed and flushed.
+ */
+static void
+gen75_mfd_vc1_decode_picture(VADriverContextP ctx,
+                            struct decode_state *decode_state,
+                            struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    VAPictureParameterBufferVC1 *pic_param;
+    VASliceParameterBufferVC1 *slice_param, *next_slice_param, *next_slice_group_param;
+    dri_bo *slice_data_bo;
+    int group, elem;
+
+    assert(decode_state->pic_param && decode_state->pic_param->buffer);
+    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
+
+    gen75_mfd_vc1_decode_init(ctx, decode_state, gen7_mfd_context);
+
+    /* Picture-level state */
+    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
+    intel_batchbuffer_emit_mi_flush(batch);
+    gen75_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
+    gen75_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
+    gen75_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
+    gen75_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
+    gen75_mfd_vc1_pic_state(ctx, decode_state, gen7_mfd_context);
+    gen75_mfd_vc1_pred_pipe_state(ctx, decode_state, gen7_mfd_context);
+    gen75_mfd_vc1_directmode_state(ctx, decode_state, gen7_mfd_context);
+
+    /* Slice-level objects, one parameter buffer (group) at a time */
+    for (group = 0; group < decode_state->num_slice_params; group++) {
+        assert(decode_state->slice_params && decode_state->slice_params[group]->buffer);
+        slice_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[group]->buffer;
+        slice_data_bo = decode_state->slice_datas[group]->bo;
+        gen75_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VC1, gen7_mfd_context);
+
+        next_slice_group_param = NULL;
+        if (group != decode_state->num_slice_params - 1)
+            next_slice_group_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[group + 1]->buffer;
+
+        for (elem = 0; elem < decode_state->slice_params[group]->num_elements; elem++, slice_param++) {
+            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
+
+            next_slice_param = (elem < decode_state->slice_params[group]->num_elements - 1)
+                               ? slice_param + 1
+                               : next_slice_group_param;
+
+            gen75_mfd_vc1_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
+        }
+    }
+
+    intel_batchbuffer_end_atomic(batch);
+    intel_batchbuffer_flush(batch);
+}
+
+/*
+ * Prepare the decoder context for a baseline JPEG picture.
+ *
+ * The chroma subsampling is derived from the per-component sampling
+ * factors in the picture parameters (one component => SUBSAMPLE_YUV400;
+ * three components => matched against the supported h/v factor
+ * combinations, anything else asserts).  The render target surface is then
+ * (re)allocated as IMC1 with that subsampling and installed as the
+ * pre-deblocking output; every other auxiliary buffer slot of the context
+ * is marked invalid.
+ *
+ * ctx:              driver context, used to resolve the render target.
+ * decode_state:     carries the JPEG picture parameter buffer and the
+ *                   current render target ID.
+ * gen7_mfd_context: decoder context whose buffer slots are updated.
+ */
+static void
+gen75_mfd_jpeg_decode_init(VADriverContextP ctx,
+                          struct decode_state *decode_state,
+                          struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct object_surface *obj_surface;
+    VAPictureParameterBufferJPEGBaseline *pic_param;
+    int subsampling = SUBSAMPLE_YUV420;
+
+    pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
+
+    if (pic_param->num_components == 1)
+        subsampling = SUBSAMPLE_YUV400;
+    else if (pic_param->num_components == 3) {
+        int h1 = pic_param->components[0].h_sampling_factor;
+        int h2 = pic_param->components[1].h_sampling_factor;
+        int h3 = pic_param->components[2].h_sampling_factor;
+        int v1 = pic_param->components[0].v_sampling_factor;
+        int v2 = pic_param->components[1].v_sampling_factor;
+        int v3 = pic_param->components[2].v_sampling_factor;
+
+        if (h1 == 2 && h2 == 1 && h3 == 1 &&
+            v1 == 2 && v2 == 1 && v3 == 1)
+            subsampling = SUBSAMPLE_YUV420;
+        else if (h1 == 2 && h2 == 1 && h3 == 1 &&
+                 v1 == 1 && v2 == 1 && v3 == 1)
+            subsampling = SUBSAMPLE_YUV422H;
+        else if (h1 == 1 && h2 == 1 && h3 == 1 &&
+                 v1 == 1 && v2 == 1 && v3 == 1)
+            subsampling = SUBSAMPLE_YUV444;
+        else if (h1 == 4 && h2 == 1 && h3 == 1 &&
+                 v1 == 1 && v2 == 1 && v3 == 1)
+            subsampling = SUBSAMPLE_YUV411;
+        else if (h1 == 1 && h2 == 1 && h3 == 1 &&
+                 v1 == 2 && v2 == 1 && v3 == 1)
+            subsampling = SUBSAMPLE_YUV422V;
+        else if (h1 == 2 && h2 == 1 && h3 == 1 &&
+                 v1 == 2 && v2 == 2 && v3 == 2)
+            subsampling = SUBSAMPLE_YUV422H;
+        /* All three horizontal factors must be 2 here; the luma factor
+         * h1 has to be tested, not h2 twice. */
+        else if (h1 == 2 && h2 == 2 && h3 == 2 &&
+                 v1 == 2 && v2 == 1 && v3 == 1)
+            subsampling = SUBSAMPLE_YUV422V;
+        else
+            assert(0);
+    } else {
+        assert(0);
+    }
+
+    /* Current decoded picture */
+    obj_surface = SURFACE(decode_state->current_render_target);
+    assert(obj_surface);
+    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('I','M','C','1'), subsampling);
+
+    /* Swap the pre-deblocking output over to the render target's bo. */
+    dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
+    gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
+    dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
+    gen7_mfd_context->pre_deblocking_output.valid = 1;
+
+    /* The remaining buffer slots are cleared and flagged invalid. */
+    gen7_mfd_context->post_deblocking_output.bo = NULL;
+    gen7_mfd_context->post_deblocking_output.valid = 0;
+
+    gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
+    gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
+
+    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
+    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
+
+    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
+    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 0;
+
+    gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
+    gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
+
+    gen7_mfd_context->bitplane_read_buffer.bo = NULL;
+    gen7_mfd_context->bitplane_read_buffer.valid = 0;
+}
+
+/*
+ * GEN7_JPEG_ROTATION_* values listed in the order 0, 90, 180, 270 degrees.
+ * Presumably indexed by a VA-API rotation value - confirm; the only use
+ * visible nearby (gen75_mfd_jpeg_pic_state) always reads entry 0.
+ */
+static const int va_to_gen7_jpeg_rotation[4] = {
+    GEN7_JPEG_ROTATION_0,
+    GEN7_JPEG_ROTATION_90,
+    GEN7_JPEG_ROTATION_180,
+    GEN7_JPEG_ROTATION_270
+};
+
+/*
+ * Emit the 3-dword MFX_JPEG_PIC_STATE command.
+ *
+ * The chroma type is derived from the component count and sampling
+ * factors of the picture parameters (default GEN7_YUV420), and the frame
+ * size in 8x8 blocks is rounded up to the granularity chosen for that
+ * chroma type.  Rotation is always programmed as "no rotation".
+ *
+ * ctx:              driver context (not used directly in this function).
+ * decode_state:     must carry a JPEG picture parameter buffer; asserted.
+ * gen7_mfd_context: supplies the BCS batch buffer.
+ */
+static void
+gen75_mfd_jpeg_pic_state(VADriverContextP ctx,
+                        struct decode_state *decode_state,
+                        struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    VAPictureParameterBufferJPEGBaseline *pic_param;
+    int chroma_type = GEN7_YUV420;
+    int frame_width_in_blks;
+    int frame_height_in_blks;
+
+    assert(decode_state->pic_param && decode_state->pic_param->buffer);
+    pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
+
+    if (pic_param->num_components == 1)
+        chroma_type = GEN7_YUV400;
+    else if (pic_param->num_components == 3) {
+        int h1 = pic_param->components[0].h_sampling_factor;
+        int h2 = pic_param->components[1].h_sampling_factor;
+        int h3 = pic_param->components[2].h_sampling_factor;
+        int v1 = pic_param->components[0].v_sampling_factor;
+        int v2 = pic_param->components[1].v_sampling_factor;
+        int v3 = pic_param->components[2].v_sampling_factor;
+
+        if (h1 == 2 && h2 == 1 && h3 == 1 &&
+            v1 == 2 && v2 == 1 && v3 == 1)
+            chroma_type = GEN7_YUV420;
+        else if (h1 == 2 && h2 == 1 && h3 == 1 &&
+                 v1 == 1 && v2 == 1 && v3 == 1)
+            chroma_type = GEN7_YUV422H_2Y;
+        else if (h1 == 1 && h2 == 1 && h3 == 1 &&
+                 v1 == 1 && v2 == 1 && v3 == 1)
+            chroma_type = GEN7_YUV444;
+        else if (h1 == 4 && h2 == 1 && h3 == 1 &&
+                 v1 == 1 && v2 == 1 && v3 == 1)
+            chroma_type = GEN7_YUV411;
+        else if (h1 == 1 && h2 == 1 && h3 == 1 &&
+                 v1 == 2 && v2 == 1 && v3 == 1)
+            chroma_type = GEN7_YUV422V_2Y;
+        else if (h1 == 2 && h2 == 1 && h3 == 1 &&
+                 v1 == 2 && v2 == 2 && v3 == 2)
+            chroma_type = GEN7_YUV422H_4Y;
+        /* All three horizontal factors must be 2 here; the luma factor
+         * h1 has to be tested, not h2 twice. */
+        else if (h1 == 2 && h2 == 2 && h3 == 2 &&
+                 v1 == 2 && v2 == 1 && v3 == 1)
+            chroma_type = GEN7_YUV422V_4Y;
+        else
+            assert(0);
+    }
+
+    /* Round the picture size up to the block granularity of the format. */
+    if (chroma_type == GEN7_YUV400 ||
+        chroma_type == GEN7_YUV444 ||
+        chroma_type == GEN7_YUV422V_2Y) {
+        frame_width_in_blks = ((pic_param->picture_width + 7) / 8);
+        frame_height_in_blks = ((pic_param->picture_height + 7) / 8);
+    } else if (chroma_type == GEN7_YUV411) {
+        frame_width_in_blks = ((pic_param->picture_width + 31) / 32) * 4;
+        frame_height_in_blks = ((pic_param->picture_height + 31) / 32) * 4;
+    } else {
+        frame_width_in_blks = ((pic_param->picture_width + 15) / 16) * 2;
+        frame_height_in_blks = ((pic_param->picture_height + 15) / 16) * 2;
+    }
+
+    BEGIN_BCS_BATCH(batch, 3);
+    OUT_BCS_BATCH(batch, MFX_JPEG_PIC_STATE | (3 - 2));
+    OUT_BCS_BATCH(batch,
+                  (va_to_gen7_jpeg_rotation[0] << 4) |    /* without rotation */
+                  (chroma_type << 0));
+    OUT_BCS_BATCH(batch,
+                  ((frame_height_in_blks - 1) << 16) |   /* FrameHeightInBlks */
+                  ((frame_width_in_blks - 1) << 0));    /* FrameWidthInBlks */
+    ADVANCE_BCS_BATCH(batch);
+}
+
+/*
+ * Hardware Huffman table IDs, indexed by the position of the table in the
+ * VA Huffman table buffer (entry 0 => MFX_HUFFTABLE_ID_Y, entry 1 =>
+ * MFX_HUFFTABLE_ID_UV).  Only two entries exist.
+ */
+static const int va_to_gen7_jpeg_hufftable[2] = {
+    MFX_HUFFTABLE_ID_Y,
+    MFX_HUFFTABLE_ID_UV
+};
+
+/*
+ * Emit one 53-dword MFX_JPEG_HUFF_TABLE_STATE command per Huffman table.
+ *
+ * Nothing is emitted when decode_state carries no Huffman table buffer.
+ * For each of the first num_tables tables the command holds the hardware
+ * table ID followed by 12 bytes of DC code counts, 12 bytes of DC values,
+ * 16 bytes of AC code counts and 164 bytes of AC values copied from the
+ * VA buffer.
+ *
+ * NOTE(review): num_tables is used to index the two-entry
+ * va_to_gen7_jpeg_hufftable without a bound check.
+ */
+static void
+gen75_mfd_jpeg_huff_table_state(VADriverContextP ctx,
+                               struct decode_state *decode_state,
+                               struct gen7_mfd_context *gen7_mfd_context,
+                               int num_tables)
+{
+    VAHuffmanTableBufferJPEGBaseline *tables;
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    int t;
+
+    if (!(decode_state->huffman_table && decode_state->huffman_table->buffer))
+        return;
+
+    tables = (VAHuffmanTableBufferJPEGBaseline *)decode_state->huffman_table->buffer;
+
+    for (t = 0; t < num_tables; t++) {
+        int hw_table_id = va_to_gen7_jpeg_hufftable[t];
+
+        BEGIN_BCS_BATCH(batch, 53);
+        OUT_BCS_BATCH(batch, MFX_JPEG_HUFF_TABLE_STATE | (53 - 2));
+        OUT_BCS_BATCH(batch, hw_table_id);
+        /* DC counts, DC values, AC counts, AC values - in that order */
+        intel_batchbuffer_data(batch, tables->huffman_table[t].num_dc_codes, 12);
+        intel_batchbuffer_data(batch, tables->huffman_table[t].dc_values, 12);
+        intel_batchbuffer_data(batch, tables->huffman_table[t].num_ac_codes, 16);
+        intel_batchbuffer_data(batch, tables->huffman_table[t].ac_values, 164);
+        ADVANCE_BCS_BATCH(batch);
+    }
+}
+
+/*
+ * Quantiser matrix type per component.  gen75_mfd_jpeg_qm_state() indexes
+ * this with (component_id - first component_id + 1), so entry 0 (-1) is a
+ * placeholder and entries 1..4 select the luma, Cb, Cr and alpha matrices.
+ */
+static const int va_to_gen7_jpeg_qm[5] = {
+    -1,
+    MFX_QM_JPEG_LUMA_Y_QUANTIZER_MATRIX,
+    MFX_QM_JPEG_CHROMA_CB_QUANTIZER_MATRIX,
+    MFX_QM_JPEG_CHROMA_CR_QUANTIZER_MATRIX,
+    MFX_QM_JPEG_ALPHA_QUANTIZER_MATRIX
+};
+
+/*
+ * Program the quantiser matrices for each picture component.
+ *
+ * Nothing happens when decode_state carries no IQ matrix buffer.  For
+ * every component whose selected quantiser table is flagged as loaded,
+ * the 64 table entries are reordered through zigzag_direct[] into a local
+ * array and handed to gen75_mfd_qm_state() together with the matrix type
+ * looked up in va_to_gen7_jpeg_qm[].
+ */
+static void
+gen75_mfd_jpeg_qm_state(VADriverContextP ctx,
+                       struct decode_state *decode_state,
+                       struct gen7_mfd_context *gen7_mfd_context)
+{
+    VAPictureParameterBufferJPEGBaseline *pic_param;
+    VAIQMatrixBufferJPEGBaseline *iq_matrix;
+    int comp;
+
+    if (!(decode_state->iq_matrix && decode_state->iq_matrix->buffer))
+        return;
+
+    iq_matrix = (VAIQMatrixBufferJPEGBaseline *)decode_state->iq_matrix->buffer;
+    pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
+
+    assert(pic_param->num_components <= 3);
+
+    for (comp = 0; comp < pic_param->num_components; comp++) {
+        /* component IDs are taken relative to the first component */
+        int qm_type = va_to_gen7_jpeg_qm[pic_param->components[comp].component_id - pic_param->components[0].component_id + 1];
+        unsigned char *src = iq_matrix->quantiser_table[pic_param->components[comp].quantiser_table_selector];
+        unsigned char raster_qm[64];
+        int k;
+
+        if (iq_matrix->load_quantiser_table[pic_param->components[comp].quantiser_table_selector]) {
+            for (k = 0; k < 64; k++)
+                raster_qm[zigzag_direct[k]] = src[k];
+
+            gen75_mfd_qm_state(ctx, qm_type, raster_qm, 64, gen7_mfd_context);
+        }
+    }
+}
+
+/*
+ * Emit one 6-dword MFD_JPEG_BSD_OBJECT command for a scan.
+ *
+ * A 3-bit component mask is built from the scan's component selectors,
+ * taken relative to the first picture component ID: relative values 1, 2
+ * and 3 set bits 0, 1 and 2; anything else asserts.  The command carries
+ * the scan data size and offset, its start position, the interleaved flag
+ * (set when the scan has more than one component), the component mask,
+ * the MCU count and the restart interval.
+ *
+ * next_slice_param and slice_data_bo are not read by this function.
+ */
+static void
+gen75_mfd_jpeg_bsd_object(VADriverContextP ctx,
+                         VAPictureParameterBufferJPEGBaseline *pic_param,
+                         VASliceParameterBufferJPEGBaseline *slice_param,
+                         VASliceParameterBufferJPEGBaseline *next_slice_param,
+                         dri_bo *slice_data_bo,
+                         struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    int scan_component_mask = 0;
+    int c;
+
+    assert(slice_param->num_components > 0);
+    assert(slice_param->num_components < 4);
+    assert(slice_param->num_components <= pic_param->num_components);
+
+    for (c = 0; c < slice_param->num_components; c++) {
+        int rel = slice_param->components[c].component_selector - pic_param->components[0].component_id + 1;
+
+        if (rel >= 1 && rel <= 3)
+            scan_component_mask |= (1 << (rel - 1));
+        else
+            assert(0);
+    }
+
+    BEGIN_BCS_BATCH(batch, 6);
+    OUT_BCS_BATCH(batch, MFD_JPEG_BSD_OBJECT | (6 - 2));
+    OUT_BCS_BATCH(batch,
+                  slice_param->slice_data_size);
+    OUT_BCS_BATCH(batch,
+                  slice_param->slice_data_offset);
+    OUT_BCS_BATCH(batch,
+                  slice_param->slice_horizontal_position << 16 |
+                  slice_param->slice_vertical_position << 0);
+    OUT_BCS_BATCH(batch,
+                  ((slice_param->num_components != 1) << 30) |  /* interleaved */
+                  (scan_component_mask << 27) |                 /* scan components */
+                  (0 << 26) |   /* disable interrupt allowed */
+                  (slice_param->num_mcus << 0));                /* MCU count */
+    OUT_BCS_BATCH(batch,
+                  (slice_param->restart_interval << 0));    /* RestartInterval */
+    ADVANCE_BCS_BATCH(batch);
+}
+
+/* Workaround for JPEG decoding on Ivybridge */
+
+VAStatus 
+i965_DestroySurfaces(VADriverContextP ctx,
+                     VASurfaceID *surface_list,
+                     int num_surfaces);
+VAStatus 
+i965_CreateSurfaces(VADriverContextP ctx,
+                    int width,
+                    int height,
+                    int format,
+                    int num_surfaces,
+                    VASurfaceID *surfaces);
+
+/*
+ * Tiny canned clip used by the JPEG workaround.  gen75_jpeg_wa_init()
+ * creates a width x height surface for it and uploads the first data_size
+ * bytes of data[] into the workaround slice-data buffer.
+ * NOTE(review): presumably an encoded AVC slice, since the workaround
+ * pipeline selects MFX_FORMAT_AVC - confirm.
+ */
+static struct {
+    int width;              /* clip width in pixels (16) */
+    int height;             /* clip height in pixels (16) */
+    unsigned char data[32]; /* slice bytes; only the first data_size are set */
+    int data_size;          /* number of valid bytes in data[] (14) */
+    int data_bit_offset;    /* bit offset into data[] (40) */
+    int qp;                 /* quantiser parameter for the clip (28) */
+} gen7_jpeg_wa_clip = {
+    16,
+    16,
+    {
+        0x65, 0xb8, 0x40, 0x32, 0x13, 0xfd, 0x06, 0x6c,
+        0xfc, 0x0a, 0x50, 0x71, 0x5c, 0x00
+    },
+    14,
+    40,
+    28,
+};
+
+/*
+ * Set up the resources used by the JPEG workaround.
+ *
+ * Any previously created workaround surface is destroyed and a fresh
+ * YUV420 surface sized like gen7_jpeg_wa_clip is created and backed by an
+ * NV12 buffer object.  The slice-data buffer object is allocated and
+ * filled with the clip bytes only the first time, i.e. while
+ * jpeg_wa_slice_data_bo is still NULL.
+ */
+static void
+gen75_jpeg_wa_init(VADriverContextP ctx,
+                  struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    VAStatus va_status;
+    struct object_surface *wa_surface;
+
+    /* Drop the surface left over from an earlier call, if any. */
+    if (gen7_mfd_context->jpeg_wa_surface_id != VA_INVALID_SURFACE)
+        i965_DestroySurfaces(ctx,
+                             &gen7_mfd_context->jpeg_wa_surface_id,
+                             1);
+
+    va_status = i965_CreateSurfaces(ctx,
+                                    gen7_jpeg_wa_clip.width,
+                                    gen7_jpeg_wa_clip.height,
+                                    VA_RT_FORMAT_YUV420,
+                                    1,
+                                    &gen7_mfd_context->jpeg_wa_surface_id);
+    assert(va_status == VA_STATUS_SUCCESS);
+
+    wa_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id);
+    assert(wa_surface);
+    i965_check_alloc_surface_bo(ctx, wa_surface, 1, VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
+
+    /* The clip data only has to be uploaded once. */
+    if (gen7_mfd_context->jpeg_wa_slice_data_bo)
+        return;
+
+    gen7_mfd_context->jpeg_wa_slice_data_bo = dri_bo_alloc(i965->intel.bufmgr,
+                                                           "JPEG WA data",
+                                                           0x1000,
+                                                           0x1000);
+    dri_bo_subdata(gen7_mfd_context->jpeg_wa_slice_data_bo,
+                   0,
+                   gen7_jpeg_wa_clip.data_size,
+                   gen7_jpeg_wa_clip.data);
+}
+
+/*
+ * Emit the 5-dword MFX_PIPE_MODE_SELECT command for the JPEG workaround.
+ *
+ * The pipeline is configured for long-format VLD decoding of AVC with
+ * only the pre-deblocking output enabled; all error-termination bits and
+ * the two trailing dwords are zero.
+ */
+static void
+gen75_jpeg_wa_pipe_mode_select(VADriverContextP ctx,
+                              struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+
+    BEGIN_BCS_BATCH(batch, 5);
+    OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
+    /* DW1: mode selection */
+    OUT_BCS_BATCH(batch,
+                  (MFX_LONG_MODE << 17) | /* Currently only support long format */
+                  (MFD_MODE_VLD << 15) | /* VLD mode */
+                  (0 << 10) | /* disable Stream-Out */
+                  (0 << 9)  | /* Post Deblocking Output */
+                  (1 << 8)  | /* Pre Deblocking Output */
+                  (0 << 5)  | /* not in stitch mode */
+                  (MFX_CODEC_DECODE << 4)  | /* decoding mode */
+                  (MFX_FORMAT_AVC << 0));
+    /* DW2: error handling, everything disabled */
+    OUT_BCS_BATCH(batch,
+                  (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
+                  (0 << 3)  | /* terminate if AVC mbdata error occurs */
+                  (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
+                  (0 << 1)  |
+                  (0 << 0));
+    OUT_BCS_BATCH(batch, 0); /* pic status/error report id */ 
+    OUT_BCS_BATCH(batch, 0); /* reserved */
+    ADVANCE_BCS_BATCH(batch);
+}
+
+/*
+ * Emit the 6-dword MFX_SURFACE_STATE command describing the JPEG
+ * workaround surface (planar 4:2:0, interleaved chroma, Y-major tiled).
+ *
+ * NOTE(review): the surface looked up from jpeg_wa_surface_id is
+ * dereferenced without a NULL check.
+ */
+static void
+gen75_jpeg_wa_surface_state(VADriverContextP ctx,
+                           struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct object_surface *obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id);
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+
+    BEGIN_BCS_BATCH(batch, 6);
+    OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
+    OUT_BCS_BATCH(batch, 0);
+    /* DW2: original width/height, each minus one */
+    OUT_BCS_BATCH(batch,
+                  ((obj_surface->orig_width - 1) << 18) |
+                  ((obj_surface->orig_height - 1) << 4));
+    /* DW3: format, pitch and tiling */
+    OUT_BCS_BATCH(batch,
+                  (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
+                  (1 << 27) | /* interleave chroma, set to 0 for JPEG */
+                  (0 << 22) | /* surface object control state, ignored */
+                  ((obj_surface->width - 1) << 3) | /* pitch */
+                  (0 << 2)  | /* must be 0 */
+                  (1 << 1)  | /* must be tiled */
+                  (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
+    OUT_BCS_BATCH(batch,
+                  (0 << 16) | /* X offset for U(Cb), must be 0 */
+                  (obj_surface->y_cb_offset << 0)); /* Y offset for U(Cb) */
+    OUT_BCS_BATCH(batch,
+                  (0 << 16) | /* X offset for V(Cr), must be 0 */
+                  (0 << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zero for JPEG */
+    ADVANCE_BCS_BATCH(batch);
+}
+
+
+/*
+ * Emit the 61-dword form of MFX_PIPE_BUF_ADDR_STATE for the JPEG
+ * workaround (used by the caller when IS_STEPPING_BPLUS() is true).
+ *
+ * Only two addresses are programmed: the workaround surface as the
+ * pre-deblocking output and a temporary "intra row store" buffer object
+ * that is allocated here and unreferenced again once the command has been
+ * written.  Every other dword is zero.
+ */
+static void
+gen75_jpeg_wa_pipe_buf_addr_state_bplus(VADriverContextP ctx,
+                                 struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct object_surface *obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id);
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    dri_bo *intra_bo;
+    int i;
+
+    intra_bo = dri_bo_alloc(i965->intel.bufmgr,
+                            "intra row store",
+                            128 * 64,
+                            0x1000);
+
+    BEGIN_BCS_BATCH(batch, 61);
+    OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
+
+    /* DW 1..3: pre-deblocking output = workaround surface */
+    OUT_BCS_RELOC(batch,
+                  obj_surface->bo,
+                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                  0);
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+
+    /* DW 4..6: post deblocking, unused */
+    for (i = 0; i < 3; i++) {
+        OUT_BCS_BATCH(batch, 0);
+    }
+
+    /* DW 7..12: uncompressed-video & stream out, ignored for decoding */
+    for (i = 0; i < 6; i++) {
+        OUT_BCS_BATCH(batch, 0);
+    }
+
+    /* DW 13..15: intra row store scratch */
+    OUT_BCS_RELOC(batch,
+                  intra_bo,
+                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                  0);
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+
+    /* DW 16..18: deblocking filter, unused */
+    for (i = 0; i < 3; i++) {
+        OUT_BCS_BATCH(batch, 0);
+    }
+
+    /* DW 19..50: two dwords per reference frame slot, all zero */
+    for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
+        OUT_BCS_BATCH(batch, 0);
+        OUT_BCS_BATCH(batch, 0);
+    }
+
+    /* DW 51 */
+    OUT_BCS_BATCH(batch, 0);
+
+    /* DW 52..54: mb status address, unused */
+    for (i = 0; i < 3; i++) {
+        OUT_BCS_BATCH(batch, 0);
+    }
+
+    /* DW 55..60: ILDB & second ILDB address, unused */
+    for (i = 0; i < 6; i++) {
+        OUT_BCS_BATCH(batch, 0);
+    }
+
+    ADVANCE_BCS_BATCH(batch);
+
+    dri_bo_unreference(intra_bo);
+}
+
+/*
+ * Emit MFX_PIPE_BUF_ADDR_STATE for the JPEG workaround.
+ *
+ * When IS_STEPPING_BPLUS() reports true the work is delegated to
+ * gen75_jpeg_wa_pipe_buf_addr_state_bplus(); otherwise the 25-dword form
+ * is written here.  Only the pre-deblocking output (the workaround
+ * surface) and a temporary "intra row store" buffer object carry
+ * addresses; the buffer object is allocated here and unreferenced after
+ * the command has been written.
+ */
+static void
+gen75_jpeg_wa_pipe_buf_addr_state(VADriverContextP ctx,
+                                 struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct object_surface *obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id);
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    dri_bo *intra_bo;
+    int ref;
+
+    if (IS_STEPPING_BPLUS(i965)) {
+        gen75_jpeg_wa_pipe_buf_addr_state_bplus(ctx, gen7_mfd_context);
+        return;
+    }
+
+    intra_bo = dri_bo_alloc(i965->intel.bufmgr,
+                            "intra row store",
+                            128 * 64,
+                            0x1000);
+
+    BEGIN_BCS_BATCH(batch, 25);
+    OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (25 - 2));
+
+    /* DW 1: pre-deblocking output = workaround surface */
+    OUT_BCS_RELOC(batch,
+                  obj_surface->bo,
+                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                  0);
+
+    /* DW 2: post deblocking */
+    OUT_BCS_BATCH(batch, 0);
+
+    /* DW 3..4: ignored for decoding */
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+
+    /* DW 5: intra row store scratch */
+    OUT_BCS_RELOC(batch,
+                  intra_bo,
+                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                  0);
+
+    /* DW 6 */
+    OUT_BCS_BATCH(batch, 0);
+
+    /* DW 7..22: one zero dword per reference frame slot */
+    for (ref = 0; ref < MAX_GEN_REFERENCE_FRAMES; ref++) {
+        OUT_BCS_BATCH(batch, 0);
+    }
+
+    /* DW 23..24: ignored for decoding */
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+    ADVANCE_BCS_BATCH(batch);
+
+    dri_bo_unreference(intra_bo);
+}
+/* JPEG WA, B+ stepping layout: emit the 10-DW MFX_BSP_BUF_BASE_ADDR_STATE using two scratch BOs allocated here. */
+static void
+gen75_jpeg_wa_bsp_buf_base_addr_state_bplus(VADriverContextP ctx,
+                                     struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    dri_bo *bsd_mpc_bo, *mpr_bo;
+
+    bsd_mpc_bo = dri_bo_alloc(i965->intel.bufmgr,
+                              "bsd mpc row store",
+                              11520, /* 1.5 * 120 * 64 */
+                              0x1000);
+
+    mpr_bo = dri_bo_alloc(i965->intel.bufmgr,
+                          "mpr row store",
+                          7680, /* 1.0 * 120 * 64 */
+                          0x1000);
+
+    BEGIN_BCS_BATCH(batch, 10);
+    OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2)); /* DW0: opcode | (length - 2) */
+
+    OUT_BCS_RELOC(batch,                /* DW1: "bsd mpc row store" BO */
+                  bsd_mpc_bo,
+                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                  0);
+
+    OUT_BCS_BATCH(batch, 0);            /* DW2 */
+    OUT_BCS_BATCH(batch, 0);            /* DW3 */
+
+    OUT_BCS_RELOC(batch,                /* DW4: "mpr row store" BO */
+                  mpr_bo,
+                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                  0);
+    OUT_BCS_BATCH(batch, 0);            /* DW5 */
+    OUT_BCS_BATCH(batch, 0);            /* DW6 */
+
+    OUT_BCS_BATCH(batch, 0);            /* DW7..DW9 are written as zero */
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+
+    ADVANCE_BCS_BATCH(batch);
+    /* Drop the local references taken by the two allocations above. */
+    dri_bo_unreference(bsd_mpc_bo);
+    dri_bo_unreference(mpr_bo);
+}
+/* JPEG WA: emit the 4-DW MFX_BSP_BUF_BASE_ADDR_STATE; B+ steppings are handed to the _bplus variant instead. */
+static void
+gen75_jpeg_wa_bsp_buf_base_addr_state(VADriverContextP ctx,
+                                     struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    dri_bo *bsd_mpc_bo, *mpr_bo;
+
+    if (IS_STEPPING_BPLUS(i965)) {
+	gen75_jpeg_wa_bsp_buf_base_addr_state_bplus(ctx, gen7_mfd_context);
+	return;
+    }
+	
+    bsd_mpc_bo = dri_bo_alloc(i965->intel.bufmgr,
+                              "bsd mpc row store",
+                              11520, /* 1.5 * 120 * 64 */
+                              0x1000);
+
+    mpr_bo = dri_bo_alloc(i965->intel.bufmgr,
+                          "mpr row store",
+                          7680, /* 1.0 * 120 * 64 */
+                          0x1000);
+
+    BEGIN_BCS_BATCH(batch, 4);
+    OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2)); /* DW0: opcode | (length - 2) */
+
+    OUT_BCS_RELOC(batch,                /* DW1: "bsd mpc row store" BO */
+                  bsd_mpc_bo,
+                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                  0);
+
+    OUT_BCS_RELOC(batch,                /* DW2: "mpr row store" BO */
+                  mpr_bo,
+                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                  0);
+    OUT_BCS_BATCH(batch, 0);            /* DW3 */
+
+    ADVANCE_BCS_BATCH(batch);
+    /* Drop the local references taken by the two allocations above. */
+    dri_bo_unreference(bsd_mpc_bo);
+    dri_bo_unreference(mpr_bo);
+}
+/* JPEG WA: QM-state step of the workaround sequence; called from gen75_mfd_jpeg_wa(). */
+static void
+gen75_jpeg_wa_avc_qm_state(VADriverContextP ctx,
+                          struct gen7_mfd_context *gen7_mfd_context)
+{
+    /* Empty body: no commands are emitted and neither argument is used. */
+}
+/* JPEG WA: emit the 16-DW MFX_AVC_IMG_STATE describing a picture of 1 x 1 macroblocks. */
+static void
+gen75_jpeg_wa_avc_img_state(VADriverContextP ctx,
+                           struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    int img_struct = 0;
+    int mbaff_frame_flag = 0;
+    unsigned int width_in_mbs = 1, height_in_mbs = 1;
+    /* All values are compile-time constants here; ctx is not used. */
+    BEGIN_BCS_BATCH(batch, 16);
+    OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2)); /* DW0: opcode | (length - 2) */
+    OUT_BCS_BATCH(batch, 
+                  width_in_mbs * height_in_mbs);        /* DW1: 1 * 1 */
+    OUT_BCS_BATCH(batch, 
+                  ((height_in_mbs - 1) << 16) | 
+                  ((width_in_mbs - 1) << 0));           /* DW2: both "minus one" fields are 0 */
+    OUT_BCS_BATCH(batch, 
+                  (0 << 24) |
+                  (0 << 16) |
+                  (0 << 14) |
+                  (0 << 13) |
+                  (0 << 12) | /* differ from GEN6 */
+                  (0 << 10) |
+                  (img_struct << 8));                   /* DW3: evaluates to 0 */
+    OUT_BCS_BATCH(batch,
+                  (1 << 10) | /* 4:2:0 */
+                  (1 << 7) |  /* CABAC */
+                  (0 << 6) |
+                  (0 << 5) |
+                  (0 << 4) |
+                  (0 << 3) |
+                  (1 << 2) |
+                  (mbaff_frame_flag << 1) |
+                  (0 << 0));
+    OUT_BCS_BATCH(batch, 0);            /* DW5..DW15: eleven zero DWs */
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+    ADVANCE_BCS_BATCH(batch);
+}
+/* JPEG WA, B+ stepping layout: emit a 71-DW MFX_AVC_DIRECTMODE_STATE whose payload is entirely zero. */
+static void
+gen75_jpeg_wa_avc_directmode_state_bplus(VADriverContextP ctx,
+                                  struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    int i;
+
+    BEGIN_BCS_BATCH(batch, 71);
+    OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2)); /* DW0: opcode | (length - 2) */
+
+    /* reference surfaces 0..15: two DWs per reference slot */
+    for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
+        OUT_BCS_BATCH(batch, 0); /* top */
+        OUT_BCS_BATCH(batch, 0); /* bottom */
+    }
+	
+        OUT_BCS_BATCH(batch, 0); /* one extra DW present only in this B+ layout */
+
+    /* the current decoding frame/field: three DWs in this layout */
+    OUT_BCS_BATCH(batch, 0); /* top */
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+
+    /* POC List: two DWs per reference slot */
+    for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
+        OUT_BCS_BATCH(batch, 0);
+        OUT_BCS_BATCH(batch, 0);
+    }
+
+    OUT_BCS_BATCH(batch, 0);            /* final two DWs of the command */
+    OUT_BCS_BATCH(batch, 0);
+
+    ADVANCE_BCS_BATCH(batch);
+}
+/* JPEG WA: emit a 69-DW all-zero MFX_AVC_DIRECTMODE_STATE; B+ steppings are handed to the _bplus variant instead. */
+static void
+gen75_jpeg_wa_avc_directmode_state(VADriverContextP ctx,
+                                  struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    int i;
+
+    if (IS_STEPPING_BPLUS(i965)) {
+	gen75_jpeg_wa_avc_directmode_state_bplus(ctx, gen7_mfd_context);
+	return;
+    }	
+
+    BEGIN_BCS_BATCH(batch, 69);
+    OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2)); /* DW0: opcode | (length - 2) */
+
+    /* reference surfaces 0..15: two DWs per reference slot */
+    for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
+        OUT_BCS_BATCH(batch, 0); /* top */
+        OUT_BCS_BATCH(batch, 0); /* bottom */
+    }
+
+    /* the current decoding frame/field */
+    OUT_BCS_BATCH(batch, 0); /* top */
+    OUT_BCS_BATCH(batch, 0); /* bottom */
+
+    /* POC List: two DWs per reference slot */
+    for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
+        OUT_BCS_BATCH(batch, 0);
+        OUT_BCS_BATCH(batch, 0);
+    }
+
+    OUT_BCS_BATCH(batch, 0);            /* final two DWs of the command */
+    OUT_BCS_BATCH(batch, 0);
+
+    ADVANCE_BCS_BATCH(batch);
+}
+/* JPEG WA, B+ stepping layout: emit the 26-DW MFX_IND_OBJ_BASE_ADDR_STATE pointing at jpeg_wa_slice_data_bo. */
+static void 
+gen75_jpeg_wa_ind_obj_base_addr_state_bplus(VADriverContextP ctx,
+                                     struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+
+    BEGIN_BCS_BATCH(batch, 26);
+    OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2)); /* DW0: opcode | (length - 2) */
+    OUT_BCS_RELOC(batch,                /* DW1: workaround slice data BO; write domain is 0 */
+                  gen7_mfd_context->jpeg_wa_slice_data_bo,
+                  I915_GEM_DOMAIN_INSTRUCTION, 0,
+                  0);
+    OUT_BCS_BATCH(batch, 0);            /* DW2 */
+    OUT_BCS_BATCH(batch, 0);            /* DW3 */
+	
+    OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
+    OUT_BCS_BATCH(batch, 0);            /* DW5 */
+
+    /* MFX indirect MV: DW 6-10, all zero */
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+
+    /* MFX IT_COFF: DW 11-15, all zero */
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+
+    /* MFX IT_DBLK: DW 16-20, all zero */
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+
+    /* MFX PAK_BSE object for encoder: DW 21-25, all zero */
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+    ADVANCE_BCS_BATCH(batch);
+}
+/* JPEG WA: emit the 11-DW MFX_IND_OBJ_BASE_ADDR_STATE; B+ steppings are handed to the _bplus variant instead. */
+static void
+gen75_jpeg_wa_ind_obj_base_addr_state(VADriverContextP ctx,
+                                     struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+
+    if (IS_STEPPING_BPLUS(i965)) {
+	gen75_jpeg_wa_ind_obj_base_addr_state_bplus(ctx, gen7_mfd_context);
+	return;
+    }	
+
+    BEGIN_BCS_BATCH(batch, 11);
+    OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2)); /* DW0: opcode | (length - 2) */
+    OUT_BCS_RELOC(batch,                /* DW1: workaround slice data BO; write domain is 0 */
+                  gen7_mfd_context->jpeg_wa_slice_data_bo,
+                  I915_GEM_DOMAIN_INSTRUCTION, 0,
+                  0);
+    OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
+    OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
+    OUT_BCS_BATCH(batch, 0);
+    ADVANCE_BCS_BATCH(batch);
+}
+/* JPEG WA: emit the 6-DW MFD_AVC_BSD_OBJECT, taking data size and bit offset from gen7_jpeg_wa_clip. */
+static void
+gen75_jpeg_wa_avc_bsd_object(VADriverContextP ctx,
+                            struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+
+    /* the input bitstream format on GEN7 differs from GEN6 */
+    BEGIN_BCS_BATCH(batch, 6);
+    OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2)); /* DW0: opcode | (length - 2) */
+    OUT_BCS_BATCH(batch, gen7_jpeg_wa_clip.data_size);  /* DW1 */
+    OUT_BCS_BATCH(batch, 0);                            /* DW2 */
+    OUT_BCS_BATCH(batch,
+                  (0 << 31) |
+                  (0 << 14) |
+                  (0 << 12) |
+                  (0 << 10) |
+                  (0 << 8));                            /* DW3: evaluates to 0 */
+    OUT_BCS_BATCH(batch,
+                  ((gen7_jpeg_wa_clip.data_bit_offset >> 3) << 16) | /* whole bytes of the bit offset */
+                  (0 << 5)  |
+                  (0 << 4)  |
+                  (1 << 3) | /* LastSlice Flag */
+                  (gen7_jpeg_wa_clip.data_bit_offset & 0x7));        /* remaining 0..7 bits */
+    OUT_BCS_BATCH(batch, 0);                            /* DW5 */
+    ADVANCE_BCS_BATCH(batch);
+}
+/* JPEG WA: emit the 11-DW MFX_AVC_SLICE_STATE for one I slice starting at MB 0, with QP from gen7_jpeg_wa_clip. */
+static void
+gen75_jpeg_wa_avc_slice_state(VADriverContextP ctx,
+                             struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    int slice_hor_pos = 0, slice_ver_pos = 0, next_slice_hor_pos = 0, next_slice_ver_pos = 1;
+    int num_ref_idx_l0 = 0, num_ref_idx_l1 = 0;
+    int first_mb_in_slice = 0;
+    int slice_type = SLICE_TYPE_I;
+    /* Every field except the QP is a constant; the next-slice position is (hor 0, ver 1). */
+    BEGIN_BCS_BATCH(batch, 11);
+    OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2)); /* DW0: opcode | (length - 2) */
+    OUT_BCS_BATCH(batch, slice_type);                     /* DW1 */
+    OUT_BCS_BATCH(batch, 
+                  (num_ref_idx_l1 << 24) |
+                  (num_ref_idx_l0 << 16) |
+                  (0 << 8) |
+                  (0 << 0));                              /* DW2: evaluates to 0 (no references) */
+    OUT_BCS_BATCH(batch, 
+                  (0 << 29) |
+                  (1 << 27) |   /* disable Deblocking */
+                  (0 << 24) |
+                  (gen7_jpeg_wa_clip.qp << 16) |
+                  (0 << 8) |
+                  (0 << 0));
+    OUT_BCS_BATCH(batch, 
+                  (slice_ver_pos << 24) |
+                  (slice_hor_pos << 16) | 
+                  (first_mb_in_slice << 0));              /* DW4: evaluates to 0 */
+    OUT_BCS_BATCH(batch,
+                  (next_slice_ver_pos << 16) |
+                  (next_slice_hor_pos << 0));             /* DW5: evaluates to 1 << 16 */
+    OUT_BCS_BATCH(batch, (1 << 19)); /* last slice flag */
+    OUT_BCS_BATCH(batch, 0);                              /* DW7..DW10: zero */
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+    ADVANCE_BCS_BATCH(batch);
+}
+/* Emit the whole JPEG workaround command sequence into the context's batch, in the fixed order below. */
+static void
+gen75_mfd_jpeg_wa(VADriverContextP ctx,
+                 struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    gen75_jpeg_wa_init(ctx, gen7_mfd_context);
+    intel_batchbuffer_emit_mi_flush(batch);
+    gen75_jpeg_wa_pipe_mode_select(ctx, gen7_mfd_context);
+    gen75_jpeg_wa_surface_state(ctx, gen7_mfd_context);
+    gen75_jpeg_wa_pipe_buf_addr_state(ctx, gen7_mfd_context);
+    gen75_jpeg_wa_bsp_buf_base_addr_state(ctx, gen7_mfd_context);
+    gen75_jpeg_wa_avc_qm_state(ctx, gen7_mfd_context);   /* currently emits nothing */
+    gen75_jpeg_wa_avc_img_state(ctx, gen7_mfd_context);
+    gen75_jpeg_wa_ind_obj_base_addr_state(ctx, gen7_mfd_context);
+    /* NOTE(review): these steps look like a tiny AVC decode run before the JPEG one; the rationale is not visible here. */
+    gen75_jpeg_wa_avc_directmode_state(ctx, gen7_mfd_context);
+    gen75_jpeg_wa_avc_slice_state(ctx, gen7_mfd_context);
+    gen75_jpeg_wa_avc_bsd_object(ctx, gen7_mfd_context);
+}
+/* Decode one JPEG picture: WA sequence, picture-level state, Huffman tables, then one BSD object per slice element. */
+void
+gen75_mfd_jpeg_decode_picture(VADriverContextP ctx,
+                             struct decode_state *decode_state,
+                             struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    VAPictureParameterBufferJPEGBaseline *pic_param;
+    VASliceParameterBufferJPEGBaseline *slice_param, *next_slice_param, *next_slice_group_param;
+    dri_bo *slice_data_bo;
+    int i, j, max_selector = 0;
+
+    assert(decode_state->pic_param && decode_state->pic_param->buffer);
+    pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
+
+    /* Currently only support Baseline DCT */
+    gen75_mfd_jpeg_decode_init(ctx, decode_state, gen7_mfd_context);
+    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
+    gen75_mfd_jpeg_wa(ctx, gen7_mfd_context);            /* workaround sequence goes first */
+    intel_batchbuffer_emit_mi_flush(batch);
+    gen75_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
+    gen75_mfd_surface_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
+    gen75_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
+    gen75_mfd_jpeg_pic_state(ctx, decode_state, gen7_mfd_context);
+    gen75_mfd_jpeg_qm_state(ctx, decode_state, gen7_mfd_context);
+    /* Pass 1: walk every slice element to find the largest DC/AC Huffman table selector in use. */
+    for (j = 0; j < decode_state->num_slice_params; j++) {
+        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
+        slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
+        slice_data_bo = decode_state->slice_datas[j]->bo;
+        gen75_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context); /* NOTE(review): emitted again in pass 2; confirm it is needed here */
+
+        if (j == decode_state->num_slice_params - 1)
+            next_slice_group_param = NULL;
+        else
+            next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
+
+        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
+            int component;
+
+            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
+            /* next_slice_param is computed below but never read in this pass. */
+            if (i < decode_state->slice_params[j]->num_elements - 1)
+                next_slice_param = slice_param + 1;
+            else
+                next_slice_param = next_slice_group_param;
+
+            for (component = 0; component < slice_param->num_components; component++) {
+                if (max_selector < slice_param->components[component].dc_table_selector)
+                    max_selector = slice_param->components[component].dc_table_selector;
+
+                if (max_selector < slice_param->components[component].ac_table_selector)
+                    max_selector = slice_param->components[component].ac_table_selector;
+            }
+
+            slice_param++;
+        }
+    }
+    /* Only selectors 0 and 1 are accepted; max_selector + 1 is passed on (presumably the number of tables to load). */
+    assert(max_selector < 2);
+    gen75_mfd_jpeg_huff_table_state(ctx, decode_state, gen7_mfd_context, max_selector + 1);
+    /* Pass 2: emit one BSD object per slice element, handing over the following element (NULL after the last). */
+    for (j = 0; j < decode_state->num_slice_params; j++) {
+        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
+        slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
+        slice_data_bo = decode_state->slice_datas[j]->bo;
+        gen75_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
+
+        if (j == decode_state->num_slice_params - 1)
+            next_slice_group_param = NULL;
+        else
+            next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
+
+        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
+            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
+
+            if (i < decode_state->slice_params[j]->num_elements - 1)
+                next_slice_param = slice_param + 1;        /* next element of the same buffer */
+            else
+                next_slice_param = next_slice_group_param; /* first element of the next buffer, or NULL */
+
+            gen75_mfd_jpeg_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
+            slice_param++;
+        }
+    }
+    /* End the atomic section started above, then flush the batch. */
+    intel_batchbuffer_end_atomic(batch);
+    intel_batchbuffer_flush(batch);
+}
+/* Decoder entry point (installed as base.run): dispatch to the per-codec decode routine selected by `profile`. */
+static void 
+gen75_mfd_decode_picture(VADriverContextP ctx, 
+                        VAProfile profile, 
+                        union codec_state *codec_state,
+                        struct hw_context *hw_context)
+
+{
+    struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
+    struct decode_state *decode_state = &codec_state->decode;
+
+    assert(gen7_mfd_context);
+    /* Reset to -1 before every picture, for all profiles. NOTE(review): presumably "not yet determined" - confirm. */
+    gen7_mfd_context->wa_mpeg2_slice_vertical_position = -1;
+
+    switch (profile) {
+    case VAProfileMPEG2Simple:
+    case VAProfileMPEG2Main:
+        gen75_mfd_mpeg2_decode_picture(ctx, decode_state, gen7_mfd_context);
+        break;
+        
+    case VAProfileH264Baseline:
+    case VAProfileH264Main:
+    case VAProfileH264High:
+        gen75_mfd_avc_decode_picture(ctx, decode_state, gen7_mfd_context);
+        break;
+
+    case VAProfileVC1Simple:
+    case VAProfileVC1Main:
+    case VAProfileVC1Advanced:
+        gen75_mfd_vc1_decode_picture(ctx, decode_state, gen7_mfd_context);
+        break;
+
+    case VAProfileJPEGBaseline:
+        gen75_mfd_jpeg_decode_picture(ctx, decode_state, gen7_mfd_context);
+        break;
+
+    default:
+        assert(0); /* any other profile is a caller error; nothing is decoded */
+        break;
+    }
+}
+/* Destructor (installed as base.destroy): unreference every BO held by the context, free its batch, then free it. */
+static void
+gen75_mfd_context_destroy(void *hw_context)
+{
+    struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
+
+    dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
+    gen7_mfd_context->post_deblocking_output.bo = NULL;
+
+    dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
+    gen7_mfd_context->pre_deblocking_output.bo = NULL;
+
+    dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
+    gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
+
+    dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
+    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
+
+    dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
+    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
+
+    dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
+    gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
+
+    dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
+    gen7_mfd_context->bitplane_read_buffer.bo = NULL;
+
+    dri_bo_unreference(gen7_mfd_context->jpeg_wa_slice_data_bo);
+    /* jpeg_wa_slice_data_bo is not reset to NULL, unlike the others; the whole context is freed just below. */
+    intel_batchbuffer_free(gen7_mfd_context->base.batch);
+    free(gen7_mfd_context);
+}
+/* MPEG-2 context setup: set all four load_*_quantiser_matrix fields of the cached IQ matrix to -1; ctx is unused. */
+static void gen75_mfd_mpeg2_context_init(VADriverContextP ctx,
+                                    struct gen7_mfd_context *gen7_mfd_context)
+{
+    gen7_mfd_context->iq_matrix.mpeg2.load_intra_quantiser_matrix = -1; /* NOTE(review): -1 presumably means "nothing loaded yet" - confirm */
+    gen7_mfd_context->iq_matrix.mpeg2.load_non_intra_quantiser_matrix = -1;
+    gen7_mfd_context->iq_matrix.mpeg2.load_chroma_intra_quantiser_matrix = -1;
+    gen7_mfd_context->iq_matrix.mpeg2.load_chroma_non_intra_quantiser_matrix = -1;
+}
+/* Allocate and initialise a decoder context for `profile`; returns it as a hw_context, or NULL if allocation fails. */
+struct hw_context *
+gen75_dec_hw_context_init(VADriverContextP ctx, VAProfile profile)
+{
+    struct intel_driver_data *intel = intel_driver_data(ctx);
+    struct gen7_mfd_context *gen7_mfd_context = calloc(1, sizeof(struct gen7_mfd_context));
+    int i;
+    if (!gen7_mfd_context) return NULL; /* calloc failed: do not dereference a NULL context below */
+    gen7_mfd_context->base.destroy = gen75_mfd_context_destroy;
+    gen7_mfd_context->base.run = gen75_mfd_decode_picture;
+    gen7_mfd_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
+    /* Mark every reference-surface slot as unused. */
+    for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
+        gen7_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
+        gen7_mfd_context->reference_surface[i].frame_store_id = -1;
+    }
+    /* No JPEG workaround surface exists yet. */
+    gen7_mfd_context->jpeg_wa_surface_id = VA_INVALID_SURFACE;
+    /* Codec-specific setup; profiles not listed here need none in this function. */
+    switch (profile) {
+    case VAProfileMPEG2Simple:
+    case VAProfileMPEG2Main:
+        gen75_mfd_mpeg2_context_init(ctx, gen7_mfd_context);
+        break;
+
+    case VAProfileH264Baseline:
+    case VAProfileH264Main:
+    case VAProfileH264High:
+        gen75_mfd_avc_context_init(ctx, gen7_mfd_context);
+        break;
+    default:
+        break;
+    }
+    return (struct hw_context *)gen7_mfd_context;
+}
diff --git a/src/gen75_vme.c b/src/gen75_vme.c
new file mode 100644
index 0000000..4c39a23
--- /dev/null
+++ b/src/gen75_vme.c
@@ -0,0 +1,811 @@
+/*
+ * Copyright © 2010-2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Zhao Yakui <yakui.zhao at intel.com>
+ *    Xiang HaiHao <haihao.xiang at intel.com>
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+
+#include "intel_batchbuffer.h"
+#include "intel_driver.h"
+
+#include "i965_defines.h"
+#include "i965_drv_video.h"
+#include "gen6_vme.h"
+#include "i965_encoder.h"
+
+#define SURFACE_STATE_PADDED_SIZE_0_GEN7        ALIGN(sizeof(struct gen7_surface_state), 32)
+#define SURFACE_STATE_PADDED_SIZE_1_GEN7        ALIGN(sizeof(struct gen7_surface_state2), 32)
+#define SURFACE_STATE_PADDED_SIZE_GEN7          MAX(SURFACE_STATE_PADDED_SIZE_0_GEN7, SURFACE_STATE_PADDED_SIZE_1_GEN7)
+
+#define SURFACE_STATE_PADDED_SIZE_0_GEN6        ALIGN(sizeof(struct i965_surface_state), 32)
+#define SURFACE_STATE_PADDED_SIZE_1_GEN6        ALIGN(sizeof(struct i965_surface_state2), 32)
+#define SURFACE_STATE_PADDED_SIZE_GEN6          MAX(SURFACE_STATE_PADDED_SIZE_0_GEN6, SURFACE_STATE_PADDED_SIZE_1_GEN6)
+/* Slot size: the largest padded surface-state struct of either generation, so any of them fits in one slot. */
+#define SURFACE_STATE_PADDED_SIZE               MAX(SURFACE_STATE_PADDED_SIZE_GEN6, SURFACE_STATE_PADDED_SIZE_GEN7)
+#define SURFACE_STATE_OFFSET(index)             (SURFACE_STATE_PADDED_SIZE * (index))
+#define BINDING_TABLE_OFFSET                    SURFACE_STATE_OFFSET(MAX_MEDIA_SURFACES_GEN6) /* table follows the slots */
+/* Kernel indices, used as the index field of the gen75_vme_kernels[] entries below. */
+#define VME_INTRA_SHADER	0	
+#define VME_INTER_SHADER	1
+
+#define CURBE_ALLOCATION_SIZE   37              /* in 256-bit */
+#define CURBE_TOTAL_DATA_LENGTH (4 * 32)        /* in byte, it should be less than or equal to CURBE_ALLOCATION_SIZE * 32 */
+#define CURBE_URB_ENTRY_LENGTH  4               /* in 256-bit, it should be less than or equal to CURBE_TOTAL_DATA_LENGTH / 32 */
+
+#define VME_MSG_LENGTH		32
+/* Kernel binaries, included from the .g75b files as rows of four 32-bit words. */
+static const uint32_t gen75_vme_intra_frame[][4] = {
+#include "shaders/vme/intra_frame_haswell.g75b"
+};
+
+static const uint32_t gen75_vme_inter_frame[][4] = {
+#include "shaders/vme/inter_frame_haswell.g75b"
+};
+/* Kernel table. NOTE(review): fields look like name, index, binary, size in bytes, and a NULL last member - confirm against struct i965_kernel. */
+static struct i965_kernel gen75_vme_kernels[] = {
+    {
+        "VME Intra Frame",
+        VME_INTRA_SHADER,										/*index*/
+        gen75_vme_intra_frame, 			
+        sizeof(gen75_vme_intra_frame),		
+        NULL
+    },
+    {
+        "VME inter Frame",
+        VME_INTER_SHADER,                                       /*index*/
+        gen75_vme_inter_frame,
+        sizeof(gen75_vme_inter_frame),
+        NULL
+    }
+};
+
+/*
+ * Surface state helpers (these fill the gen7_surface_state / gen7_surface_state2 layouts)
+ */
+static
+void gen75_vme_set_common_surface_tiling(struct gen7_surface_state *ss, unsigned int tiling)
+{
+    /* Translate the GEM tiling mode into the SS0 tiling fields; any other mode leaves *ss untouched. */
+    if (tiling == I915_TILING_NONE) {
+        ss->ss0.tile_walk = 0;
+        ss->ss0.tiled_surface = 0;
+        return;
+    }
+
+    if (tiling == I915_TILING_X) {
+        ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
+        ss->ss0.tiled_surface = 1;
+    } else if (tiling == I915_TILING_Y) {
+        ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
+        ss->ss0.tiled_surface = 1;
+    }
+}
+
+static void
+gen75_vme_set_source_surface_tiling(struct gen7_surface_state2 *ss, unsigned int tiling)
+{
+    /* Translate the GEM tiling mode into the SS2 tiling fields; any other mode leaves *ss untouched. */
+    if (tiling == I915_TILING_NONE) {
+        ss->ss2.tile_walk = 0;
+        ss->ss2.tiled_surface = 0;
+        return;
+    }
+
+    if (tiling == I915_TILING_X) {
+        ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
+        ss->ss2.tiled_surface = 1;
+    } else if (tiling == I915_TILING_Y) {
+        ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
+        ss->ss2.tiled_surface = 1;
+    }
+}
+
+/* Write a gen7_surface_state2 for obj_surface into slot `index` and point binding-table entry `index` at that slot.
+ * Only used for VME source surface state. */
+static void gen75_vme_source_surface_state(VADriverContextP ctx,
+                                          int index,
+                                          struct object_surface *obj_surface,
+                                          struct gen6_encoder_context *gen6_encoder_context)
+{
+    struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
+    struct gen7_surface_state2 *ss;
+    dri_bo *bo;
+    int w, h, w_pitch, h_pitch;
+    unsigned int tiling, swizzle;
+
+    assert(obj_surface->bo);
+
+    w = obj_surface->orig_width;
+    h = obj_surface->orig_height;
+    w_pitch = obj_surface->width;
+    h_pitch = obj_surface->height;
+    /* The surface states and the binding table share one BO; map it to fill in this slot. */
+    bo = vme_context->surface_state_binding_table.bo;
+    dri_bo_map(bo, 1);
+    assert(bo->virtual);
+
+    ss = (struct gen7_surface_state2 *)((char *)bo->virtual + SURFACE_STATE_OFFSET(index));
+    memset(ss, 0, sizeof(*ss));
+
+    ss->ss0.surface_base_address = obj_surface->bo->offset; /* a relocation for this field is emitted below */
+
+    ss->ss1.cbcr_pixel_offset_v_direction = 2;
+    ss->ss1.width = w - 1;              /* from orig_width */
+    ss->ss1.height = h - 1;             /* from orig_height */
+
+    ss->ss2.surface_format = MFX_SURFACE_PLANAR_420_8;
+    ss->ss2.interleave_chroma = 1;
+    ss->ss2.pitch = w_pitch - 1;        /* from obj_surface->width */
+    ss->ss2.half_pitch_for_chroma = 0;
+
+    dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle); /* swizzle is fetched but not used */
+    gen75_vme_set_source_surface_tiling(ss, tiling);
+
+    /* UV offset for interleave mode */
+    ss->ss3.x_offset_for_cb = 0;
+    ss->ss3.y_offset_for_cb = h_pitch;  /* = obj_surface->height */
+
+    dri_bo_emit_reloc(bo,
+                      I915_GEM_DOMAIN_RENDER, 0,
+                      0,
+                      SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state2, ss0),
+                      obj_surface->bo);
+    /* Binding-table entry `index` holds the byte offset of this slot within the BO. */
+    ((unsigned int *)((char *)bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
+    dri_bo_unmap(bo);
+}
+/* Write a gen7_surface_state (2D, R8_UNORM) for obj_surface into slot `index` and bind it in the binding table. */
+static void
+gen75_vme_media_source_surface_state(VADriverContextP ctx,
+                                    int index,
+                                    struct object_surface *obj_surface,
+                                    struct gen6_encoder_context *gen6_encoder_context)
+{
+    struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
+    struct gen7_surface_state *ss;
+    dri_bo *bo;
+    int w, h, w_pitch;
+    unsigned int tiling, swizzle;
+
+    /* Y plane */
+    w = obj_surface->orig_width;
+    h = obj_surface->orig_height;
+    w_pitch = obj_surface->width;
+    /* The surface states and the binding table share one BO; map it to fill in this slot. */
+    bo = vme_context->surface_state_binding_table.bo;
+    dri_bo_map(bo, 1);                  /* literal 1, matching the other surface-state writers in this file */
+    assert(bo->virtual);
+
+    ss = (struct gen7_surface_state *)((char *)bo->virtual + SURFACE_STATE_OFFSET(index));
+    memset(ss, 0, sizeof(*ss));
+
+    ss->ss0.surface_type = I965_SURFACE_2D;
+    ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
+
+    ss->ss1.base_addr = obj_surface->bo->offset; /* a relocation for this field is emitted below */
+
+    ss->ss2.width = w / 4 - 1;          /* a quarter of orig_width, minus one */
+    ss->ss2.height = h - 1;
+
+    ss->ss3.pitch = w_pitch - 1;        /* from obj_surface->width */
+
+    dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle); /* swizzle is fetched but not used */
+    gen75_vme_set_common_surface_tiling(ss, tiling);
+
+    dri_bo_emit_reloc(bo,
+                      I915_GEM_DOMAIN_RENDER, 0,
+                      0,
+                      SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state, ss1),
+                      obj_surface->bo);
+    /* Binding-table entry `index` holds the byte offset of this slot within the BO. */
+    ((unsigned int *)((char *)bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
+    dri_bo_unmap(bo);
+}
+/* Allocate the VME output BO sized for the whole picture and describe it as a buffer surface in slot `index`. */
+static VAStatus
+gen75_vme_output_buffer_setup(VADriverContextP ctx,
+                             struct encode_state *encode_state,
+                             int index,
+                             struct gen6_encoder_context *gen6_encoder_context)
+
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
+    struct gen7_surface_state *ss;
+    dri_bo *bo;
+    VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param->buffer;
+    VAEncSliceParameterBuffer *pSliceParameter = (VAEncSliceParameterBuffer *)encode_state->slice_params[0]->buffer;
+    int is_intra = pSliceParameter->slice_flags.bits.is_intra;
+    int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
+    int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
+    int num_entries;
+    /* Per-macroblock output size depends on whether the first slice is intra. */
+    if ( is_intra ) {
+        vme_context->vme_output.size_block = INTRA_VME_OUTPUT_IN_BYTES * 2;
+    } else {
+        vme_context->vme_output.size_block = INTRA_VME_OUTPUT_IN_BYTES * 24;
+	/*
+	 * Inter MV . 32-byte Intra search + 16 IME info + 128 IME MV + 32 IME Ref
+	 * + 16 FBR Info + 128 FBR MV + 32 FBR Ref.
+	 * 16 * (2 + 2 * (1 + 8 + 2))= 16 * 24.
+	 */
+    }
+    vme_context->vme_output.num_blocks = width_in_mbs * height_in_mbs; /* one block per macroblock */
+    vme_context->vme_output.pitch = 16;
+    bo = dri_bo_alloc(i965->intel.bufmgr, 
+                      "VME output buffer",
+                      vme_context->vme_output.num_blocks * vme_context->vme_output.size_block,
+                      0x1000);
+    assert(bo);
+    vme_context->vme_output.bo = bo;    /* NOTE(review): any previous vme_output.bo is overwritten here - confirm it is released elsewhere */
+
+    bo = vme_context->surface_state_binding_table.bo;
+    dri_bo_map(bo, 1);
+    assert(bo->virtual);
+
+    ss = (struct gen7_surface_state *)((char *)bo->virtual + SURFACE_STATE_OFFSET(index));
+    memset(ss, 0, sizeof(*ss));
+
+    /* entries are 16 bytes each, matching vme_output.pitch set above */
+    num_entries = vme_context->vme_output.num_blocks * vme_context->vme_output.size_block / 16;
+
+    ss->ss0.surface_type = I965_SURFACE_BUFFER;
+
+    ss->ss1.base_addr = vme_context->vme_output.bo->offset; /* a relocation for this field is emitted below */
+    /* (num_entries - 1) is split across three fields: bits 0-6, bits 7-20 and bits 21-26. */
+    ss->ss2.width = ((num_entries - 1) & 0x7f);
+    ss->ss2.height = (((num_entries - 1) >> 7) & 0x3fff);
+    ss->ss3.depth = (((num_entries - 1) >> 21) & 0x3f);
+
+    ss->ss3.pitch = vme_context->vme_output.pitch - 1;
+
+    dri_bo_emit_reloc(bo,
+                      I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+                      0,
+                      SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state, ss1),
+                      vme_context->vme_output.bo);
+    /* Binding-table entry `index` holds the byte offset of this slot within the BO. */
+    ((unsigned int *)((char *)bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
+    dri_bo_unmap(bo);
+
+    return VA_STATUS_SUCCESS;
+}
+
+/* Set up every surface used by the VME kernels: indices 0 and 4 for the picture being
+ * encoded, index 1 for the reference picture (non-intra only), index 3 for the output. */
+static VAStatus gen75_vme_surface_setup(VADriverContextP ctx,
+                                       struct encode_state *encode_state, int is_intra,
+                                       struct gen6_encoder_context *gen6_encoder_context)
+{
+    /* NOTE(review): i965 is never named below; presumably SURFACE() expands to use it */
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    VAEncPictureParameterBufferH264 *pic_param =
+        (VAEncPictureParameterBufferH264 *)encode_state->pic_param->buffer;
+    struct object_surface *surface = SURFACE(encode_state->current_render_target);
+
+    /* Picture being encoded */
+    assert(surface);
+    gen75_vme_source_surface_state(ctx, 0, surface, gen6_encoder_context);
+    gen75_vme_media_source_surface_state(ctx, 4, surface, gen6_encoder_context);
+
+    if (!is_intra) {
+        /* Reference 0 */
+        surface = SURFACE(pic_param->reference_picture);
+        assert(surface);
+        gen75_vme_source_surface_state(ctx, 1, surface, gen6_encoder_context);
+        /* FIXME: reference 1 is not set up yet; disabled sketch of the missing calls: */
+        // obj_surface = SURFACE(pPicParameter->reference_picture);
+        // gen7_vme_source_surface_state(ctx, 2, obj_surface);
+    }
+
+    /* VME output buffer */
+    gen75_vme_output_buffer_setup(ctx, encode_state, 3, gen6_encoder_context);
+
+    return VA_STATUS_SUCCESS;
+}
+
+/* Fill one interface descriptor per VME kernel in the IDRT buffer and emit a
+ * relocation for each descriptor's kernel start pointer. Always returns success. */
+static VAStatus gen75_vme_interface_setup(VADriverContextP ctx,
+                                         struct encode_state *encode_state,
+                                         struct gen6_encoder_context *gen6_encoder_context)
+{
+    struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
+    dri_bo *idrt_bo = vme_context->idrt.bo;
+    struct gen6_interface_descriptor_data *table;
+    int k;
+
+    dri_bo_map(idrt_bo, 1);
+    assert(idrt_bo->virtual);
+    table = idrt_bo->virtual;
+
+    for (k = 0; k < GEN6_VME_KERNEL_NUMBER; k++) {
+        struct i965_kernel *kernel = &vme_context->vme_kernels[k];
+        struct gen6_interface_descriptor_data *entry = &table[k];
+        assert(sizeof(*entry) == 32);
+        /* Start from an all-zero descriptor, then set the individual fields */
+        memset(entry, 0, sizeof(*entry));
+        entry->desc0.kernel_start_pointer = (kernel->bo->offset >> 6);
+        entry->desc2.sampler_count = 0; /* FIXME: */
+        entry->desc2.sampler_state_pointer = 0;
+        entry->desc3.binding_table_entry_count = 1; /* FIXME: */
+        entry->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET >> 5);
+        entry->desc4.constant_urb_entry_read_offset = 0;
+        entry->desc4.constant_urb_entry_read_length = CURBE_URB_ENTRY_LENGTH;
+
+        /* Relocate desc0 of descriptor k against the kernel's buffer object */
+        dri_bo_emit_reloc(idrt_bo,
+                          I915_GEM_DOMAIN_INSTRUCTION, 0,
+                          0,
+                          k * sizeof(*entry) + offsetof(struct gen6_interface_descriptor_data, desc0),
+                          kernel->bo);
+    }
+    dri_bo_unmap(idrt_bo);
+
+    return VA_STATUS_SUCCESS;
+}
+
+/* Copy the first 32 bytes of the VME state message into the CURBE buffer. */
+static VAStatus gen75_vme_constant_setup(VADriverContextP ctx,
+                                        struct encode_state *encode_state,
+                                        struct gen6_encoder_context *gen6_encoder_context)
+{
+    struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
+    dri_bo *curbe_bo = vme_context->curbe.bo;
+
+    dri_bo_map(curbe_bo, 1);
+    assert(curbe_bo->virtual);
+
+    /*
+     * The VME MV/MB cost table is passed to the kernels through the constant
+     * buffer. The search path is fixed for now, so it is constructed directly
+     * in the GPU shader.
+     */
+    memcpy(curbe_bo->virtual, vme_context->vme_state_message, 32);
+
+    dri_bo_unmap(curbe_bo);
+    return VA_STATUS_SUCCESS;
+}
+
+/* Build the 8-dword VME state message that carries the MV/MB cost values.
+ * encode_state and is_intra are not used here; always returns success. */
+static VAStatus gen75_vme_vme_state_setup(VADriverContextP ctx,
+                                         struct encode_state *encode_state,
+                                         int is_intra,
+                                         struct gen6_encoder_context *gen6_encoder_context)
+{
+    /* Leading dwords of the message; the remaining dwords up to index 7 are zeroed */
+    static const unsigned int cost_dwords[] = {
+        0x4a4a4a4a, 0x4a4a4a4a, 0x4a4a4a4a,
+        0x22120200,
+        0x62524232,
+    };
+    const int num_cost_dwords = sizeof(cost_dwords) / sizeof(cost_dwords[0]);
+    struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
+    unsigned int *msg = (unsigned int *)vme_context->vme_state_message;
+    int i;
+
+    /* On Haswell the MV/MB cost is passed in through the VME state message */
+    assert(vme_context->vme_state_message);
+
+    for (i = 0; i < 8; i++)
+        msg[i] = (i < num_cost_dwords) ? cost_dwords[i] : 0;
+
+    return VA_STATUS_SUCCESS;
+}
+
+/* Emit PIPELINE_SELECT choosing the media pipeline; a NULL batch means the encoder's base batch. */
+static void gen75_vme_pipeline_select(VADriverContextP ctx,
+                                      struct gen6_encoder_context *gen6_encoder_context,
+                                      struct intel_batchbuffer *batch)
+{
+    struct intel_batchbuffer *out = batch ? batch : gen6_encoder_context->base.batch;
+
+    BEGIN_BATCH(out, 1);
+    OUT_BATCH(out, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
+    ADVANCE_BATCH(out);
+}
+
+/* Emit the 10-dword STATE_BASE_ADDRESS command. Only the surface state base is
+ * relocated (to the surface-state/binding-table buffer); every other base is 0 and
+ * every upper bound is 0xFFFFF000. A NULL batch means the encoder's base batch. */
+static void gen75_vme_state_base_address(VADriverContextP ctx,
+                                         struct gen6_encoder_context *gen6_encoder_context,
+                                         struct intel_batchbuffer *batch)
+{
+    struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
+    int n;
+
+    if (!batch)
+        batch = gen6_encoder_context->base.batch;
+
+    BEGIN_BATCH(batch, 10);
+    OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 8);
+
+    OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);  /* General State Base Address */
+    OUT_RELOC(batch, vme_context->surface_state_binding_table.bo,
+              I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
+    for (n = 0; n < 3; n++) {   /* Dynamic State, Indirect Object, Instruction Base Address */
+        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
+    }
+    for (n = 0; n < 4; n++) {   /* General State, Dynamic State, Indirect Object, Instruction Access Upper Bound */
+        OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY);
+    }
+    /* Disabled, not emitted:
+      OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);				//LLC Coherent Base Address
+      OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY );		//LLC Coherent Upper Bound
+    */
+    ADVANCE_BATCH(batch);
+}
+
+/* Emit the 8-dword MEDIA_VFE_STATE command from vme_context->vfe_state, with the
+ * three scoreboard dwords left at 0. A NULL batch means the encoder's base batch. */
+static void gen75_vme_vfe_state(VADriverContextP ctx,
+                                struct gen6_encoder_context *gen6_encoder_context,
+                                struct intel_batchbuffer *batch)
+{
+    struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
+    struct intel_batchbuffer *out = batch ? batch : gen6_encoder_context->base.batch;
+
+    BEGIN_BATCH(out, 8);
+
+    OUT_BATCH(out, CMD_MEDIA_VFE_STATE | 6);    /* Gen6 CMD_MEDIA_STATE_POINTERS = CMD_MEDIA_STATE */
+    OUT_BATCH(out, 0);                          /* Scratch Space Base Pointer and Space */
+    /* Maximum Number of Threads | Number of URB Entries | MEDIA Mode */
+    OUT_BATCH(out, (vme_context->vfe_state.max_num_threads << 16)
+              | (vme_context->vfe_state.num_urb_entries << 8)
+              | (vme_context->vfe_state.gpgpu_mode << 2) );
+    OUT_BATCH(out, 0);                          /* Debug: Object ID */
+    /* URB Entry Allocation Size | CURBE Allocation Size */
+    OUT_BATCH(out, (vme_context->vfe_state.urb_entry_size << 16)
+              | vme_context->vfe_state.curbe_allocation_size);
+    OUT_BATCH(out, 0);                          /* Disable Scoreboard */
+    OUT_BATCH(out, 0);                          /* Disable Scoreboard */
+    OUT_BATCH(out, 0);                          /* Disable Scoreboard */
+    ADVANCE_BATCH(out);
+}
+
+/* Emit MEDIA_CURBE_LOAD pointing at the CURBE buffer, with CURBE_TOTAL_DATA_LENGTH
+ * as the data length. A NULL batch means the encoder's base batch. */
+static void gen75_vme_curbe_load(VADriverContextP ctx,
+                                 struct gen6_encoder_context *gen6_encoder_context,
+                                 struct intel_batchbuffer *batch)
+{
+    struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
+    struct intel_batchbuffer *out = batch ? batch : gen6_encoder_context->base.batch;
+
+    BEGIN_BATCH(out, 4);
+
+    OUT_BATCH(out, CMD_MEDIA_CURBE_LOAD | 2);
+    OUT_BATCH(out, 0);
+    /* CURBE data length, then the relocated CURBE buffer address */
+    OUT_BATCH(out, CURBE_TOTAL_DATA_LENGTH);
+    OUT_RELOC(out, vme_context->curbe.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+
+    ADVANCE_BATCH(out);
+}
+
+/* Emit MEDIA_INTERFACE_LOAD covering GEN6_VME_KERNEL_NUMBER descriptors stored in
+ * the IDRT buffer. A NULL batch means the encoder's base batch. */
+static void gen75_vme_idrt(VADriverContextP ctx,
+                           struct gen6_encoder_context *gen6_encoder_context,
+                           struct intel_batchbuffer *batch)
+{
+    struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
+    struct intel_batchbuffer *out = batch ? batch : gen6_encoder_context->base.batch;
+
+    BEGIN_BATCH(out, 4);
+
+    OUT_BATCH(out, CMD_MEDIA_INTERFACE_LOAD | 2);
+    OUT_BATCH(out, 0);
+    OUT_BATCH(out, GEN6_VME_KERNEL_NUMBER * sizeof(struct gen6_interface_descriptor_data));
+    OUT_RELOC(out, vme_context->idrt.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+
+    ADVANCE_BATCH(out);
+}
+
+/* Emit one 8-dword MEDIA_OBJECT for macroblock (mb_x, mb_y) using interface descriptor
+ * 'kernel'; the last two dwords are inline data. Returns the command size in bytes. */
+static int gen75_vme_media_object(VADriverContextP ctx,
+                                  struct encode_state *encode_state,
+                                  int mb_x, int mb_y,
+                                  int kernel, unsigned int mb_intra_ub,
+                                  struct gen6_encoder_context *gen6_encoder_context,
+                                  struct intel_batchbuffer *batch)
+{
+    /* NOTE(review): i965 is never named below; presumably SURFACE() expands to use it */
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct object_surface *obj_surface = SURFACE(encode_state->current_render_target);
+    const int width_in_mbs = ALIGN(obj_surface->orig_width, 16) / 16;
+    const int num_dwords = 8;
+
+    if (!batch)
+        batch = gen6_encoder_context->base.batch;
+
+    BEGIN_BATCH(batch, num_dwords);
+    OUT_BATCH(batch, CMD_MEDIA_OBJECT | (num_dwords - 2));
+    OUT_BATCH(batch, kernel);               /* Interface Descriptor Offset */
+    OUT_BATCH(batch, 0);
+    OUT_BATCH(batch, 0);
+    OUT_BATCH(batch, 0);
+    OUT_BATCH(batch, 0);
+    /* Inline data: M0.0 Reference0 X,Y (not used in intra), then the intra flags */
+    OUT_BATCH(batch, width_in_mbs << 16 | mb_y << 8 | mb_x);
+    OUT_BATCH(batch, ((mb_intra_ub << 8) | 0));
+    ADVANCE_BATCH(batch);
+
+    return num_dwords * 4;
+}
+
+/* Replace the CURBE, surface-state/binding-table and IDRT buffer objects with newly
+ * allocated ones, release the VME output and VME state buffers, and set the VFE
+ * parameters that gen75_vme_vfe_state() emits. */
+static void gen75_vme_media_init(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
+
+    /* Constant (CURBE) buffer */
+    dri_bo_unreference(vme_context->curbe.bo);
+    vme_context->curbe.bo = dri_bo_alloc(i965->intel.bufmgr, "Buffer",
+                                         CURBE_TOTAL_DATA_LENGTH, 64);
+    assert(vme_context->curbe.bo);
+
+    /* Surface state and binding table */
+    dri_bo_unreference(vme_context->surface_state_binding_table.bo);
+    vme_context->surface_state_binding_table.bo =
+        dri_bo_alloc(i965->intel.bufmgr, "surface state & binding table",
+                     (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6,
+                     4096);
+    assert(vme_context->surface_state_binding_table.bo);
+
+    /* Interface descriptor remapping table */
+    dri_bo_unreference(vme_context->idrt.bo);
+    vme_context->idrt.bo =
+        dri_bo_alloc(i965->intel.bufmgr, "Buffer",
+                     MAX_INTERFACE_DESC_GEN6 * sizeof(struct gen6_interface_descriptor_data), 16);
+    assert(vme_context->idrt.bo);
+
+    /* VME output buffer: only released here, gen75_vme_output_buffer_setup() allocates it */
+    dri_bo_unreference(vme_context->vme_output.bo);
+    vme_context->vme_output.bo = NULL;
+
+    /* VME state */
+    dri_bo_unreference(vme_context->vme_state.bo);
+    vme_context->vme_state.bo = NULL;
+
+    /* VFE state */
+    vme_context->vfe_state.max_num_threads = 60 - 1;
+    vme_context->vfe_state.num_urb_entries = 16;
+    vme_context->vfe_state.gpgpu_mode = 0;
+    vme_context->vfe_state.urb_entry_size = 59 - 1;
+    vme_context->vfe_state.curbe_allocation_size = CURBE_ALLOCATION_SIZE - 1;
+}
+
+#define		INTRA_PRED_AVAIL_FLAG_AE	0x60	/* set when the macroblock column x != 0 */
+#define		INTRA_PRED_AVAIL_FLAG_B		0x10	/* set when the macroblock row y != 0 */
+#define		INTRA_PRED_AVAIL_FLAG_C       	0x8	/* set when y != 0 and x is not the last column */
+#define		INTRA_PRED_AVAIL_FLAG_D		0x4	/* set when y != 0 and x != 0 */
+#define		INTRA_PRED_AVAIL_FLAG_BCD_MASK	0x1C	/* B | C | D */
+
+static void gen75_vme_pipeline_programing(VADriverContextP ctx, 
+                                         struct encode_state *encode_state,
+                                         struct gen6_encoder_context *gen6_encoder_context)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct intel_batchbuffer *main_batch = gen6_encoder_context->base.batch;
+    VAEncSliceParameterBuffer *pSliceParameter = (VAEncSliceParameterBuffer *)encode_state->slice_params[0]->buffer;
+    VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param->buffer;
+    int is_intra = pSliceParameter->slice_flags.bits.is_intra;
+    int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
+    int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
+    int emit_new_state = 1, object_len_in_bytes;
+    int x, y;
+    unsigned int mb_intra_ub; 
+    struct intel_batchbuffer *batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_RENDER, width_in_mbs * height_in_mbs * 8 * 4 + 0x200);
+    /* Fill a secondary batch with one MEDIA_OBJECT per macroblock, then chain to it from the main batch */
+    intel_batchbuffer_start_atomic(batch, width_in_mbs * height_in_mbs * 8 * 4 + 0x100);
+
+    for(y = 0; y < height_in_mbs; y++){
+        for(x = 0; x < width_in_mbs; x++){	
+	    mb_intra_ub = 0;	/* flags for this macroblock, passed to the kernel as inline data */
+	    if (x != 0) {
+		mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
+	    }
+	    if (y != 0) {
+		mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
+		if (x != 0)
+			mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
+		if (x != (width_in_mbs -1))
+			mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
+	    }
+
+            if (emit_new_state) {	/* true for the first macroblock and after a flush */
+                /*Step1: MI_FLUSH/PIPE_CONTROL*/
+                intel_batchbuffer_emit_mi_flush(batch);
+
+                /*Step2: State command PIPELINE_SELECT*/
+                gen75_vme_pipeline_select(ctx, gen6_encoder_context, batch);
+
+                /*Step3: State commands configuring pipeline states*/
+                gen75_vme_state_base_address(ctx, gen6_encoder_context, batch);
+                gen75_vme_vfe_state(ctx, gen6_encoder_context, batch);
+                gen75_vme_curbe_load(ctx, gen6_encoder_context, batch);
+                gen75_vme_idrt(ctx, gen6_encoder_context, batch);
+
+                emit_new_state = 0;
+            }
+
+            /*Step4: Primitive commands*/
+            object_len_in_bytes = gen75_vme_media_object(ctx, encode_state, x, y, is_intra ? VME_INTRA_SHADER : VME_INTER_SHADER, mb_intra_ub, gen6_encoder_context, batch);
+
+            if (intel_batchbuffer_check_free_space(batch, object_len_in_bytes) == 0) {
+                assert(0);	/* running out of space is treated as a bug; the lines below only run when asserts are compiled out */
+                intel_batchbuffer_end_atomic(batch);	
+                intel_batchbuffer_flush(batch);
+                emit_new_state = 1;
+                intel_batchbuffer_start_atomic(batch, 0x1000);
+            }
+        }
+    }
+
+    intel_batchbuffer_align(batch, 8);
+
+    BEGIN_BATCH(batch, 2);	/* terminate the secondary batch */
+    OUT_BATCH(batch, 0);
+    OUT_BATCH(batch, MI_BATCH_BUFFER_END);
+    ADVANCE_BATCH(batch);
+
+    intel_batchbuffer_end_atomic(batch);
+
+    /* chain to the main batch buffer */
+    intel_batchbuffer_start_atomic(main_batch, 0x100);
+    intel_batchbuffer_emit_mi_flush(main_batch);
+    BEGIN_BATCH(main_batch, 2);
+    OUT_BATCH(main_batch, MI_BATCH_BUFFER_START | (2 << 6));
+    OUT_RELOC(main_batch,
+              batch->buffer,
+              I915_GEM_DOMAIN_COMMAND, 0,
+              0);
+    ADVANCE_BATCH(main_batch);
+    intel_batchbuffer_end_atomic(main_batch);
+
+    // end of programming; main_batch refers to batch->buffer through the relocation emitted above
+    intel_batchbuffer_free(batch);
+}
+
+/* Set up surfaces, interface descriptors, the VME state message and the constant
+ * buffer, then program the media pipeline. Always returns VA_STATUS_SUCCESS. */
+static VAStatus gen75_vme_prepare(VADriverContextP ctx,
+                                 struct encode_state *encode_state,
+                                 struct gen6_encoder_context *gen6_encoder_context)
+{
+    VAEncSliceParameterBuffer *slice_param =
+        (VAEncSliceParameterBuffer *)encode_state->slice_params[0]->buffer;
+    const int is_intra = slice_param->slice_flags.bits.is_intra;
+
+    gen75_vme_surface_setup(ctx, encode_state, is_intra, gen6_encoder_context);
+    gen75_vme_interface_setup(ctx, encode_state, gen6_encoder_context);
+    gen75_vme_vme_state_setup(ctx, encode_state, is_intra, gen6_encoder_context);
+    gen75_vme_constant_setup(ctx, encode_state, gen6_encoder_context);
+
+    /* Program the media pipeline */
+    gen75_vme_pipeline_programing(ctx, encode_state, gen6_encoder_context);
+    return VA_STATUS_SUCCESS;
+}
+
+/* Flush the encoder's base batch buffer, to which gen75_vme_pipeline_programing()
+ * chained the VME commands. Always returns VA_STATUS_SUCCESS. */
+static VAStatus gen75_vme_run(VADriverContextP ctx,
+                             struct encode_state *encode_state,
+                             struct gen6_encoder_context *gen6_encoder_context)
+{
+    intel_batchbuffer_flush(gen6_encoder_context->base.batch);
+
+    return VA_STATUS_SUCCESS;
+}
+
+/* Stop stage of the VME pipeline: does nothing and always returns VA_STATUS_SUCCESS. */
+static VAStatus gen75_vme_stop(VADriverContextP ctx, struct encode_state *encode_state,
+                              struct gen6_encoder_context *gen6_encoder_context)
+{
+    return VA_STATUS_SUCCESS;
+}
+
+/* Run one VME pass: media_init, prepare, run, stop. 'profile' and the stage return
+ * values are not examined; always returns VA_STATUS_SUCCESS. */
+VAStatus gen75_vme_pipeline(VADriverContextP ctx, VAProfile profile, struct encode_state *encode_state,
+                           struct gen6_encoder_context *gen6_encoder_context)
+{
+    gen75_vme_media_init(ctx, gen6_encoder_context);
+    gen75_vme_prepare(ctx, encode_state, gen6_encoder_context);
+    gen75_vme_run(ctx, encode_state, gen6_encoder_context);
+    gen75_vme_stop(ctx, encode_state, gen6_encoder_context);
+
+    return VA_STATUS_SUCCESS;
+}
+
+/* Copy the gen75 VME kernel table into the context, upload each kernel into its own
+ * buffer object and allocate the VME state message. Returns 0 if that allocation fails. */
+Bool gen75_vme_context_init(VADriverContextP ctx, struct gen6_vme_context *vme_context)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    int i;
+
+    memcpy(vme_context->vme_kernels, gen75_vme_kernels, sizeof(vme_context->vme_kernels));
+
+    for (i = 0; i < GEN6_VME_KERNEL_NUMBER; i++) {
+        /* Load kernel into GPU memory */
+        struct i965_kernel *kernel = &vme_context->vme_kernels[i];
+
+        kernel->bo = dri_bo_alloc(i965->intel.bufmgr, kernel->name, kernel->size, 0x1000);
+        assert(kernel->bo);
+        dri_bo_subdata(kernel->bo, 0, kernel->size, kernel->bin);
+    }
+
+    /* Filled in later by gen75_vme_vme_state_setup(), which asserts that it is non-NULL */
+    vme_context->vme_state_message = malloc(VME_MSG_LENGTH * sizeof(int));
+    return vme_context->vme_state_message != NULL;
+}
+
+/* Release every buffer object held by the VME context, including the kernel buffers,
+ * and free the VME state message. Always returns True. */
+Bool gen75_vme_context_destroy(struct gen6_vme_context *vme_context)
+{
+    int k;
+
+    dri_bo_unreference(vme_context->idrt.bo);
+    vme_context->idrt.bo = NULL;
+
+    dri_bo_unreference(vme_context->surface_state_binding_table.bo);
+    vme_context->surface_state_binding_table.bo = NULL;
+
+    dri_bo_unreference(vme_context->curbe.bo);
+    vme_context->curbe.bo = NULL;
+
+    dri_bo_unreference(vme_context->vme_output.bo);
+    vme_context->vme_output.bo = NULL;
+
+    dri_bo_unreference(vme_context->vme_state.bo);
+    vme_context->vme_state.bo = NULL;
+
+    /* Drop the buffer object of every uploaded kernel */
+    for (k = 0; k < GEN6_VME_KERNEL_NUMBER; k++) {
+        struct i965_kernel *kernel = &vme_context->vme_kernels[k];
+
+        dri_bo_unreference(kernel->bo);
+        kernel->bo = NULL;
+    }
+
+    /* free(NULL) is a no-op, so no NULL test is needed */
+    free(vme_context->vme_state_message);
+    vme_context->vme_state_message = NULL;
+    return True;
+}
diff --git a/src/gen75_vpp_vebox.c b/src/gen75_vpp_vebox.c
new file mode 100644
index 0000000..b1bef7b
--- /dev/null
+++ b/src/gen75_vpp_vebox.c
@@ -0,0 +1,861 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *   Li Xiaowei <xiaowei.a.li at intel.com>
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <math.h>
+
+#include "intel_batchbuffer.h"
+#include "intel_driver.h"
+#include "i965_defines.h"
+#include "i965_structs.h"
+#include "gen75_vpp_vebox.h"
+
+#define PI  3.1415926
+
+extern VAStatus 
+i965_CreateSurfaces(VADriverContextP ctx,
+                    int width,
+                    int height,
+                    int format,
+                    int num_surfaces,
+                    VASurfaceID *surfaces);      
+
+/* Convert src to fixed point with out_int_bits integer and out_frac_bits fraction bits
+ * (truncating). Negative values are two's complement; if out_sign_flag is 1 the bit
+ * above the magnitude is also set as sign bit. No range check is done on src. */
+int format_convert(float src, int out_int_bits, int out_frac_bits,int out_sign_flag)
+{
+     const unsigned char negative_flag = (src < 0.0) ? 1 : 0;
+     const float src_1 = negative_flag ? -src : src;
+     const unsigned int factor = 1 << out_frac_bits;
+     const unsigned int integer_part = (unsigned int)floor(src_1);
+     const unsigned int fraction_part = ((int)((src_1 - integer_part) * factor)) & (factor - 1);
+     int output_value = (integer_part << out_frac_bits) | fraction_part;
+
+     if (negative_flag)
+         output_value = (~output_value + 1) & ((1 << (out_int_bits + out_frac_bits)) - 1);
+
+     /* A negative input whose magnitude truncates to 0 must stay 0: with the sign bit
+      * set and a zero magnitude the result would read as the most negative value. */
+     if (output_value != 0 && out_sign_flag == 1 && negative_flag)
+          output_value |= negative_flag << (out_int_bits + out_frac_bits);
+     return output_value;
+}
+
+void hsw_veb_dndi_table(VADriverContextP ctx, struct intel_vebox_context *proc_ctx)
+{
+    unsigned int* p_table ;
+    /*
+    VAProcFilterParameterBufferDeinterlacing *di_param =
+            (VAProcFilterParameterBufferDeinterlacing *) proc_ctx->filter_di;
+
+    VAProcFilterParameterBuffer * dn_param =
+            (VAProcFilterParameterBuffer *) proc_ctx->filter_dn;
+    */
+    p_table = (unsigned int *)proc_ctx->dndi_state_table.ptr;
+    /* Fill the 10 dwords (w0..w9) of the DN/DI state table with fixed values; ctx and the filter parameters are not consulted */
+    *p_table ++ = 0;               // reserved  . w0
+    *p_table ++ = ( 0   << 24 |    // denoise STAD threshold . w1
+                    128 << 16 |    // dnmh_history_max
+                    0   << 12 |    // reserved
+                    8   << 8  |    // dnmh_delta[3:0]
+                    0 );           // denoise ASD threshold
+
+    *p_table ++ = ( 0  << 30 |    // reserved . w2
+                    16 << 24 |    // temporal diff th
+                    0  << 22 |    // reserved.
+                    8  << 16 |    // low temporal diff th
+                    0  << 13 |    // STMM C2
+                    0  << 8  |    // denoise moving pixel th
+                    64 );         // denoise th for sum of complexity measure
+
+    *p_table ++ = ( 0 << 30  |   // reserved . w3
+                    4 << 24  |   // good neighbor th[5:0]
+                    9 << 20  |   // CAT slope minus 1
+                    5 << 16  |   // SAD Tight in
+                    0 << 14  |   // smooth mv th
+                    0 << 12  |   // reserved
+                    1 << 8   |   // bne_edge_th[3:0]
+                    15 );        // block noise estimate noise th
+
+    *p_table ++ = ( 0  << 31  |  // STMM blending constant select. w4
+                    64 << 24  |  // STMM trc1
+                    0  << 16  |  // STMM trc2
+                    0  << 14  |  // reserved
+                    2  << 8   |  // VECM_mul
+                    128 );       // maximum STMM
+
+    *p_table ++ = ( 0  << 24  |  // minimum STMM  . w5
+                    0  << 22  |  // STMM shift down
+                    0  << 20  |  // STMM shift up
+                    7  << 16  |  // STMM output shift
+                    128 << 8  |  // SDI threshold
+                    8 );         // SDI delta
+
+    *p_table ++ = ( 0 << 24  |   // SDI fallback mode 1 T1 constant . w6
+                    0 << 16  |   // SDI fallback mode 1 T2 constant
+                    0 << 8   |   // SDI fallback mode 2 constant(angle2x1)
+                    0 );         // FMD temporal difference threshold
+
+    *p_table ++ = ( 32 << 24  |  // FMD #1 vertical difference th . w7
+                    32 << 16  |  // FMD #2 vertical difference th
+                    1  << 14  |  // CAT th1
+                    32 << 8   |  // FMD tear threshold
+                    0  << 7   |  // MCDI Enable, use motion compensated deinterlace algorithm
+                    0  << 6   |  // progressive DN
+                    0  << 4   |  // reserved
+                    0  << 3   |  // DN/DI Top First
+                    0 );         // reserved
+
+    *p_table ++ = ( 0  << 29  |  // reserved . w8
+                    0  << 23  |  // dnmh_history_init[5:0]
+                    10 << 19  |  // neighborPixel th
+                    0  << 18  |  // reserved
+                    0  << 16  |  // FMD for 2nd field of previous frame
+                    25 << 10  |  // MC pixel consistency th
+                    0  << 8   |  // FMD for 1st field for current frame
+                    10 << 4   |  // SAD THB
+                    5 );         // SAD THA
+
+    *p_table ++ = ( 0 << 24  |  // reserved . w9
+                    0 << 16  |  // chr_dnmh_stad_th
+                    0 << 13  |  // reserved
+                    0 << 12  |  // chroma denoise enable
+                    0 << 6   |  // chr temp diff th
+                    0 );        // chr temp diff low
+
+}
+
+/* Write the 29-dword STD/STE section at the start of the IECP state table: fixed
+ * values when VPP_IECP_STD_STE is set in filters_mask, all zeros otherwise. */
+void hsw_veb_iecp_std_table(VADriverContextP ctx, struct intel_vebox_context *proc_ctx)
+{
+    static const unsigned int std_ste_dwords[29] = {
+        0x9a6e39f0,
+        0x400c0000,
+        0x00001180,
+        0xfe2f2e00,
+        0x000000ff,
+        0x00140000,
+        0xd82e0000,
+        0x8285ecec,
+        0x00008282,
+        0x00000000,
+        0x02117000,
+        0xa38fec96,
+        0x0000c8c8,
+        0x00000000,
+        0x01478000,
+        0x0007c306,
+        0x00000000,
+        0x00000000,
+        0x1c1bd000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x0007cf80,
+        0x00000000,
+        0x00000000,
+        0x1c080000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+    };
+    unsigned int *p_table = proc_ctx->iecp_state_table.ptr + 0 ;
+    /*
+      VAProcFilterParameterBuffer * std_param =
+             (VAProcFilterParameterBuffer *) proc_ctx->filter_std;
+    */
+
+    if (proc_ctx->filters_mask & VPP_IECP_STD_STE)
+        memcpy(p_table, std_ste_dwords, sizeof(std_ste_dwords));
+    else
+        memset(p_table, 0, 29 * 4);
+}
+
+/* Write the 13-dword ACE section at byte offset 116 of the IECP state table. */
+void hsw_veb_iecp_ace_table(VADriverContextP ctx, struct intel_vebox_context *proc_ctx)
+{
+    static const unsigned int ace_dwords[13] = {
+        0x00000068,
+        0x4c382410,
+        0x9c887460,
+        0xebd8c4b0,
+        0x604c3824,
+        0xb09c8874,
+        0x0000d8c4,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+        0x00000000,
+    };
+    unsigned int *p_table = (unsigned int*)(proc_ctx->iecp_state_table.ptr + 116);
+    if (proc_ctx->filters_mask & VPP_IECP_ACE)
+        memcpy(p_table, ace_dwords, sizeof(ace_dwords));   /* filter enabled: fixed values */
+    else
+        memset(p_table, 0, 13 * 4);                        /* filter disabled: all zeros */
+}
+
+/* Write the 11-dword TCC section at byte offset 168 of the IECP state table: fixed
+ * values when VPP_IECP_TCC is set in filters_mask, all zeros otherwise. */
+void hsw_veb_iecp_tcc_table(VADriverContextP ctx, struct intel_vebox_context *proc_ctx)
+{
+    static const unsigned int tcc_dwords[11] = {
+        0x00000000,
+        0x00000000,
+        0x1e34cc91,
+        0x3e3cce91,
+        0x02e80195,
+        0x0197046b,
+        0x01790174,
+        0x00000000,
+        0x00000000,
+        0x03030000,
+        0x009201c0,
+    };
+    unsigned int *p_table = (unsigned int*)(proc_ctx->iecp_state_table.ptr + 168);
+    /* proc_ctx->filter_iecp_tcc (a VAProcFilterParameterBuffer) is not consulted yet */
+    if (proc_ctx->filters_mask & VPP_IECP_TCC)
+        memcpy(p_table, tcc_dwords, sizeof(tcc_dwords));
+    else
+        memset(p_table, 0, 11 * 4);
+}
+
+void hsw_veb_iecp_pro_amp_table(VADriverContextP ctx, struct intel_vebox_context *proc_ctx)
+{
+    unsigned int contrast = 0x80;  //default 
+    int brightness = 0x00;         //default
+    int cos_c_s    = 256 ;         //default
+    int sin_c_s    = 0;            //default 
+    unsigned int *p_table = (unsigned int*)(proc_ctx->iecp_state_table.ptr + 212);
+    /* Writes the 2 ProcAmp dwords at byte offset 212 of the IECP state table: zeros when VPP_IECP_PRO_AMP is not set */
+    if(!(proc_ctx->filters_mask & VPP_IECP_PRO_AMP)){
+        memset(p_table, 0, 2 * 4);
+    }else {
+        float  tmp_value = 0.0;
+        float  src_saturation = 1.0;
+        float  src_hue = 0.0;
+        float  src_contrast   = 1.0;
+        /* Disabled: per-attribute lookup of the filter parameters, so the defaults above are always used
+        float  src_brightness = 0.0;
+
+        VAProcFilterParameterBufferColorBalance * amp_param =
+        (VAProcFilterParameterBufferColorBalance *) proc_ctx->filter_iecp_amp;
+        VAProcColorBalanceType attrib = amp_param->attrib;
+
+        if(attrib == VAProcColorBalanceHue) {
+           src_hue = amp_param->value;         //(-180.0, 180.0)
+        }else if(attrib == VAProcColorBalanceSaturation) {
+           src_saturation = amp_param->value; //(0.0, 10.0)
+        }else if(attrib == VAProcColorBalanceBrightness) {
+           src_brightness = amp_param->value; // (-100.0, 100.0)
+           brightness = format_convert(src_brightness, 7, 4, 1);
+        }else if(attrib == VAProcColorBalanceContrast) {
+           src_contrast = amp_param->value;  //  (0.0, 10.0)
+           contrast = format_convert(src_contrast, 4, 7, 0);
+        }
+        */
+        tmp_value = cos(src_hue/180*PI) * src_contrast * src_saturation;   // src_hue is in degrees
+        cos_c_s = format_convert(tmp_value, 7, 8, 1);
+        
+        tmp_value = sin(src_hue/180*PI) * src_contrast * src_saturation;
+        sin_c_s = format_convert(tmp_value, 7, 8, 1);
+     
+        *p_table ++ = ( 0 << 28 |         //reserved
+                        contrast << 17 |  //contrast value (U4.7 format)
+                        0 << 13 |         //reserved
+                        brightness << 1|  // S7.4 format
+                        1);               // bit 0 is set only on this enabled path
+
+        *p_table ++ = ( cos_c_s << 16 |  // cos(h) * contrast * saturation
+                        sin_c_s);        // sin(h) * contrast * saturation
+                 
+    }
+}
+
+
+void hsw_veb_iecp_csc_table(VADriverContextP ctx, struct intel_vebox_context *proc_ctx)
+{
+    /* Fill the eight CSC dwords at byte offset 220 of the mapped IECP state table:
+     * zeros when CSC is off or input and output fourcc are equal, otherwise a 3x3
+     * matrix plus two offset triples packed through format_convert(). */
+    unsigned int *p_table = (unsigned int*)(proc_ctx->iecp_state_table.ptr + 220);
+    float tran_coef[9] = {1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0}; /* identity */
+    float v_coef[3]    = {0.0, 0.0, 0.0};
+    float u_coef[3]    = {0.0, 0.0, 0.0};
+    int   is_transform_enabled = 0;
+
+    if(!(proc_ctx->filters_mask & VPP_IECP_CSC)){
+        memset(p_table, 0, 8 * 4);
+        return;
+    }
+    /*
+    VAProcColorStandardType   in_color_std  = proc_ctx->pipeline_param->surface_color_standard;
+    VAProcColorStandardType   out_color_std = proc_ctx->pipeline_param->output_color_standard;
+    assert(in_color_std == out_color_std);
+    */
+    if(proc_ctx->fourcc_input == VA_FOURCC('R','G','B','A') &&
+       (proc_ctx->fourcc_output == VA_FOURCC('N','V','1','2') ||
+        proc_ctx->fourcc_output == VA_FOURCC('Y','V','1','2') ||
+        proc_ctx->fourcc_output == VA_FOURCC('Y','U','Y','2') || /* YUY2, as in the branch below */
+        proc_ctx->fourcc_output == VA_FOURCC('A','Y','U','V'))) {
+         /* RGB -> YUV matrix; the u_coef triple carries 16/128/128 scaled by 4. */
+         tran_coef[0] = 0.257;
+         tran_coef[1] = 0.504;
+         tran_coef[2] = 0.098;
+         tran_coef[3] = -0.148;
+         tran_coef[4] = -0.291;
+         tran_coef[5] = 0.439;
+         tran_coef[6] = 0.439;
+         tran_coef[7] = -0.368;
+         tran_coef[8] = -0.071;
+
+         u_coef[0] = 16 * 4;
+         u_coef[1] = 128 * 4;
+         u_coef[2] = 128 * 4;
+         is_transform_enabled = 1;
+    }else if((proc_ctx->fourcc_input  == VA_FOURCC('N','V','1','2') ||
+              proc_ctx->fourcc_input  == VA_FOURCC('Y','V','1','2') ||
+              proc_ctx->fourcc_input  == VA_FOURCC('Y','U','Y','2') ||
+              proc_ctx->fourcc_input  == VA_FOURCC('A','Y','U','V'))&&
+              proc_ctx->fourcc_output == VA_FOURCC('R','G','B','A')) {
+         /* YUV -> RGB matrix. NOTE(review): 1.569 may be a typo for the usual 1.596 - confirm. */
+         tran_coef[0] = 1.164;
+         tran_coef[1] = 0.000;
+         tran_coef[2] = 1.569;
+         tran_coef[3] = 1.164;
+         tran_coef[4] = -0.813;
+         tran_coef[5] = -0.392;
+         tran_coef[6] = 1.164;
+         tran_coef[7] = 2.017;
+         tran_coef[8] = 0.000;
+
+         v_coef[0] = -16 * 4;
+         v_coef[1] = -128 * 4;
+         v_coef[2] = -128 * 4;
+         is_transform_enabled = 1;
+    }else if(proc_ctx->fourcc_input != proc_ctx->fourcc_output){
+         is_transform_enabled = 1; // any other differing pair: identity matrix, zero offsets
+    }
+
+    if(is_transform_enabled == 0){
+        memset(p_table, 0, 8 * 4);
+    }else{
+        *p_table ++ = ( 0 << 29 | //reserved
+                        format_convert(tran_coef[1], 2, 10, 1) << 16 | //c1, s2.10 format
+                        format_convert(tran_coef[0], 2, 10, 1) << 3 |  //c0, s2.10 format
+                        0 << 2 | //reserved
+                        0 << 1 | // yuv_channel swap
+                        is_transform_enabled);
+
+        *p_table ++ = ( 0 << 26 | //reserved
+                        format_convert(tran_coef[3], 2, 10, 1) << 13 |
+                        format_convert(tran_coef[2], 2, 10, 1));
+
+        *p_table ++ = ( 0 << 26 | //reserved
+                        format_convert(tran_coef[5], 2, 10, 1) << 13 |
+                        format_convert(tran_coef[4], 2, 10, 1));
+
+        *p_table ++ = ( 0 << 26 | //reserved
+                        format_convert(tran_coef[7], 2, 10, 1) << 13 |
+                        format_convert(tran_coef[6], 2, 10, 1));
+
+        *p_table ++ = ( 0 << 13 | //reserved
+                        format_convert(tran_coef[8], 2, 10, 1));
+
+        *p_table ++ = ( 0 << 22 | //reserved
+                        format_convert(u_coef[0], 10, 0, 1) << 11 |
+                        format_convert(v_coef[0], 10, 0, 1));
+
+        *p_table ++ = ( 0 << 22 | //reserved
+                        format_convert(u_coef[1], 10, 0, 1) << 11 |
+                        format_convert(v_coef[1], 10, 0, 1));
+
+        *p_table ++ = ( 0 << 22 | //reserved
+                        format_convert(u_coef[2], 10, 0, 1) << 11 |
+                        format_convert(v_coef[2], 10, 0, 1));
+    }
+}
+
+void hsw_veb_iecp_aoi_table(VADriverContextP ctx, struct intel_vebox_context *proc_ctx)
+{
+    /* Three AOI dwords live at byte offset 252 of the mapped IECP state table. */
+    unsigned int *aoi = (unsigned int*)(proc_ctx->iecp_state_table.ptr + 252);
+
+    if(proc_ctx->filters_mask & VPP_IECP_AOI){
+        /* Filter requested: program the fixed values. */
+        aoi[0] = 0x00000000;
+        aoi[1] = 0x00030000;
+        aoi[2] = 0x00030000;
+        return;
+    }
+    /* Filter not requested: clear the three dwords. */
+    memset(aoi, 0, 3 * 4);
+}
+
+void hsw_veb_state_table_setup(VADriverContextP ctx, struct intel_vebox_context *proc_ctx)
+{
+    /* Map, fill and unmap the state tables that the requested filters need. */
+    dri_bo *table_bo;
+    /* Any bit in the low byte of filters_mask: rebuild the DN/DI table. */
+    if(proc_ctx->filters_mask & 0x000000ff) {
+        table_bo = proc_ctx->dndi_state_table.bo;
+        dri_bo_map(table_bo, 1);
+        proc_ctx->dndi_state_table.ptr = table_bo->virtual;
+        hsw_veb_dndi_table(ctx, proc_ctx);
+        dri_bo_unmap(table_bo);
+    }
+
+    /* Any bit in the second byte, or differing fourccs: rebuild every IECP sub-table. */
+    if((proc_ctx->filters_mask & 0x0000ff00) ||
+       proc_ctx->fourcc_input != proc_ctx->fourcc_output) {
+        table_bo = proc_ctx->iecp_state_table.bo;
+        dri_bo_map(table_bo, 1);
+        proc_ctx->iecp_state_table.ptr = table_bo->virtual;
+        hsw_veb_iecp_std_table(ctx, proc_ctx);
+        hsw_veb_iecp_ace_table(ctx, proc_ctx);
+        hsw_veb_iecp_tcc_table(ctx, proc_ctx);
+        hsw_veb_iecp_pro_amp_table(ctx, proc_ctx);
+        hsw_veb_iecp_csc_table(ctx, proc_ctx);
+        hsw_veb_iecp_aoi_table(ctx, proc_ctx);
+        dri_bo_unmap(table_bo);
+    }
+}
+
+void hsw_veb_state_command(VADriverContextP ctx, struct intel_vebox_context *proc_ctx)
+{
+    /*
+     * Emit the 6-dword VEB_STATE command: header, one control dword, then one
+     * relocation each for the DN/DI, IECP, gamut and vertex state tables.
+     * NOTE(review): the IECP enable bit looks only at filters_mask, while
+     * hsw_veb_state_table_setup() also fills the IECP table when the input and
+     * output fourcc differ - confirm this is intended.
+     */
+    struct intel_batchbuffer *batch = proc_ctx->batch;
+    unsigned int dn_on   = (proc_ctx->filters_mask & 0x01) != 0;
+    unsigned int di_on   = (proc_ctx->filters_mask & 0x02) != 0;
+    unsigned int iecp_on = (proc_ctx->filters_mask & 0xff00) != 0;
+    unsigned int first_frame = !!(proc_ctx->is_first_frame && (di_on || dn_on));
+    unsigned int control;
+
+    /* Every field not listed here (surface control, pipe sync, downsample, gamut) stays zero. */
+    control = 2 << 8           |  // DI output frame
+              first_frame << 5 |  // DN/DI first frame
+              di_on   << 4     |  // DI enable
+              dn_on   << 3     |  // DN enable
+              iecp_on << 2;       // global IECP enabled
+
+    BEGIN_VEB_BATCH(batch, 6);
+    OUT_VEB_BATCH(batch, VEB_STATE | (6 - 2));
+    OUT_VEB_BATCH(batch, control);
+
+    OUT_RELOC(batch, proc_ctx->dndi_state_table.bo,
+              I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+
+    OUT_RELOC(batch, proc_ctx->iecp_state_table.bo,
+              I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+
+    OUT_RELOC(batch, proc_ctx->gamut_state_table.bo,
+              I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+
+    OUT_RELOC(batch, proc_ctx->vertex_state_table.bo,
+              I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+
+    ADVANCE_VEB_BATCH(batch);
+}
+
+void hsw_veb_surface_state(VADriverContextP ctx, struct intel_vebox_context *proc_ctx, unsigned int is_output)
+{
+    /*
+     * Emit the 6-dword VEB_SURFACE_STATE command for the current input surface
+     * (is_output == 0) or the current output surface (is_output != 0).
+     * Format and pitch are derived from the surface fourcc; width and height
+     * come from proc_ctx->pic_width / pic_height.
+     */
+    struct  i965_driver_data *i965 = i965_driver_data(ctx);
+    struct intel_batchbuffer *batch = proc_ctx->batch;
+    unsigned int u_offset_y = 0, v_offset_y = 0;
+    unsigned int is_uv_interleaved = 0, tiling = 0, swizzle = 0;
+    unsigned int surface_format = PLANAR_420_8;
+    struct object_surface* obj_surf = NULL;
+    unsigned int surface_pitch = 0;
+    unsigned int half_pitch_chroma = 0;
+
+    if(is_output){
+         obj_surf = SURFACE(proc_ctx->frame_store[FRAME_OUT_CURRENT].surface_id);
+    }else {
+         obj_surf = SURFACE(proc_ctx->frame_store[FRAME_IN_CURRENT].surface_id);
+    }
+
+    /* Pitch is obj_surf->width scaled by 1, 2 or 4 depending on the format.
+     * This path runs for every picture, so it must not print to stdout. */
+    if (obj_surf->fourcc == VA_FOURCC_NV12) {
+        surface_format = PLANAR_420_8;
+        surface_pitch = obj_surf->width;
+        is_uv_interleaved = 1;          /* only NV12 sets the interleaved-chroma bit */
+    } else if (obj_surf->fourcc == VA_FOURCC_YUY2) {
+        surface_format = YCRCB_NORMAL;
+        surface_pitch = obj_surf->width * 2;
+    } else if (obj_surf->fourcc == VA_FOURCC_AYUV) {
+        surface_format = PACKED_444A_8;
+        surface_pitch = obj_surf->width * 4;
+    } else if (obj_surf->fourcc == VA_FOURCC_RGBA) {
+        surface_format = R8G8B8A8_UNORM_SRGB;
+        surface_pitch = obj_surf->width * 4;
+    }
+
+    u_offset_y = obj_surf->y_cb_offset;
+    v_offset_y = obj_surf->y_cr_offset;
+    dri_bo_get_tiling(obj_surf->bo, &tiling, &swizzle);
+
+    BEGIN_VEB_BATCH(batch, 6);
+    OUT_VEB_BATCH(batch, VEB_SURFACE_STATE | (6 - 2));
+    OUT_VEB_BATCH(batch,
+                  0 << 1 |         // reserved
+                  is_output);      // surface identification.
+
+    /* NOTE(review): height is programmed minus one but width is not - confirm. */
+    OUT_VEB_BATCH(batch,
+                  (proc_ctx->pic_height - 1) << 18 |  // height . w3
+                  (proc_ctx->pic_width )  << 4  |  // width
+                  0);                                 // reserved
+
+    OUT_VEB_BATCH(batch,
+                  surface_format      << 28  |  // surface format. w4
+                  is_uv_interleaved   << 27  |  // interleaved chroma
+                  0                   << 20  |  // reserved
+                  (surface_pitch - 1) << 3   |  // surface pitch, 64 align
+                  half_pitch_chroma   << 2   |  // half pitch for chroma
+                  !!tiling            << 1   |  // tiled surface
+                  (tiling == I915_TILING_Y));   // tile walk, ignored for a linear surface
+
+    OUT_VEB_BATCH(batch,
+                  0 << 29  |     // reserved . w5
+                  0 << 16  |     // X offset for U(Cb)
+                  0 << 15  |     // reserved
+                  u_offset_y);   // Y offset for U(Cb)
+
+    OUT_VEB_BATCH(batch,
+                  0 << 29  |     // reserved . w6
+                  0 << 16  |     // X offset for V(Cr)
+                  0 << 15  |     // reserved
+                  v_offset_y );  // Y offset for V(Cr)
+
+    ADVANCE_VEB_BATCH(batch);
+}
+
+void hsw_veb_dndi_iecp_command(VADriverContextP ctx, struct intel_vebox_context *proc_ctx)
+{
+    /*
+     * Emit the 10-dword VEB_DNDI_IECP_STATE command: header, the start/end X
+     * dword, then one relocation per frame_store[] slot.
+     *
+     * The relocations are emitted in enum order (FRAME_IN_CURRENT first,
+     * FRAME_OUT_STATISTIC last), so the loop below relies on the enum in
+     * gen75_vpp_vebox.h keeping that order.
+     */
+    struct intel_batchbuffer *batch = proc_ctx->batch;
+    unsigned char frame_ctrl_bits = 0;
+    unsigned int first_x = 0;
+    unsigned int last_x = proc_ctx->pic_width;
+    unsigned int slot;
+
+    /* 1 header dword + 1 X-range dword + 8 relocations = 10 dwords. */
+    BEGIN_VEB_BATCH(batch, 10);
+    OUT_VEB_BATCH(batch, VEB_DNDI_IECP_STATE | (10 - 2));
+    OUT_VEB_BATCH(batch,
+                  first_x << 16 |
+                  last_x);
+
+    for(slot = FRAME_IN_CURRENT; slot < FRAME_STORE_SUM; slot ++) {
+        if(slot < FRAME_OUT_STMM) {
+            /* The three input slots are relocated with no write domain. */
+            OUT_RELOC(batch,
+                      proc_ctx->frame_store[slot].bo,
+                      I915_GEM_DOMAIN_RENDER, 0, frame_ctrl_bits);
+        } else {
+            /* The five output slots are relocated with the render write domain. */
+            OUT_RELOC(batch,
+                      proc_ctx->frame_store[slot].bo,
+                      I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, frame_ctrl_bits);
+        }
+    }
+
+    ADVANCE_VEB_BATCH(batch);
+}
+
+
+void hsw_veb_surface_reference(VADriverContextP ctx,
+                              struct intel_vebox_context *proc_ctx)
+{
+    /*
+     * Point FRAME_IN_CURRENT at the caller's input surface and the output slot
+     * at the caller's output surface, taking one bo reference for each.
+     * The output slot is FRAME_OUT_CURRENT_DN when filters_mask is exactly
+     * VPP_DNDI_DN and FRAME_OUT_CURRENT otherwise.
+     */
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    VEBFrameStore *in_slot = &proc_ctx->frame_store[FRAME_IN_CURRENT];
+    VEBFrameStore *out_slot;
+    struct object_surface *surface;
+
+    surface = SURFACE(proc_ctx->surface_input);
+    in_slot->surface_id = proc_ctx->surface_input;
+    in_slot->bo = surface->bo;
+    in_slot->is_internal_surface = 0;
+    dri_bo_reference(in_slot->bo);
+
+    out_slot = &proc_ctx->frame_store[(proc_ctx->filters_mask == VPP_DNDI_DN) ?
+                                      FRAME_OUT_CURRENT_DN : FRAME_OUT_CURRENT];
+    surface = SURFACE(proc_ctx->surface_output);
+    out_slot->surface_id = proc_ctx->surface_output;
+    out_slot->bo = surface->bo;
+    out_slot->is_internal_surface = 0;
+    dri_bo_reference(out_slot->bo);
+}
+
+void hsw_veb_surface_unreference(VADriverContextP ctx,
+                                 struct intel_vebox_context *proc_ctx)
+{
+    /*
+     * Drop the references taken by hsw_veb_surface_reference() and clear both
+     * slots.  Each bo must be released BEFORE its pointer is cleared; clearing
+     * first would unreference NULL and leak the reference.
+     */
+    VEBFrameStore *in_slot = &proc_ctx->frame_store[FRAME_IN_CURRENT];
+    VEBFrameStore *out_slot = (proc_ctx->filters_mask == VPP_DNDI_DN) ?
+                              &proc_ctx->frame_store[FRAME_OUT_CURRENT_DN] :
+                              &proc_ctx->frame_store[FRAME_OUT_CURRENT];
+
+    dri_bo_unreference(in_slot->bo);
+    in_slot->surface_id = -1;
+    in_slot->bo = NULL;
+    in_slot->is_internal_surface = 0;
+
+    dri_bo_unreference(out_slot->bo);
+    out_slot->surface_id = -1;
+    out_slot->bo = NULL;
+    out_slot->is_internal_surface = 0;
+}
+
+void hsw_veb_resource_prepare(VADriverContextP ctx,
+                              struct intel_vebox_context *proc_ctx)
+{
+    /*
+     * First-frame setup: record picture size and fourccs, create the internal
+     * frame-store surfaces and allocate the four 0x1000-byte state tables.
+     */
+    VAStatus va_status;
+    dri_bo *bo;
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    unsigned int input_fourcc, output_fourcc;
+    unsigned int input_sampling, output_sampling;
+    unsigned int input_tiling, output_tiling;
+    unsigned int i, swizzle;
+
+    struct object_surface* obj_surf_in  = SURFACE(proc_ctx->surface_input);
+    struct object_surface* obj_surf_out = SURFACE(proc_ctx->surface_output);
+
+    assert(obj_surf_in->orig_width  == obj_surf_out->orig_width &&
+           obj_surf_in->orig_height == obj_surf_out->orig_height);
+
+    proc_ctx->pic_width   = obj_surf_in->orig_width;
+    proc_ctx->pic_height  = obj_surf_in->orig_height;
+
+    /* Input surface: allocate a tiled NV12 bo if it has none, else read back its format. */
+    if(obj_surf_in->bo == NULL){
+        input_fourcc = VA_FOURCC('N','V','1','2');
+        input_sampling = SUBSAMPLE_YUV420;
+        input_tiling = 1;
+        i965_check_alloc_surface_bo(ctx, obj_surf_in, input_tiling, input_fourcc, input_sampling);
+    } else {
+        input_fourcc = obj_surf_in->fourcc;
+        input_sampling = obj_surf_in->subsampling;
+        dri_bo_get_tiling(obj_surf_in->bo, &input_tiling, &swizzle);
+        input_tiling = !!input_tiling;
+    }
+
+    /* Output surface: same treatment as the input surface. */
+    if(obj_surf_out->bo == NULL){
+        output_fourcc = VA_FOURCC('N','V','1','2');
+        output_sampling = SUBSAMPLE_YUV420;
+        output_tiling = 1;
+        i965_check_alloc_surface_bo(ctx, obj_surf_out, output_tiling, output_fourcc, output_sampling);
+    }else {
+        output_fourcc   = obj_surf_out->fourcc;
+        output_sampling = obj_surf_out->subsampling;
+        dri_bo_get_tiling(obj_surf_out->bo, &output_tiling, &swizzle);
+        output_tiling = !!output_tiling;
+    }
+
+    assert(input_fourcc == VA_FOURCC_NV12 ||
+           input_fourcc == VA_FOURCC_YUY2 ||
+           input_fourcc == VA_FOURCC_AYUV ||
+           input_fourcc == VA_FOURCC_RGBA);
+    assert(output_fourcc == VA_FOURCC_NV12 ||
+           output_fourcc == VA_FOURCC_YUY2 ||
+           output_fourcc == VA_FOURCC_AYUV ||
+           output_fourcc == VA_FOURCC_RGBA);
+
+    proc_ctx->fourcc_input = input_fourcc;
+    proc_ctx->fourcc_output = output_fourcc;
+
+    /* Internal surfaces. NOTE(review): surfaces[FRAME_IN_CURRENT] is created, then its id is replaced below. */
+    VASurfaceID surfaces[FRAME_STORE_SUM];
+    va_status = i965_CreateSurfaces(ctx,
+                                   proc_ctx ->pic_width,
+                                   proc_ctx ->pic_height,
+                                   VA_RT_FORMAT_YUV420,
+                                   FRAME_STORE_SUM,
+                                   surfaces);
+    assert(va_status == VA_STATUS_SUCCESS);
+
+    for(i = FRAME_IN_CURRENT; i < FRAME_STORE_SUM; i ++) {
+        proc_ctx->frame_store[i].surface_id = surfaces[i];
+        struct object_surface* obj_surf = SURFACE(surfaces[i]);
+        if( i == FRAME_IN_CURRENT) {
+            proc_ctx->frame_store[i].surface_id = proc_ctx->surface_input;
+            proc_ctx->frame_store[i].bo = (SURFACE(proc_ctx->surface_input))->bo;
+            proc_ctx->frame_store[i].is_internal_surface = 0;
+            continue;
+        }else if( i == FRAME_IN_PREVIOUS || i == FRAME_OUT_CURRENT_DN) {
+            i965_check_alloc_surface_bo(ctx, obj_surf, input_tiling, input_fourcc, input_sampling);
+        } else if( i == FRAME_IN_STMM || i == FRAME_OUT_STMM){
+            i965_check_alloc_surface_bo(ctx, obj_surf, 1, input_fourcc, input_sampling);
+        } else if( i >= FRAME_OUT_CURRENT){
+            i965_check_alloc_surface_bo(ctx, obj_surf, output_tiling, output_fourcc, output_sampling);
+        }
+        proc_ctx->frame_store[i].bo = obj_surf->bo;
+        dri_bo_reference(proc_ctx->frame_store[i].bo);
+        proc_ctx->frame_store[i].is_internal_surface = 1;
+    }
+
+    /* State tables: dri_bo_alloc() returns the bo already holding one reference, which the
+     * context keeps.  A second dri_bo_reference() here would never be dropped (leak). */
+    dri_bo_unreference(proc_ctx->dndi_state_table.bo);
+    bo = dri_bo_alloc(i965->intel.bufmgr,
+                      "vebox: dndi state Buffer",
+                      0x1000, 0x1000);
+    proc_ctx->dndi_state_table.bo = bo;
+
+    /* alloc iecp state table  */
+    dri_bo_unreference(proc_ctx->iecp_state_table.bo);
+    bo = dri_bo_alloc(i965->intel.bufmgr,
+                      "vebox: iecp state Buffer",
+                      0x1000, 0x1000);
+    proc_ctx->iecp_state_table.bo = bo;
+
+    /* alloc gamut state table  */
+    dri_bo_unreference(proc_ctx->gamut_state_table.bo);
+    bo = dri_bo_alloc(i965->intel.bufmgr,
+                      "vebox: gamut state Buffer",
+                      0x1000, 0x1000);
+    proc_ctx->gamut_state_table.bo = bo;
+
+    /* alloc vertex state table  */
+    dri_bo_unreference(proc_ctx->vertex_state_table.bo);
+    bo = dri_bo_alloc(i965->intel.bufmgr,
+                      "vertex: iecp state Buffer",
+                      0x1000, 0x1000);
+    proc_ctx->vertex_state_table.bo = bo;
+}
+
+VAStatus gen75_vebox_process_picture(VADriverContextP ctx,
+                         struct intel_vebox_context *proc_ctx)
+{
+    /*
+     * Run one VEBOX pass: one-time resource setup on the first frame, bind the
+     * caller's surfaces, build and flush the batch, then unbind the surfaces.
+     * Always returns VA_STATUS_SUCCESS.
+     */
+    if(proc_ctx->is_first_frame)
+        hsw_veb_resource_prepare(ctx, proc_ctx);
+
+    hsw_veb_surface_reference(ctx, proc_ctx);
+
+    intel_batchbuffer_start_atomic_veb(proc_ctx->batch, 0x1000);
+    intel_batchbuffer_emit_mi_flush(proc_ctx->batch);
+    hsw_veb_surface_state(ctx, proc_ctx, INPUT_SURFACE);
+    hsw_veb_surface_state(ctx, proc_ctx, OUTPUT_SURFACE);
+    hsw_veb_state_table_setup(ctx, proc_ctx);
+    hsw_veb_state_command(ctx, proc_ctx);
+    hsw_veb_dndi_iecp_command(ctx, proc_ctx);
+    intel_batchbuffer_end_atomic(proc_ctx->batch);
+    intel_batchbuffer_flush(proc_ctx->batch);
+
+    hsw_veb_surface_unreference(ctx, proc_ctx);
+    proc_ctx->is_first_frame = 0;   /* later calls skip the one-time setup */
+
+    return VA_STATUS_SUCCESS;
+}
+
+void gen75_vebox_context_destroy(VADriverContextP ctx, 
+                          struct intel_vebox_context *proc_ctx)
+{
+    int i;
+
+    /* Frame store: only slots flagged is_internal_surface hold a reference owned here. */
+    for(i = 0; i < FRAME_STORE_SUM; i ++) {
+        if(proc_ctx->frame_store[i].is_internal_surface)
+            dri_bo_unreference(proc_ctx->frame_store[i].bo);
+        proc_ctx->frame_store[i].surface_id = -1;
+        proc_ctx->frame_store[i].bo = NULL;
+    }
+    /* Release all four state tables (dndi, iecp, gamut, vertex) and clear each pointer. */
+    dri_bo_unreference(proc_ctx->dndi_state_table.bo);
+    proc_ctx->dndi_state_table.bo = NULL;
+    dri_bo_unreference(proc_ctx->iecp_state_table.bo);
+    proc_ctx->iecp_state_table.bo = NULL;
+    dri_bo_unreference(proc_ctx->gamut_state_table.bo);
+    proc_ctx->gamut_state_table.bo = NULL;
+    dri_bo_unreference(proc_ctx->vertex_state_table.bo);
+    proc_ctx->vertex_state_table.bo = NULL;
+    intel_batchbuffer_free(proc_ctx->batch);
+    free(proc_ctx);              /* proc_ctx must not be used after this call */
+}
+
+struct intel_vebox_context * gen75_vebox_context_init(VADriverContextP ctx)
+{
+    /* Allocate a zeroed VEBOX context with its own batch buffer; returns NULL if calloc() fails. */
+    struct intel_driver_data *intel = intel_driver_data(ctx);
+    struct intel_vebox_context *proc_context = calloc(1, sizeof(struct intel_vebox_context));
+
+    if (!proc_context)
+        return NULL;
+
+    /* calloc() already zeroed frame_store[], the state tables and filters_mask. */
+    proc_context->batch = intel_batchbuffer_new(intel, I915_EXEC_VEBOX, 0);
+    proc_context->is_first_frame = 1;
+    return proc_context;
+}
+
diff --git a/src/gen75_vpp_vebox.h b/src/gen75_vpp_vebox.h
new file mode 100644
index 0000000..5281c75
--- /dev/null
+++ b/src/gen75_vpp_vebox.h
@@ -0,0 +1,134 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Li Xiaowei <xiaowei.a.li at intel.com>
+ *
+ */
+
+#ifndef _GEN75_VPP_VEBOX_H
+#define _GEN75_VPP_VEBOX_H
+
+#include <xf86drm.h>
+#include <drm.h>
+#include <i915_drm.h>
+#include <intel_bufmgr.h>
+#include "i965_drv_video.h"
+
+#define INPUT_SURFACE  0   /* is_output argument of hsw_veb_surface_state(): input surface */
+#define OUTPUT_SURFACE 1   /* is_output argument of hsw_veb_surface_state(): output surface */
+/* filters_mask bits: the DN/DI flags sit in the low byte, the IECP flags in the second byte. */
+#define VPP_DNDI_DN        0x00000001   /* sets "DN enable" in VEB_STATE */
+#define VPP_DNDI_DI        0x00000002   /* sets "DI enable" in VEB_STATE */
+#define VPP_IECP_STD_STE   0x00000100
+#define VPP_IECP_ACE       0x00000200
+#define VPP_IECP_TCC       0x00000400
+#define VPP_IECP_PRO_AMP   0x00000800   /* tested by hsw_veb_iecp_pro_amp_table() */
+#define VPP_IECP_CSC       0x00001000   /* tested by hsw_veb_iecp_csc_table() */
+#define VPP_IECP_AOI       0x00002000   /* tested by hsw_veb_iecp_aoi_table() */
+#define MAX_FILTER_SUM     8
+
+enum {                      /* slot indices into intel_vebox_context.frame_store[] */
+    FRAME_IN_CURRENT = 0,   /* caller's input surface */
+    FRAME_IN_PREVIOUS,
+    FRAME_IN_STMM,
+    FRAME_OUT_STMM,
+    FRAME_OUT_CURRENT_DN,   /* caller's output surface when filters_mask == VPP_DNDI_DN */
+    FRAME_OUT_CURRENT,      /* caller's output surface in every other case */
+    FRAME_OUT_PREVIOUS,
+    FRAME_OUT_STATISTIC,
+    FRAME_STORE_SUM,        /* number of slots */
+};
+
+enum SURFACE_FORMAT{        /* value shifted to bit 28 of a VEB_SURFACE_STATE dword */
+    YCRCB_NORMAL = 0,       /* chosen for VA_FOURCC_YUY2 surfaces */
+    YCRCB_SWAPUVY,
+    YCRCB_SWAPUV,
+    YCRCB_SWAPY,
+    PLANAR_420_8,  //NV12
+    PACKED_444A_8,          /* chosen for VA_FOURCC_AYUV surfaces */
+    PACKED_422_16,
+    R10G10B10A2_UNORM_SRGB,
+    R8G8B8A8_UNORM_SRGB,    /* chosen for VA_FOURCC_RGBA surfaces */
+    PACKED_444_16,
+    PLANAR_422_16,
+    Y8_UNORM,
+    PLANAR_420_16,
+    R16G16B16A16,
+    SURFACE_FORMAT_SUM      /* number of formats listed above */
+};
+
+typedef struct veb_frame_store {    /* one slot of the VEBOX frame store */
+    VASurfaceID surface_id;         /* surface bound to the slot; -1 once cleared */
+    dri_bo  *bo;                    /* buffer object of that surface; NULL once cleared */
+    unsigned char  is_internal_surface; /* 1: allocated in hsw_veb_resource_prepare(), 0: caller's */
+} VEBFrameStore;
+
+typedef struct veb_buffer {         /* one CPU-filled state table */
+    dri_bo  *bo;                    /* 0x1000-byte buffer from hsw_veb_resource_prepare() */
+    void *  ptr;                    /* set to bo->virtual while the bo is mapped */
+    unsigned char  valid;           /* NOTE(review): not referenced by the code visible here */
+} VEBBuffer;
+
+struct intel_vebox_context
+{
+    struct intel_batchbuffer *batch;    /* created with I915_EXEC_VEBOX in gen75_vebox_context_init() */
+
+    VASurfaceID surface_input;          /* input surface, looked up with SURFACE() per picture */
+    VASurfaceID surface_output;         /* output surface, looked up with SURFACE() per picture */
+    unsigned int fourcc_input;          /* recorded by hsw_veb_resource_prepare() */
+    unsigned int fourcc_output;         /* recorded by hsw_veb_resource_prepare() */
+    unsigned int pic_width;             /* orig_width of the input surface */
+    unsigned int pic_height;            /* orig_height of the input surface */
+ 
+    VEBFrameStore frame_store[FRAME_STORE_SUM]; /* indexed by the FRAME_* enum */
+
+    VEBBuffer dndi_state_table;         /* relocated into VEB_STATE, in this order */
+    VEBBuffer iecp_state_table;
+    VEBBuffer gamut_state_table;
+    VEBBuffer vertex_state_table;
+
+    unsigned int  filters_mask;         /* OR of VPP_DNDI_* and VPP_IECP_* bits */
+    unsigned char is_first_frame;       /* 1 until the first gen75_vebox_process_picture() call ends */
+
+    /*
+    VAProcPipelineParameterBuffer * pipeline_param;
+    void * filter_dn;
+    void * filter_di;
+    void * filter_iecp_std;
+    void * filter_iecp_ace;
+    void * filter_iecp_tcc;
+    void * filter_iecp_amp;
+    void * filter_iecp_csc;
+    */
+};
+
+VAStatus gen75_vebox_process_picture(VADriverContextP ctx,
+                         struct intel_vebox_context *proc_ctx); /* runs one VEBOX pass */
+/* Releases the context's buffers and batch, then frees proc_ctx itself. */
+void gen75_vebox_context_destroy(VADriverContextP ctx, 
+                          struct intel_vebox_context *proc_ctx);
+/* Allocates a new context with calloc() and creates its batch buffer. */
+struct intel_vebox_context * gen75_vebox_context_init(VADriverContextP ctx);
+
+#endif
diff --git a/src/gen7_mfd.c b/src/gen7_mfd.c
old mode 100644
new mode 100755
index b4584c4..aec9694
--- a/src/gen7_mfd.c
+++ b/src/gen7_mfd.c
@@ -26,15 +26,13 @@
  *
  */
 
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <assert.h>
+#ifndef HAVE_GEN_AVC_SURFACE
+#define HAVE_GEN_AVC_SURFACE 1
+#endif
 
-#include "config.h"
+#include "sysdeps.h"
 #include "intel_batchbuffer.h"
 #include "intel_driver.h"
-
 #include "i965_defines.h"
 #include "i965_drv_video.h"
 #include "i965_decoder_utils.h"
@@ -168,38 +166,21 @@ gen7_mfd_avc_frame_store_index(VADriverContextP ctx,
     }
 }
 
-static void 
-gen7_mfd_free_avc_surface(void **data)
-{
-    struct gen7_avc_surface *gen7_avc_surface = *data;
-
-    if (!gen7_avc_surface)
-        return;
-
-    dri_bo_unreference(gen7_avc_surface->dmv_top);
-    gen7_avc_surface->dmv_top = NULL;
-    dri_bo_unreference(gen7_avc_surface->dmv_bottom);
-    gen7_avc_surface->dmv_bottom = NULL;
-
-    free(gen7_avc_surface);
-    *data = NULL;
-}
-
 static void
 gen7_mfd_init_avc_surface(VADriverContextP ctx, 
                           VAPictureParameterBufferH264 *pic_param,
                           struct object_surface *obj_surface)
 {
     struct i965_driver_data *i965 = i965_driver_data(ctx);
-    struct gen7_avc_surface *gen7_avc_surface = obj_surface->private_data;
+    GenAvcSurface *gen7_avc_surface = obj_surface->private_data;
     int width_in_mbs, height_in_mbs;
 
-    obj_surface->free_private_data = gen7_mfd_free_avc_surface;
-    width_in_mbs = ((pic_param->picture_width_in_mbs_minus1 + 1) & 0xff);
-    height_in_mbs = ((pic_param->picture_height_in_mbs_minus1 + 1) & 0xff); /* frame height */
+    obj_surface->free_private_data = gen_free_avc_surface;
+    width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
+    height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
 
     if (!gen7_avc_surface) {
-        gen7_avc_surface = calloc(sizeof(struct gen7_avc_surface), 1);
+        gen7_avc_surface = calloc(sizeof(GenAvcSurface), 1);
         assert((obj_surface->size & 0x3f) == 0);
         obj_surface->private_data = gen7_avc_surface;
     }
@@ -212,6 +193,7 @@ gen7_mfd_init_avc_surface(VADriverContextP ctx,
                                                  "direct mv w/r buffer",
                                                  width_in_mbs * height_in_mbs * 64,
                                                  0x1000);
+        assert(gen7_avc_surface->dmv_top);
     }
 
     if (gen7_avc_surface->dmv_bottom_flag &&
@@ -220,6 +202,7 @@ gen7_mfd_init_avc_surface(VADriverContextP ctx,
                                                     "direct mv w/r buffer",
                                                     width_in_mbs * height_in_mbs * 64,                                                    
                                                     0x1000);
+        assert(gen7_avc_surface->dmv_bottom);
     }
 }
 
@@ -420,14 +403,6 @@ gen7_mfd_bsp_buf_base_addr_state(VADriverContextP ctx,
 }
 
 static void
-gen7_mfd_aes_state(VADriverContextP ctx,
-                   struct decode_state *decode_state,
-                   int standard_select)
-{
-    /* FIXME */
-}
-
-static void
 gen7_mfd_qm_state(VADriverContextP ctx,
                   int qm_type,
                   unsigned char *qm,
@@ -446,18 +421,6 @@ gen7_mfd_qm_state(VADriverContextP ctx,
     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
     ADVANCE_BCS_BATCH(batch);
 }
-static void
-gen7_mfd_wait(VADriverContextP ctx,
-              struct decode_state *decode_state,
-              int standard_select,
-              struct gen7_mfd_context *gen7_mfd_context)
-{
-    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
-
-    BEGIN_BCS_BATCH(batch, 1);
-    OUT_BCS_BATCH(batch, MFX_WAIT | (1 << 8));
-    ADVANCE_BCS_BATCH(batch);
-}
 
 static void
 gen7_mfd_avc_img_state(VADriverContextP ctx,
@@ -497,8 +460,8 @@ gen7_mfd_avc_img_state(VADriverContextP ctx,
     mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
                         !pic_param->pic_fields.bits.field_pic_flag);
 
-    width_in_mbs = ((pic_param->picture_width_in_mbs_minus1 + 1) & 0xff);
-    height_in_mbs = ((pic_param->picture_height_in_mbs_minus1 + 1) & 0xff); /* frame height */
+    width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
+    height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
 
     /* MFX unit doesn't support 4:2:2 and 4:4:4 picture */
     assert(pic_param->seq_fields.bits.chroma_format_idc == 0 || /* monochrome picture */
@@ -578,7 +541,7 @@ gen7_mfd_avc_directmode_state(VADriverContextP ctx,
     struct i965_driver_data *i965 = i965_driver_data(ctx);
     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
     struct object_surface *obj_surface;
-    struct gen7_avc_surface *gen7_avc_surface;
+    GenAvcSurface *gen7_avc_surface;
     VAPictureH264 *va_pic;
     int i, j;
 
@@ -880,7 +843,7 @@ gen7_mfd_avc_decode_init(VADriverContextP ctx,
     struct object_surface *obj_surface;
     dri_bo *bo;
     int i, j, enable_avc_ildb = 0;
-    int width_in_mbs;
+    unsigned int width_in_mbs, height_in_mbs;
 
     for (j = 0; j < decode_state->num_slice_params && enable_avc_ildb == 0; j++) {
         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
@@ -906,7 +869,10 @@ gen7_mfd_avc_decode_init(VADriverContextP ctx,
     assert(decode_state->pic_param && decode_state->pic_param->buffer);
     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
     gen7_mfd_avc_frame_store_index(ctx, pic_param, gen7_mfd_context);
-    width_in_mbs = ((pic_param->picture_width_in_mbs_minus1 + 1) & 0xff);
+    width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
+    height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
+    assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
+    assert(height_in_mbs > 0 && height_in_mbs <= 256);
 
     /* Current decoded picture */
     va_pic = &pic_param->CurrPic;
@@ -1600,8 +1566,19 @@ gen7_mfd_vc1_pic_state(VADriverContextP ctx,
 
     if (picture_type == GEN7_VC1_I_PICTURE || picture_type == GEN7_VC1_BI_PICTURE) /* I picture */
         trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx2;
-    else
+    else {
         trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx1;
+        /*
+         * 8.3.6.2.1 Transform Type Selection
+         * If variable-sized transform coding is not enabled,
+         * then the 8x8 transform shall be used for all blocks.
+         * This is also an MFX_VC1_PIC_STATE requirement.
+         */
+        if (pic_param->transform_fields.bits.variable_sized_transform_flag == 0) {
+            pic_param->transform_fields.bits.mb_level_transform_type_flag   = 1;
+            pic_param->transform_fields.bits.frame_level_transform_type     = 0;
+        }
+    }
 
 
     if (picture_type == GEN7_VC1_B_PICTURE) {
@@ -1913,7 +1890,7 @@ gen7_mfd_vc1_decode_picture(VADriverContextP ctx,
     intel_batchbuffer_flush(batch);
 }
 
-#ifdef HAVE_JPEG_DECODING
+#ifdef HAVE_VA_JPEG_DECODE
 static void
 gen7_mfd_jpeg_decode_init(VADriverContextP ctx,
                           struct decode_state *decode_state,
@@ -1921,10 +1898,10 @@ gen7_mfd_jpeg_decode_init(VADriverContextP ctx,
 {
     struct i965_driver_data *i965 = i965_driver_data(ctx);
     struct object_surface *obj_surface;
-    VAPictureParameterBufferJPEG *pic_param;
+    VAPictureParameterBufferJPEGBaseline *pic_param;
     int subsampling = SUBSAMPLE_YUV420;
 
-    pic_param = (VAPictureParameterBufferJPEG *)decode_state->pic_param->buffer;
+    pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
 
     if (pic_param->num_components == 1)
         subsampling = SUBSAMPLE_YUV400;
@@ -2005,15 +1982,13 @@ gen7_mfd_jpeg_pic_state(VADriverContextP ctx,
                         struct gen7_mfd_context *gen7_mfd_context)
 {
     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
-    VAPictureParameterBufferJPEG *pic_param;
+    VAPictureParameterBufferJPEGBaseline *pic_param;
     int chroma_type = GEN7_YUV420;
     int frame_width_in_blks;
     int frame_height_in_blks;
 
     assert(decode_state->pic_param && decode_state->pic_param->buffer);
-    pic_param = (VAPictureParameterBufferJPEG *)decode_state->pic_param->buffer;
-
-    assert(pic_param->type == VA_JPEG_SOF0); /* only support BASELINE on Ivybridge */
+    pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
 
     if (pic_param->num_components == 1)
         chroma_type = GEN7_YUV400;
@@ -2053,20 +2028,20 @@ gen7_mfd_jpeg_pic_state(VADriverContextP ctx,
     if (chroma_type == GEN7_YUV400 ||
         chroma_type == GEN7_YUV444 ||
         chroma_type == GEN7_YUV422V_2Y) {
-        frame_width_in_blks = ((pic_param->image_width + 7) / 8);
-        frame_height_in_blks = ((pic_param->image_height + 7) / 8);
+        frame_width_in_blks = ((pic_param->picture_width + 7) / 8);
+        frame_height_in_blks = ((pic_param->picture_height + 7) / 8);
     } else if (chroma_type == GEN7_YUV411) {
-        frame_width_in_blks = ((pic_param->image_width + 31) / 32) * 4;
-        frame_height_in_blks = ((pic_param->image_height + 31) / 32) * 4;
+        frame_width_in_blks = ((pic_param->picture_width + 31) / 32) * 4;
+        frame_height_in_blks = ((pic_param->picture_height + 31) / 32) * 4;
     } else {
-        frame_width_in_blks = ((pic_param->image_width + 15) / 16) * 2;
-        frame_height_in_blks = ((pic_param->image_height + 15) / 16) * 2;
+        frame_width_in_blks = ((pic_param->picture_width + 15) / 16) * 2;
+        frame_height_in_blks = ((pic_param->picture_height + 15) / 16) * 2;
     }
 
     BEGIN_BCS_BATCH(batch, 3);
     OUT_BCS_BATCH(batch, MFX_JPEG_PIC_STATE | (3 - 2));
     OUT_BCS_BATCH(batch,
-                  (va_to_gen7_jpeg_rotation[pic_param->rotation] << 4) |    /* rotation */
+                  (va_to_gen7_jpeg_rotation[0] << 4) |    /* without rotation */
                   (chroma_type << 0));
     OUT_BCS_BATCH(batch,
                   ((frame_height_in_blks - 1) << 16) |   /* FrameHeightInBlks */
@@ -2085,24 +2060,24 @@ gen7_mfd_jpeg_huff_table_state(VADriverContextP ctx,
                                struct gen7_mfd_context *gen7_mfd_context,
                                int num_tables)
 {
-    VAHuffmanTableBufferJPEG *huffman_table;
+    VAHuffmanTableBufferJPEGBaseline *huffman_table;
     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
     int index;
 
     if (!decode_state->huffman_table || !decode_state->huffman_table->buffer)
         return;
 
-    huffman_table = (VAHuffmanTableBufferJPEG *)decode_state->huffman_table->buffer;
+    huffman_table = (VAHuffmanTableBufferJPEGBaseline *)decode_state->huffman_table->buffer;
 
     for (index = 0; index < num_tables; index++) {
         int id = va_to_gen7_jpeg_hufftable[index];
         BEGIN_BCS_BATCH(batch, 53);
         OUT_BCS_BATCH(batch, MFX_JPEG_HUFF_TABLE_STATE | (53 - 2));
         OUT_BCS_BATCH(batch, id);
-        intel_batchbuffer_data(batch, huffman_table->huffman_table[index].dc_bits, 12);
-        intel_batchbuffer_data(batch, huffman_table->huffman_table[index].dc_huffval, 12);
-        intel_batchbuffer_data(batch, huffman_table->huffman_table[index].ac_bits, 16);
-        intel_batchbuffer_data(batch, huffman_table->huffman_table[index].ac_huffval, 164);
+        intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_dc_codes, 12);
+        intel_batchbuffer_data(batch, huffman_table->huffman_table[index].dc_values, 12);
+        intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_ac_codes, 16);
+        intel_batchbuffer_data(batch, huffman_table->huffman_table[index].ac_values, 164);
         ADVANCE_BCS_BATCH(batch);
     }
 }
@@ -2120,26 +2095,26 @@ gen7_mfd_jpeg_qm_state(VADriverContextP ctx,
                        struct decode_state *decode_state,
                        struct gen7_mfd_context *gen7_mfd_context)
 {
-    VAPictureParameterBufferJPEG *pic_param;
-    VAIQMatrixBufferJPEG *iq_matrix;
+    VAPictureParameterBufferJPEGBaseline *pic_param;
+    VAIQMatrixBufferJPEGBaseline *iq_matrix;
     int index;
 
     if (!decode_state->iq_matrix || !decode_state->iq_matrix->buffer)
         return;
 
-    iq_matrix = (VAIQMatrixBufferJPEG *)decode_state->iq_matrix->buffer;
-    pic_param = (VAPictureParameterBufferJPEG *)decode_state->pic_param->buffer;
+    iq_matrix = (VAIQMatrixBufferJPEGBaseline *)decode_state->iq_matrix->buffer;
+    pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
 
     assert(pic_param->num_components <= 3);
 
     for (index = 0; index < pic_param->num_components; index++) {
         int qm_type = va_to_gen7_jpeg_qm[pic_param->components[index].component_id - pic_param->components[0].component_id + 1];
-        unsigned char *qm = iq_matrix->quantiser_matrix[pic_param->components[index].quantiser_table_selector];
-        int precision = iq_matrix->precision[pic_param->components[index].quantiser_table_selector];
+        unsigned char *qm = iq_matrix->quantiser_table[pic_param->components[index].quantiser_table_selector];
         unsigned char raster_qm[64];
         int j;
 
-        assert(precision == 0);
+        if (!iq_matrix->load_quantiser_table[pic_param->components[index].quantiser_table_selector])
+            continue;
 
         for (j = 0; j < 64; j++)
             raster_qm[zigzag_direct[j]] = qm[j];
@@ -2150,9 +2125,9 @@ gen7_mfd_jpeg_qm_state(VADriverContextP ctx,
 
 static void
 gen7_mfd_jpeg_bsd_object(VADriverContextP ctx,
-                         VAPictureParameterBufferJPEG *pic_param,
-                         VASliceParameterBufferJPEG *slice_param,
-                         VASliceParameterBufferJPEG *next_slice_param,
+                         VAPictureParameterBufferJPEGBaseline *pic_param,
+                         VASliceParameterBufferJPEGBaseline *slice_param,
+                         VASliceParameterBufferJPEGBaseline *next_slice_param,
                          dri_bo *slice_data_bo,
                          struct gen7_mfd_context *gen7_mfd_context)
 {
@@ -2165,7 +2140,7 @@ gen7_mfd_jpeg_bsd_object(VADriverContextP ctx,
     assert(slice_param->num_components <= pic_param->num_components);
 
     for (i = 0; i < slice_param->num_components; i++) {
-        switch (slice_param->components[i].component_id - pic_param->components[0].component_id + 1) {
+        switch (slice_param->components[i].component_selector - pic_param->components[0].component_id + 1) {
         case 1:
             scan_component_mask |= (1 << 0);
             break;
@@ -2616,17 +2591,15 @@ gen7_mfd_jpeg_decode_picture(VADriverContextP ctx,
                              struct gen7_mfd_context *gen7_mfd_context)
 {
     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
-    VAPictureParameterBufferJPEG *pic_param;
-    VASliceParameterBufferJPEG *slice_param, *next_slice_param, *next_slice_group_param;
+    VAPictureParameterBufferJPEGBaseline *pic_param;
+    VASliceParameterBufferJPEGBaseline *slice_param, *next_slice_param, *next_slice_group_param;
     dri_bo *slice_data_bo;
     int i, j, max_selector = 0;
 
     assert(decode_state->pic_param && decode_state->pic_param->buffer);
-    pic_param = (VAPictureParameterBufferJPEG *)decode_state->pic_param->buffer;
+    pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
 
     /* Currently only support Baseline DCT */
-    assert(pic_param->type == VA_JPEG_SOF0);
-    assert(pic_param->sample_precision == 8);
     gen7_mfd_jpeg_decode_init(ctx, decode_state, gen7_mfd_context);
     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
     gen7_mfd_jpeg_wa(ctx, gen7_mfd_context);
@@ -2639,14 +2612,14 @@ gen7_mfd_jpeg_decode_picture(VADriverContextP ctx,
 
     for (j = 0; j < decode_state->num_slice_params; j++) {
         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
-        slice_param = (VASliceParameterBufferJPEG *)decode_state->slice_params[j]->buffer;
+        slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
         slice_data_bo = decode_state->slice_datas[j]->bo;
         gen7_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
 
         if (j == decode_state->num_slice_params - 1)
             next_slice_group_param = NULL;
         else
-            next_slice_group_param = (VASliceParameterBufferJPEG *)decode_state->slice_params[j + 1]->buffer;
+            next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
 
         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
             int component;
@@ -2659,11 +2632,11 @@ gen7_mfd_jpeg_decode_picture(VADriverContextP ctx,
                 next_slice_param = next_slice_group_param;
 
             for (component = 0; component < slice_param->num_components; component++) {
-                if (max_selector < slice_param->components[component].dc_selector)
-                    max_selector = slice_param->components[component].dc_selector;
+                if (max_selector < slice_param->components[component].dc_table_selector)
+                    max_selector = slice_param->components[component].dc_table_selector;
 
-                if (max_selector < slice_param->components[component].ac_selector)
-                    max_selector = slice_param->components[component].ac_selector;
+                if (max_selector < slice_param->components[component].ac_table_selector)
+                    max_selector = slice_param->components[component].ac_table_selector;
             }
 
             slice_param++;
@@ -2675,14 +2648,14 @@ gen7_mfd_jpeg_decode_picture(VADriverContextP ctx,
 
     for (j = 0; j < decode_state->num_slice_params; j++) {
         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
-        slice_param = (VASliceParameterBufferJPEG *)decode_state->slice_params[j]->buffer;
+        slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
         slice_data_bo = decode_state->slice_datas[j]->bo;
         gen7_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
 
         if (j == decode_state->num_slice_params - 1)
             next_slice_group_param = NULL;
         else
-            next_slice_group_param = (VASliceParameterBufferJPEG *)decode_state->slice_params[j + 1]->buffer;
+            next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
 
         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
@@ -2734,7 +2707,7 @@ gen7_mfd_decode_picture(VADriverContextP ctx,
         gen7_mfd_vc1_decode_picture(ctx, decode_state, gen7_mfd_context);
         break;
 
-#ifdef HAVE_JPEG_DECODING
+#ifdef HAVE_VA_JPEG_DECODE
     case VAProfileJPEGBaseline:
         gen7_mfd_jpeg_decode_picture(ctx, decode_state, gen7_mfd_context);
         break;
@@ -2796,7 +2769,7 @@ gen7_dec_hw_context_init(VADriverContextP ctx, VAProfile profile)
 
     gen7_mfd_context->base.destroy = gen7_mfd_context_destroy;
     gen7_mfd_context->base.run = gen7_mfd_decode_picture;
-    gen7_mfd_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER);
+    gen7_mfd_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
 
     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
         gen7_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
diff --git a/src/gen7_mfd.h b/src/gen7_mfd.h
index 08d9e3a..0700c80 100644
--- a/src/gen7_mfd.h
+++ b/src/gen7_mfd.h
@@ -35,13 +35,6 @@
 #include <intel_bufmgr.h>
 #include "i965_decoder.h"
 
-struct gen7_avc_surface
-{
-    dri_bo *dmv_top;
-    dri_bo *dmv_bottom;
-    int dmv_bottom_flag;
-};
-
 #define GEN7_VC1_I_PICTURE              0
 #define GEN7_VC1_P_PICTURE              1
 #define GEN7_VC1_B_PICTURE              2
diff --git a/src/i965_avc_bsd.c b/src/i965_avc_bsd.c
index b2b6c92..a118076 100644
--- a/src/i965_avc_bsd.c
+++ b/src/i965_avc_bsd.c
@@ -30,6 +30,10 @@
 #include <string.h>
 #include <assert.h>
 
+#ifndef HAVE_GEN_AVC_SURFACE
+#define HAVE_GEN_AVC_SURFACE 1
+#endif
+
 #include "intel_batchbuffer.h"
 #include "intel_driver.h"
 
@@ -40,23 +44,6 @@
 #include "i965_media.h"
 #include "i965_decoder_utils.h"
 
-static void 
-i965_avc_bsd_free_avc_bsd_surface(void **data)
-{
-    struct i965_avc_bsd_surface *avc_bsd_surface = *data;
-
-    if (!avc_bsd_surface)
-        return;
-
-    dri_bo_unreference(avc_bsd_surface->dmv_top);
-    avc_bsd_surface->dmv_top = NULL;
-    dri_bo_unreference(avc_bsd_surface->dmv_bottom);
-    avc_bsd_surface->dmv_bottom = NULL;
-
-    free(avc_bsd_surface);
-    *data = NULL;
-}
-
 static void
 i965_avc_bsd_init_avc_bsd_surface(VADriverContextP ctx, 
                                   struct object_surface *obj_surface,
@@ -64,18 +51,16 @@ i965_avc_bsd_init_avc_bsd_surface(VADriverContextP ctx,
                                   struct i965_h264_context *i965_h264_context)
 {
     struct i965_driver_data *i965 = i965_driver_data(ctx);
-    struct i965_avc_bsd_context *i965_avc_bsd_context = &i965_h264_context->i965_avc_bsd_context;
-    struct i965_avc_bsd_surface *avc_bsd_surface = obj_surface->private_data;
+    GenAvcSurface *avc_bsd_surface = obj_surface->private_data;
 
-    obj_surface->free_private_data = i965_avc_bsd_free_avc_bsd_surface;
+    obj_surface->free_private_data = gen_free_avc_surface;
 
     if (!avc_bsd_surface) {
-        avc_bsd_surface = calloc(sizeof(struct i965_avc_bsd_surface), 1);
+        avc_bsd_surface = calloc(sizeof(GenAvcSurface), 1);
         assert((obj_surface->size & 0x3f) == 0);
         obj_surface->private_data = avc_bsd_surface;
     }
 
-    avc_bsd_surface->ctx = i965_avc_bsd_context;
     avc_bsd_surface->dmv_bottom_flag = (pic_param->pic_fields.bits.field_pic_flag &&
                                         !pic_param->seq_fields.bits.direct_8x8_inference_flag);
 
@@ -404,7 +389,7 @@ i965_avc_bsd_buf_base_state(VADriverContextP ctx,
     int i, j;
     VAPictureH264 *va_pic;
     struct object_surface *obj_surface;
-    struct i965_avc_bsd_surface *avc_bsd_surface;
+    GenAvcSurface *avc_bsd_surface;
 
     i965_avc_bsd_context = &i965_h264_context->i965_avc_bsd_context;
 
diff --git a/src/i965_avc_bsd.h b/src/i965_avc_bsd.h
index 25606ba..7f83007 100644
--- a/src/i965_avc_bsd.h
+++ b/src/i965_avc_bsd.h
@@ -42,14 +42,6 @@ struct i965_avc_bsd_context
     } mpr_row_store;
 };
 
-struct i965_avc_bsd_surface
-{
-    struct i965_avc_bsd_context *ctx;
-    dri_bo *dmv_top;
-    dri_bo *dmv_bottom;
-    int dmv_bottom_flag;
-};
-
 void i965_avc_bsd_pipeline(VADriverContextP, struct decode_state *, void *h264_context);
 void i965_avc_bsd_decode_init(VADriverContextP, void *h264_context);
 Bool i965_avc_bsd_ternimate(struct i965_avc_bsd_context *);
diff --git a/src/i965_decoder.h b/src/i965_decoder.h
index 0e69e14..0226707 100644
--- a/src/i965_decoder.h
+++ b/src/i965_decoder.h
@@ -26,6 +26,8 @@
 #define I965_DECODER_H
 
 #include <stdint.h>
+#include <stdlib.h>
+
 #include <va/va.h>
 #include <intel_bufmgr.h>
 
@@ -43,4 +45,47 @@ struct gen_buffer {
     int         valid;
 };
 
+#if HAVE_GEN_AVC_SURFACE
+
+static pthread_mutex_t free_avc_surface_lock = PTHREAD_MUTEX_INITIALIZER;
+
+typedef struct gen_avc_surface GenAvcSurface;
+struct gen_avc_surface /* AVC decode data kept in obj_surface->private_data */
+{
+    dri_bo *dmv_top;     /* buffer object; unreferenced by gen_free_avc_surface() */
+    dri_bo *dmv_bottom;  /* buffer object; unreferenced by gen_free_avc_surface() */
+    int dmv_bottom_flag; /* decoders set it to field_pic_flag && !direct_8x8_inference_flag */
+};
+
+/*
+ * Release the GenAvcSurface that *data points to and reset *data to NULL.
+ * A NULL *data is a no-op; everything runs under free_avc_surface_lock.
+ */
+static void
+gen_free_avc_surface(void **data)
+{
+    GenAvcSurface *surface;
+
+    pthread_mutex_lock(&free_avc_surface_lock);
+    surface = *data;
+
+    if (surface) {
+        /* Unreference the two dmv buffers before freeing their owner. */
+        dri_bo_unreference(surface->dmv_top);
+        surface->dmv_top = NULL;
+        dri_bo_unreference(surface->dmv_bottom);
+        surface->dmv_bottom = NULL;
+
+        free(surface);
+        *data = NULL;
+    }
+
+    pthread_mutex_unlock(&free_avc_surface_lock);
+}
+
+#endif
+
+extern struct hw_context *
+gen75_dec_hw_context_init(VADriverContextP ctx, VAProfile profile);
+
 #endif /* I965_DECODER_H */
diff --git a/src/i965_decoder_utils.c b/src/i965_decoder_utils.c
index 8450d23..6326796 100644
--- a/src/i965_decoder_utils.c
+++ b/src/i965_decoder_utils.c
@@ -21,9 +21,7 @@
  * DEALINGS IN THE SOFTWARE.
  */
 
-#include <assert.h>
-#include <stddef.h>
-#include <string.h>
+#include "sysdeps.h"
 #include <alloca.h>
 #include "intel_batchbuffer.h"
 #include "i965_decoder_utils.h"
diff --git a/src/i965_defines.h b/src/i965_defines.h
index b58260a..5988949 100644
--- a/src/i965_defines.h
+++ b/src/i965_defines.h
@@ -234,7 +234,9 @@
 # define GEN7_PS_FLOATING_POINT_MODE_ALT                (1 << 16)
 /* DW3: scratch space */
 /* DW4 */
-# define GEN7_PS_MAX_THREADS_SHIFT                      23
+# define GEN7_PS_MAX_THREADS_SHIFT_IVB                  24
+# define GEN7_PS_MAX_THREADS_SHIFT_HSW                  23
+# define GEN7_PS_SAMPLE_MASK_SHIFT_HSW                  12
 # define GEN7_PS_PUSH_CONSTANT_ENABLE                   (1 << 11)
 # define GEN7_PS_ATTRIBUTE_ENABLE                       (1 << 10)
 # define GEN7_PS_OMASK_TO_RENDER_TARGET                 (1 << 9)
@@ -294,6 +296,7 @@
 #define MFX_AVC_REF_IDX_STATE                   MFX(2, 1, 0, 4)
 #define MFX_AVC_WEIGHTOFFSET_STATE              MFX(2, 1, 0, 5)
 
+#define MFD_AVC_PICID_STATE                     MFX(2, 1, 1, 5)
 #define MFD_AVC_BSD_OBJECT                      MFX(2, 1, 1, 8)
 
 #define MFC_AVC_FQM_STATE                       MFX(2, 1, 2, 2)
@@ -319,6 +322,17 @@
 
 #define MFD_JPEG_BSD_OBJECT                     MFX(2, 7, 1, 8)
 
+#define VEB(pipeline, op, sub_opa, sub_opb)     \
+     (3 << 29 |                                 \
+     (pipeline) << 27 |                         \
+     (op) << 24 |                               \
+     (sub_opa) << 21 |                          \
+     (sub_opb) << 16)
+
+#define VEB_SURFACE_STATE                       VEB(2, 4, 0, 0)
+#define VEB_STATE                               VEB(2, 4, 0, 2)
+#define VEB_DNDI_IECP_STATE                     VEB(2, 4, 0, 3)
+
 #define I965_DEPTHFORMAT_D32_FLOAT              1
 
 #define BASE_ADDRESS_MODIFY             (1 << 0)
@@ -525,6 +539,13 @@
 #define I965_MIPFILTER_NEAREST     1   
 #define I965_MIPFILTER_LINEAR      3
 
+#define HSW_SCS_ZERO                      0
+#define HSW_SCS_ONE                       1
+#define HSW_SCS_RED                       4
+#define HSW_SCS_GREEN                     5
+#define HSW_SCS_BLUE                      6
+#define HSW_SCS_ALPHA                     7
+
 #define I965_TEXCOORDMODE_WRAP            0
 #define I965_TEXCOORDMODE_MIRROR          1
 #define I965_TEXCOORDMODE_CLAMP           2
diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c
index 0526efc..1180114 100644
--- a/src/i965_drv_video.c
+++ b/src/i965_drv_video.c
@@ -27,19 +27,23 @@
  *
  */
 
-#include "config.h"
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <assert.h>
+#include "sysdeps.h"
 
-#include <va/va_dricommon.h>
+#ifdef HAVE_VA_X11
+# include "i965_output_dri.h"
+#endif
+
+#ifdef HAVE_VA_WAYLAND
+# include "i965_output_wayland.h"
+#endif
 
 #include "intel_driver.h"
 #include "intel_memman.h"
 #include "intel_batchbuffer.h"
 #include "i965_defines.h"
 #include "i965_drv_video.h"
+#include "i965_decoder.h"
+#include "i965_encoder.h"
 
 #define CONFIG_ID_OFFSET                0x01000000
 #define CONTEXT_ID_OFFSET               0x02000000
@@ -74,12 +78,40 @@
 #define HAS_JPEG(ctx)   (IS_GEN7((ctx)->intel.device_id) &&     \
                          (ctx)->intel.has_bsd)
 
+#define HAS_ACCELERATED_GETIMAGE(ctx)   (IS_GEN6((ctx)->intel.device_id) ||     \
+                                         IS_GEN7((ctx)->intel.device_id))
+
+#define HAS_ACCELERATED_PUTIMAGE(ctx)   HAS_VPP(ctx)
+
+#if VA_CHECK_VERSION(0,33,0)
+/* Check whether we are rendering to X11 (VA/X11 or VA/GLX API) */
+#define IS_VA_X11(ctx) \
+    (((ctx)->display_type & VA_DISPLAY_MAJOR_MASK) == VA_DISPLAY_X11)
+
+/* Check whether we are rendering to Wayland */
+#define IS_VA_WAYLAND(ctx) \
+    (((ctx)->display_type & VA_DISPLAY_MAJOR_MASK) == VA_DISPLAY_WAYLAND)
+#else
+/* Previous VA-API versions only supported VA/X11 (and VA/GLX) API */
+#define IS_VA_X11(ctx)          1
+#define IS_VA_WAYLAND(ctx)      0
+#endif
+
 enum {
     I965_SURFACETYPE_RGBA = 1,
     I965_SURFACETYPE_YUV,
     I965_SURFACETYPE_INDEXED
 };
 
+/* List of supported display attributes */
+static const VADisplayAttribute i965_display_attributes[] = { /* defaults; copied per driver by i965_display_attributes_init() */
+    {
+        VADisplayAttribRotation,
+        0, 3, VA_ROTATION_NONE, /* presumably min_value, max_value, value -- confirm against VADisplayAttribute in va.h */
+        VA_DISPLAY_ATTRIB_GETTABLE|VA_DISPLAY_ATTRIB_SETTABLE
+    },
+};
+
 /* List of supported image formats */
 typedef struct {
     unsigned int        type;
@@ -173,6 +205,13 @@ static struct hw_codec_info gen7_hw_codec_info = {
     .max_height = 4096,
 };
 
+static struct hw_codec_info gen75_hw_codec_info = { /* chosen by i965_Init() when IS_HASWELL() matches the device id */
+    .dec_hw_context_init = gen75_dec_hw_context_init,
+    .enc_hw_context_init = gen75_enc_hw_context_init,
+    .max_width = 4096,
+    .max_height = 4096,
+};
+
 VAStatus 
 i965_QueryConfigProfiles(VADriverContextP ctx,
                          VAProfile *profile_list,       /* out */
@@ -198,9 +237,11 @@ i965_QueryConfigProfiles(VADriverContextP ctx,
         profile_list[i++] = VAProfileVC1Advanced;
     }
 
+#ifdef HAVE_VA_JPEG_DECODE
     if (HAS_JPEG(i965)) {
         profile_list[i++] = VAProfileJPEGBaseline;
     }
+#endif
 
     /* If the assert fails then I965_MAX_PROFILES needs to be bigger */
     assert(i <= I965_MAX_PROFILES);
@@ -506,8 +547,14 @@ i965_CreateSurfaces(VADriverContextP ctx,
         obj_surface->orig_width = width;
         obj_surface->orig_height = height;
 
-        obj_surface->width = ALIGN(width, 16);
-        obj_surface->height = ALIGN(height, 16);
+	if (IS_G4X(i965->intel.device_id) || IS_IRONLAKE(i965->intel.device_id)) {
+	        obj_surface->width = ALIGN(width, 16);
+        	obj_surface->height = ALIGN(height, 16);
+	} else {
+	        obj_surface->width = ALIGN(width, 128);
+        	obj_surface->height = ALIGN(height, 32);
+	}
+
         obj_surface->flags = SURFACE_REFERENCED;
         obj_surface->fourcc = 0;
         obj_surface->bo = NULL;
@@ -984,8 +1031,8 @@ i965_create_buffer_internal(VADriverContextP ctx,
     case VAEncSequenceParameterBufferType:
     case VAEncPictureParameterBufferType:
     case VAEncSliceParameterBufferType:
-#ifdef HAVE_JPEG_DECODING
-    case VAHuffmanTableBufferType:
+#ifdef HAVE_VA_JPEG_DECODE
+     case VAHuffmanTableBufferType:
 #endif
         /* Ok */
         break;
@@ -1334,7 +1381,7 @@ i965_decoder_render_picture(VADriverContextP ctx,
             vaStatus = I965_RENDER_DECODE_BUFFER(slice_data);
             break;
 
-#ifdef HAVE_JPEG_DECODING
+#ifdef HAVE_VA_JPEG_DECODE
         case VAHuffmanTableBufferType:
             vaStatus = I965_RENDER_DECODE_BUFFER(huffman_table);
             break;
@@ -1479,6 +1526,9 @@ i965_SyncSurface(VADriverContextP ctx,
 
     assert(obj_surface);
 
+    if(obj_surface->bo)
+        drm_intel_bo_wait_rendering(obj_surface->bo);
+
     return VA_STATUS_SUCCESS;
 }
 
@@ -1492,19 +1542,70 @@ i965_QuerySurfaceStatus(VADriverContextP ctx,
 
     assert(obj_surface);
 
-    /* Usually GEM will handle synchronization with the graphics hardware */
-#if 0
     if (obj_surface->bo) {
-        dri_bo_map(obj_surface->bo, 0);
-        dri_bo_unmap(obj_surface->bo);
+        if (drm_intel_bo_busy(obj_surface->bo)){
+            *status = VASurfaceRendering;
+        }
+        else {
+            *status = VASurfaceReady;
+        }
+    } else {
+        *status = VASurfaceReady;
     }
-#endif
-    
-    *status = obj_surface->status;
 
     return VA_STATUS_SUCCESS;
 }
 
+/* Return the driver's attribute entry whose type matches, or NULL if none. */
+static VADisplayAttribute *
+get_display_attribute(VADriverContextP ctx, VADisplayAttribType type)
+{
+    struct i965_driver_data * const i965 = i965_driver_data(ctx);
+    VADisplayAttribute *attrib = i965->display_attributes;
+    unsigned int left = attrib ? i965->num_display_attributes : 0;
+
+    /* A missing table yields left == 0, so the scan is skipped entirely. */
+    for (; left > 0; left--, attrib++) {
+        if (attrib->type == type)
+            return attrib;
+    }
+    return NULL;
+}
+
+/*
+ * Give the driver a private, writable copy of the i965_display_attributes
+ * table and cache a pointer to its rotation entry.  Returns false when the
+ * allocation fails or no rotation entry is found.
+ */
+static bool
+i965_display_attributes_init(VADriverContextP ctx)
+{
+    struct i965_driver_data * const i965 = i965_driver_data(ctx);
+    VADisplayAttribute *table;
+
+    i965->num_display_attributes = ARRAY_ELEMS(i965_display_attributes);
+    table = malloc(i965->num_display_attributes * sizeof(i965->display_attributes[0]));
+    i965->display_attributes = table;
+    if (!table)
+        return false;
+
+    memcpy(table, i965_display_attributes, sizeof(i965_display_attributes));
+
+    i965->rotation_attrib = get_display_attribute(ctx, VADisplayAttribRotation);
+    return i965->rotation_attrib != NULL;
+}
+
+/* Free the driver's display attribute copy, if any, and zero its count. */
+static void
+i965_display_attributes_terminate(VADriverContextP ctx)
+{
+    struct i965_driver_data * const i965 = i965_driver_data(ctx);
+    if (!i965->display_attributes)
+        return;
+    free(i965->display_attributes);
+    i965->display_attributes = NULL;
+    i965->num_display_attributes = 0;
+}
 
 /* 
  * Query display attributes 
@@ -1513,12 +1614,19 @@ i965_QuerySurfaceStatus(VADriverContextP ctx,
  * returned in "attr_list" is returned in "num_attributes".
  */
 VAStatus 
-i965_QueryDisplayAttributes(VADriverContextP ctx,
-                            VADisplayAttribute *attr_list,    /* out */
-                            int *num_attributes)              /* out */
+i965_QueryDisplayAttributes(
+    VADriverContextP    ctx,
+    VADisplayAttribute *attribs,        /* out */
+    int                *num_attribs_ptr /* out */
+)
 {
-    if (num_attributes)
-        *num_attributes = 0;
+    const int num_attribs = ARRAY_ELEMS(i965_display_attributes);
+
+    if (attribs && num_attribs > 0)
+        memcpy(attribs, i965_display_attributes, sizeof(i965_display_attributes));
+
+    if (num_attribs_ptr)
+        *num_attribs_ptr = num_attribs;
 
     return VA_STATUS_SUCCESS;
 }
@@ -1530,12 +1638,27 @@ i965_QueryDisplayAttributes(VADriverContextP ctx,
  * from vaQueryDisplayAttributes() can have their values retrieved.  
  */
 VAStatus 
-i965_GetDisplayAttributes(VADriverContextP ctx,
-                          VADisplayAttribute *attr_list,    /* in/out */
-                          int num_attributes)
+i965_GetDisplayAttributes(
+    VADriverContextP    ctx,
+    VADisplayAttribute *attribs,        /* inout */
+    int                 num_attribs     /* in */
+)
 {
-    /* TODO */
-    return VA_STATUS_ERROR_UNIMPLEMENTED;
+    int i;
+
+    for (i = 0; i < num_attribs; i++) {
+        VADisplayAttribute *src_attrib, * const dst_attrib = &attribs[i];
+
+        src_attrib = get_display_attribute(ctx, dst_attrib->type);
+        if (src_attrib && (src_attrib->flags & VA_DISPLAY_ATTRIB_GETTABLE)) {
+            dst_attrib->min_value = src_attrib->min_value;
+            dst_attrib->max_value = src_attrib->max_value;
+            dst_attrib->value     = src_attrib->value;
+        }
+        else
+            dst_attrib->flags = VA_DISPLAY_ATTRIB_NOT_SUPPORTED;
+    }
+    return VA_STATUS_SUCCESS;
 }
 
 /* 
@@ -1545,12 +1668,32 @@ i965_GetDisplayAttributes(VADriverContextP ctx,
  * the value is out of range, the function returns VA_STATUS_ERROR_ATTR_NOT_SUPPORTED
  */
 VAStatus 
-i965_SetDisplayAttributes(VADriverContextP ctx,
-                          VADisplayAttribute *attr_list,
-                          int num_attributes)
+i965_SetDisplayAttributes(
+    VADriverContextP    ctx,
+    VADisplayAttribute *attribs,        /* in */
+    int                 num_attribs     /* in */
+)
 {
-    /* TODO */
-    return VA_STATUS_ERROR_UNIMPLEMENTED;
+    int i;
+
+    for (i = 0; i < num_attribs; i++) {
+        VADisplayAttribute *dst_attrib, * const src_attrib = &attribs[i];
+
+        dst_attrib = get_display_attribute(ctx, src_attrib->type);
+        if (!dst_attrib)
+            return VA_STATUS_ERROR_ATTR_NOT_SUPPORTED;
+
+        if (!(dst_attrib->flags & VA_DISPLAY_ATTRIB_SETTABLE))
+            continue;
+
+        if (src_attrib->value < dst_attrib->min_value ||
+            src_attrib->value > dst_attrib->max_value)
+            return VA_STATUS_ERROR_INVALID_PARAMETER;
+
+        dst_attrib->value = src_attrib->value;
+        /* XXX: track modified attributes through timestamps */
+    }
+    return VA_STATUS_SUCCESS;
 }
 
 VAStatus 
@@ -1571,7 +1714,9 @@ i965_Init(VADriverContextP ctx)
     if (intel_driver_init(ctx) == False)
         return VA_STATUS_ERROR_UNKNOWN;
 
-    if (IS_G4X(i965->intel.device_id))
+    if (IS_HASWELL(i965->intel.device_id))
+	i965->codec_info = &gen75_hw_codec_info;
+    else if (IS_G4X(i965->intel.device_id))
         i965->codec_info = &g4x_hw_codec_info;
     else if (IS_IRONLAKE(i965->intel.device_id))
         i965->codec_info = &ironlake_hw_codec_info;
@@ -1582,14 +1727,28 @@ i965_Init(VADriverContextP ctx)
     else
         return VA_STATUS_ERROR_UNKNOWN;
 
+    i965->batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_RENDER, 0);
+
+    if (!i965_display_attributes_init(ctx))
+        return VA_STATUS_ERROR_UNKNOWN;
+
     if (i965_post_processing_init(ctx) == False)
         return VA_STATUS_ERROR_UNKNOWN;
 
     if (i965_render_init(ctx) == False)
         return VA_STATUS_ERROR_UNKNOWN;
 
+#ifdef HAVE_VA_WAYLAND
+    if (IS_VA_WAYLAND(ctx) && !i965_output_wayland_init(ctx))
+        return VA_STATUS_ERROR_UNKNOWN;
+#endif
+
+#ifdef HAVE_VA_X11
+    if (IS_VA_X11(ctx) && !i965_output_dri_init(ctx))
+        return VA_STATUS_ERROR_UNKNOWN;
+#endif
+
     _i965InitMutex(&i965->render_mutex);
-    i965->batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_RENDER);
 
     return VA_STATUS_SUCCESS;
 }
@@ -2274,109 +2433,25 @@ i965_PutSurface(VADriverContextP ctx,
                 unsigned int number_cliprects, /* number of clip rects in the clip list */
                 unsigned int flags) /* de-interlacing flags */
 {
-    struct i965_driver_data *i965 = i965_driver_data(ctx); 
-    struct dri_state *dri_state = (struct dri_state *)ctx->dri_state;
-    struct i965_render_state *render_state = &i965->render_state;
-    struct dri_drawable *dri_drawable;
-    union dri_buffer *buffer;
-    struct intel_region *dest_region;
-    struct object_surface *obj_surface; 
-    VARectangle src_rect, dst_rect;
-    int ret;
-    uint32_t name;
-    Bool new_region = False;
-    int pp_flag = 0;
-
-    /* Currently don't support DRI1 */
-    if (dri_state->driConnectedFlag != VA_DRI2)
-        return VA_STATUS_ERROR_UNKNOWN;
-
-    /* Some broken sources such as H.264 conformance case FM2_SVA_C
-     * will get here
-     */
-    obj_surface = SURFACE(surface);
-    if (!obj_surface || !obj_surface->bo)
-        return VA_STATUS_SUCCESS;
-
-    _i965LockMutex(&i965->render_mutex);
-
-    dri_drawable = dri_get_drawable(ctx, (Drawable)draw);
-    assert(dri_drawable);
-
-    buffer = dri_get_rendering_buffer(ctx, dri_drawable);
-    assert(buffer);
-    
-    dest_region = render_state->draw_region;
-
-    if (dest_region) {
-        assert(dest_region->bo);
-        dri_bo_flink(dest_region->bo, &name);
-        
-        if (buffer->dri2.name != name) {
-            new_region = True;
-            dri_bo_unreference(dest_region->bo);
-        }
-    } else {
-        dest_region = (struct intel_region *)calloc(1, sizeof(*dest_region));
-        assert(dest_region);
-        render_state->draw_region = dest_region;
-        new_region = True;
-    }
-
-    if (new_region) {
-        dest_region->x = dri_drawable->x;
-        dest_region->y = dri_drawable->y;
-        dest_region->width = dri_drawable->width;
-        dest_region->height = dri_drawable->height;
-        dest_region->cpp = buffer->dri2.cpp;
-        dest_region->pitch = buffer->dri2.pitch;
-
-        dest_region->bo = intel_bo_gem_create_from_name(i965->intel.bufmgr, "rendering buffer", buffer->dri2.name);
-        assert(dest_region->bo);
-
-        ret = dri_bo_get_tiling(dest_region->bo, &(dest_region->tiling), &(dest_region->swizzle));
-        assert(ret == 0);
-    }
-
-    if ((flags & VA_FILTER_SCALING_MASK) == VA_FILTER_SCALING_NL_ANAMORPHIC)
-        pp_flag |= I965_PP_FLAG_AVS;
-
-    if (flags & VA_TOP_FIELD)
-        pp_flag |= I965_PP_FLAG_TOP_FIELD;
-    else if (flags & VA_BOTTOM_FIELD)
-        pp_flag |= I965_PP_FLAG_BOTTOM_FIELD;
-
-    src_rect.x      = srcx;
-    src_rect.y      = srcy;
-    src_rect.width  = srcw;
-    src_rect.height = srch;
+#ifdef HAVE_VA_X11
+    if (IS_VA_X11(ctx)) {
+        VARectangle src_rect, dst_rect;
 
-    dst_rect.x      = destx;
-    dst_rect.y      = desty;
-    dst_rect.width  = destw;
-    dst_rect.height = desth;
+        src_rect.x      = srcx;
+        src_rect.y      = srcy;
+        src_rect.width  = srcw;
+        src_rect.height = srch;
 
-    intel_render_put_surface(ctx, surface, &src_rect, &dst_rect, pp_flag);
+        dst_rect.x      = destx;
+        dst_rect.y      = desty;
+        dst_rect.width  = destw;
+        dst_rect.height = desth;
 
-    if(obj_surface->subpic != VA_INVALID_ID) {
-        intel_render_put_subpicture(ctx, surface, &src_rect, &dst_rect);
+        return i965_put_surface_dri(ctx, surface, draw, &src_rect, &dst_rect,
+                                    cliprects, number_cliprects, flags);
     }
-
-    dri_swap_buffer(ctx, dri_drawable);
-    obj_surface->flags |= SURFACE_DISPLAYED;
-
-    if ((obj_surface->flags & SURFACE_ALL_MASK) == SURFACE_DISPLAYED) {
-        dri_bo_unreference(obj_surface->bo);
-        obj_surface->bo = NULL;
-        obj_surface->flags &= ~SURFACE_REF_DIS_MASK;
-
-        if (obj_surface->free_private_data)
-            obj_surface->free_private_data(&obj_surface->private_data);
-    }
-
-    _i965UnlockMutex(&i965->render_mutex);
-
-    return VA_STATUS_SUCCESS;
+#endif
+    return VA_STATUS_ERROR_UNIMPLEMENTED;
 }
 
 VAStatus 
@@ -2389,14 +2464,23 @@ i965_Terminate(VADriverContextP ctx)
 
     _i965DestroyMutex(&i965->render_mutex);
 
+#ifdef HAVE_VA_X11
+    if (IS_VA_X11(ctx))
+        i965_output_dri_terminate(ctx);
+#endif
+
+#ifdef HAVE_VA_WAYLAND
+    if (IS_VA_WAYLAND(ctx))
+        i965_output_wayland_terminate(ctx);
+#endif
+
     if (i965_render_terminate(ctx) == False)
         return VA_STATUS_ERROR_UNKNOWN;
 
     if (i965_post_processing_terminate(ctx) == False)
         return VA_STATUS_ERROR_UNKNOWN;
 
-    if (intel_driver_terminate(ctx) == False)
-        return VA_STATUS_ERROR_UNKNOWN;
+    i965_display_attributes_terminate(ctx);
 
     i965_destroy_heap(&i965->buffer_heap, i965_destroy_buffer);
     i965_destroy_heap(&i965->image_heap, i965_destroy_image);
@@ -2405,6 +2489,9 @@ i965_Terminate(VADriverContextP ctx)
     i965_destroy_heap(&i965->context_heap, i965_destroy_context);
     i965_destroy_heap(&i965->config_heap, i965_destroy_config);
 
+    if (intel_driver_terminate(ctx) == False)
+        return VA_STATUS_ERROR_UNKNOWN;
+
     free(ctx->pDriverData);
     ctx->pDriverData = NULL;
 
@@ -2529,11 +2616,11 @@ i965_UnlockSurface(
 
     if (obj_surface == NULL) {
         vaStatus = VA_STATUS_ERROR_INVALID_PARAMETER;   // Surface is absent
-        goto error;
+        return vaStatus;
     }
     if (obj_surface->locked_image_id == VA_INVALID_ID) {
         vaStatus = VA_STATUS_ERROR_INVALID_PARAMETER;   // Surface is not locked
-        goto error;
+        return vaStatus;
     }
 
     locked_img = IMAGE(obj_surface->locked_image_id);
@@ -2560,6 +2647,8 @@ i965_UnlockSurface(
     locked_img->image.image_id = VA_INVALID_ID;
 
  error:
+    obj_surface->locked_image_id = VA_INVALID_ID;
+
     return vaStatus;
 }
 
@@ -2580,7 +2669,7 @@ VA_DRIVER_INIT_FUNC(  VADriverContextP ctx )
     ctx->max_attributes = I965_MAX_CONFIG_ATTRIBUTES;
     ctx->max_image_formats = I965_MAX_IMAGE_FORMATS;
     ctx->max_subpic_formats = I965_MAX_SUBPIC_FORMATS;
-    ctx->max_display_attributes = I965_MAX_DISPLAY_ATTRIBUTES;
+    ctx->max_display_attributes = 1 + ARRAY_ELEMS(i965_display_attributes);
 
     vtable->vaTerminate = i965_Terminate;
     vtable->vaQueryConfigEntrypoints = i965_QueryConfigEntrypoints;
diff --git a/src/i965_drv_video.h b/src/i965_drv_video.h
index ab993bc..de4f747 100644
--- a/src/i965_drv_video.h
+++ b/src/i965_drv_video.h
@@ -42,11 +42,24 @@
 #define I965_MAX_CONFIG_ATTRIBUTES              10
 #define I965_MAX_IMAGE_FORMATS                  3
 #define I965_MAX_SUBPIC_FORMATS                 4
-#define I965_MAX_DISPLAY_ATTRIBUTES             4
 
 #define INTEL_STR_DRIVER_VENDOR                 "Intel"
 #define INTEL_STR_DRIVER_NAME                   "i965"
 
+#define I965_SURFACE_TYPE_IMAGE                   0
+#define I965_SURFACE_TYPE_SURFACE                 1
+
+#define I965_SURFACE_FLAG_FRAME                  0x00000000
+#define I965_SURFACE_FLAG_TOP_FIELD_FIRST        0x00000001
+#define I965_SURFACE_FLAG_BOTTOM_FIELD_FIRST     0x00000002
+
+struct i965_surface /* generic handle: an object id plus a type tag and flags */
+{
+    VAGenericID id;  /* object id; how it is resolved is not shown in this hunk */
+    int type;        /* presumably one of I965_SURFACE_TYPE_* defined above -- TODO confirm */
+    int flags;       /* presumably I965_SURFACE_FLAG_* defined above -- TODO confirm */
+};
+
 struct i965_kernel 
 {
     char *name;
@@ -232,6 +245,16 @@ struct i965_driver_data
     struct i965_render_state render_state;
     void *pp_context;
     char va_vendor[256];
+ 
+    VADisplayAttribute *display_attributes;
+    unsigned int num_display_attributes;
+    VADisplayAttribute *rotation_attrib;
+
+    /* VA/DRI (X11) specific data */
+    struct va_dri_output *dri_output;
+
+    /* VA/Wayland specific data */
+    struct va_wl_output *wl_output;
 };
 
 #define NEW_CONFIG_ID() object_heap_allocate(&i965->config_heap);
@@ -267,4 +290,11 @@ i965_check_alloc_surface_bo(VADriverContextP ctx,
                             unsigned int fourcc,
                             unsigned int subsampling);
 
+
+extern VAStatus i965_MapBuffer(VADriverContextP ctx,
+		VABufferID buf_id,       /* in */
+		void **pbuf);            /* out */
+
+extern VAStatus i965_UnmapBuffer(VADriverContextP ctx, VABufferID buf_id);
+
 #endif /* _I965_DRV_VIDEO_H_ */
diff --git a/src/i965_encoder.c b/src/i965_encoder.c
index c58eb21..6d58e39 100644
--- a/src/i965_encoder.c
+++ b/src/i965_encoder.c
@@ -37,6 +37,8 @@
 #include "i965_defines.h"
 #include "i965_drv_video.h"
 #include "i965_encoder.h"
+#include "gen6_vme.h"
+#include "gen6_mfc.h"
 
 static void 
 gen6_encoder_end_picture(VADriverContextP ctx, 
@@ -72,10 +74,53 @@ gen6_enc_hw_context_init(VADriverContextP ctx, VAProfile profile)
 
     gen6_encoder_context->base.destroy = gen6_encoder_context_destroy;
     gen6_encoder_context->base.run = gen6_encoder_end_picture;
-    gen6_encoder_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER);
+    gen6_encoder_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
 
     gen6_vme_context_init(ctx, &gen6_encoder_context->vme_context);
     gen6_mfc_context_init(ctx, &gen6_encoder_context->mfc_context);
 
     return (struct hw_context *)gen6_encoder_context;
 }
+
+static void /* base.run hook installed by gen75_enc_hw_context_init(): VME pipeline, then MFC pipeline */
+gen75_encoder_end_picture(VADriverContextP ctx, 
+                         VAProfile profile, 
+                         union codec_state *codec_state,
+                         struct hw_context *hw_context)
+{
+    struct gen6_encoder_context *gen6_encoder_context = (struct gen6_encoder_context *)hw_context; /* the gen6 context struct is reused for gen75 */
+    struct encode_state *encode_state = &codec_state->encode; /* encoder member of the codec_state union */
+    VAStatus vaStatus;
+
+    vaStatus = gen75_vme_pipeline(ctx, profile, encode_state, gen6_encoder_context);
+
+    if (vaStatus == VA_STATUS_SUCCESS) /* MFC runs only when VME succeeded */
+        gen75_mfc_pipeline(ctx, profile, encode_state, gen6_encoder_context); /* result discarded: this hook returns void */
+}
+static void /* base.destroy hook: releases everything gen75_enc_hw_context_init() set up */
+gen75_encoder_context_destroy(void *hw_context)
+{
+    struct gen6_encoder_context *gen6_encoder_context = (struct gen6_encoder_context *)hw_context;
+
+    gen75_mfc_context_destroy(&gen6_encoder_context->mfc_context); /* sub-contexts go first, in reverse order of their init calls */
+    gen75_vme_context_destroy(&gen6_encoder_context->vme_context);
+    intel_batchbuffer_free(gen6_encoder_context->base.batch); /* batch buffer created with intel_batchbuffer_new() at init */
+    free(gen6_encoder_context); /* the context itself came from calloc() */
+}
+
+
+struct hw_context * /* returns the new encoder context cast to its hw_context base */
+gen75_enc_hw_context_init(VADriverContextP ctx, VAProfile profile) /* profile is not referenced in the body */
+{
+    struct intel_driver_data *intel = intel_driver_data(ctx);
+    struct gen6_encoder_context *gen6_encoder_context = calloc(1, sizeof(struct gen6_encoder_context)); /* NOTE(review): result is dereferenced below without a NULL check */
+
+    gen6_encoder_context->base.destroy = gen75_encoder_context_destroy; /* teardown hook */
+    gen6_encoder_context->base.run = gen75_encoder_end_picture;         /* per-picture hook */
+    gen6_encoder_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0); /* freed in gen75_encoder_context_destroy() */
+
+    gen75_vme_context_init(ctx, &gen6_encoder_context->vme_context);
+    gen75_mfc_context_init(ctx, &gen6_encoder_context->mfc_context);
+
+    return (struct hw_context *)gen6_encoder_context;
+}
diff --git a/src/i965_encoder.h b/src/i965_encoder.h
index 555efe3..fb989e0 100644
--- a/src/i965_encoder.h
+++ b/src/i965_encoder.h
@@ -46,6 +46,9 @@ struct gen6_encoder_context
     struct gen6_mfc_context mfc_context;
 };
 
+extern struct hw_context *
+gen75_enc_hw_context_init(VADriverContextP ctx, VAProfile profile);
+
 #endif	/* _I965_ENCODER_H_ */
 
 
diff --git a/src/i965_media.c b/src/i965_media.c
index f43feed..432f8ad 100644
--- a/src/i965_media.c
+++ b/src/i965_media.c
@@ -328,7 +328,7 @@ g4x_dec_hw_context_init(VADriverContextP ctx, VAProfile profile)
 
     media_context->base.destroy = i965_media_context_destroy;
     media_context->base.run = i965_media_decode_picture;
-    media_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER);
+    media_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
 
     switch (profile) {
     case VAProfileMPEG2Simple:
@@ -358,7 +358,7 @@ ironlake_dec_hw_context_init(VADriverContextP ctx, VAProfile profile)
 
     media_context->base.destroy = i965_media_context_destroy;
     media_context->base.run = i965_media_decode_picture;
-    media_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER);
+    media_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
 
     switch (profile) {
     case VAProfileMPEG2Simple:
diff --git a/src/i965_output_dri.c b/src/i965_output_dri.c
new file mode 100644
index 0000000..5757ce8
--- /dev/null
+++ b/src/i965_output_dri.c
@@ -0,0 +1,209 @@
+/*
+ * Copyright (C) 2012 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "sysdeps.h"
+#include <va/va_dricommon.h>
+#include "i965_drv_video.h"
+#include "i965_output_dri.h"
+#include "dso_utils.h"
+
+#define LIBVA_X11_NAME "libva-x11.so.1"
+
+typedef struct dri_drawable *(*dri_get_drawable_func)( /* type of the "dri_get_drawable" symbol loaded in i965_output_dri_init() */
+    VADriverContextP ctx, XID drawable);
+typedef union dri_buffer *(*dri_get_rendering_buffer_func)( /* type of the "dri_get_rendering_buffer" symbol */
+    VADriverContextP ctx, struct dri_drawable *d);
+typedef void (*dri_swap_buffer_func)( /* type of the "dri_swap_buffer" symbol */
+    VADriverContextP ctx, struct dri_drawable *d);
+
+struct dri_vtable { /* entry points filled in by dso_get_symbols() in i965_output_dri_init() */
+    dri_get_drawable_func               get_drawable;           /* bound to "dri_get_drawable" */
+    dri_get_rendering_buffer_func       get_rendering_buffer;   /* bound to "dri_get_rendering_buffer" */
+    dri_swap_buffer_func                swap_buffer;            /* bound to "dri_swap_buffer" */
+};
+
+struct va_dri_output { /* per-driver VA/DRI state, stored in i965->dri_output */
+    struct dso_handle  *handle; /* handle returned by dso_open(LIBVA_X11_NAME) */
+    struct dri_vtable   vtable; /* functions resolved from that handle */
+};
+
+bool /* true on success; on any failure the partial state is torn down and false is returned */
+i965_output_dri_init(VADriverContextP ctx)
+{
+    struct i965_driver_data * const i965 = i965_driver_data(ctx); 
+    struct dso_handle *dso_handle;
+    struct dri_vtable *dri_vtable;
+
+    static const struct dso_symbol symbols[] = { /* symbol name -> byte offset of its slot in struct dri_vtable */
+        { "dri_get_drawable",
+          offsetof(struct dri_vtable, get_drawable) },
+        { "dri_get_rendering_buffer",
+          offsetof(struct dri_vtable, get_rendering_buffer) },
+        { "dri_swap_buffer",
+          offsetof(struct dri_vtable, swap_buffer) },
+        { NULL, } /* NULL name ends the table */
+    };
+
+    i965->dri_output = calloc(1, sizeof(struct va_dri_output)); /* zeroed, so handle starts out NULL */
+    if (!i965->dri_output)
+        goto error;
+
+    i965->dri_output->handle = dso_open(LIBVA_X11_NAME); /* the X11 helper library is opened at run time */
+    if (!i965->dri_output->handle)
+        goto error;
+
+    dso_handle = i965->dri_output->handle;
+    dri_vtable = &i965->dri_output->vtable;
+    if (!dso_get_symbols(dso_handle, dri_vtable, sizeof(*dri_vtable), symbols)) /* fills the vtable from the table above */
+        goto error;
+    return true;
+
+error:
+    i965_output_dri_terminate(ctx); /* copes with a NULL dri_output and a NULL handle */
+    return false;
+}
+
+void /* releases the VA/DRI output state; does nothing when it was never allocated */
+i965_output_dri_terminate(VADriverContextP ctx)
+{
+    struct i965_driver_data * const i965 = i965_driver_data(ctx); 
+    struct va_dri_output * const dri_output = i965->dri_output;
+
+    if (!dri_output) /* nothing allocated, or already terminated */
+        return;
+
+    if (dri_output->handle) { /* close the library only if dso_open() succeeded */
+        dso_close(dri_output->handle);
+        dri_output->handle = NULL;
+    }
+
+    free(dri_output);
+    i965->dri_output = NULL; /* a repeated call then returns early above */
+}
+
+VAStatus /* renders a VA surface (and its subpicture, if any) into a DRI2 drawable and swaps buffers */
+i965_put_surface_dri(
+    VADriverContextP    ctx,
+    VASurfaceID         surface,
+    void               *draw,           /* cast to Drawable before use */
+    const VARectangle  *src_rect,
+    const VARectangle  *dst_rect,
+    const VARectangle  *cliprects,      /* not referenced in the body */
+    unsigned int        num_cliprects,  /* not referenced in the body */
+    unsigned int        flags           /* scaling and field-selection bits, translated to pp_flag below */
+)
+{
+    struct i965_driver_data * const i965 = i965_driver_data(ctx); 
+    struct dri_vtable * const dri_vtable = &i965->dri_output->vtable;
+    struct i965_render_state * const render_state = &i965->render_state;
+    struct dri_drawable *dri_drawable;
+    union dri_buffer *buffer;
+    struct intel_region *dest_region;
+    struct object_surface *obj_surface; 
+    unsigned int pp_flag = 0;
+    bool new_region = false;
+    uint32_t name;
+    int ret;
+
+    /* Currently don't support DRI1 */
+    if (!VA_CHECK_DRM_AUTH_TYPE(ctx, VA_DRM_AUTH_DRI2))
+        return VA_STATUS_ERROR_UNKNOWN;
+
+    /* Some broken sources such as H.264 conformance case FM2_SVA_C
+     * will get here
+     */
+    obj_surface = SURFACE(surface);
+    if (!obj_surface || !obj_surface->bo)
+        return VA_STATUS_SUCCESS; /* unknown surface or no buffer object: reported as success, nothing is drawn */
+
+    _i965LockMutex(&i965->render_mutex); /* held until just before the final return */
+
+    dri_drawable = dri_vtable->get_drawable(ctx, (Drawable)draw);
+    assert(dri_drawable); /* NOTE(review): assert is the only check; with NDEBUG a NULL result is dereferenced below */
+
+    buffer = dri_vtable->get_rendering_buffer(ctx, dri_drawable);
+    assert(buffer);
+    
+    dest_region = render_state->draw_region; /* region kept from a previous call, or NULL */
+
+    if (dest_region) {
+        assert(dest_region->bo);
+        dri_bo_flink(dest_region->bo, &name); /* global name of the kept region's bo */
+        
+        if (buffer->dri2.name != name) { /* drawable now has a different buffer: drop the old bo and rebuild */
+            new_region = True;
+            dri_bo_unreference(dest_region->bo);
+        }
+    } else {
+        dest_region = (struct intel_region *)calloc(1, sizeof(*dest_region)); /* first use: allocate the region */
+        assert(dest_region);
+        render_state->draw_region = dest_region;
+        new_region = True;
+    }
+
+    if (new_region) { /* copy geometry from the drawable and wrap its current buffer */
+        dest_region->x = dri_drawable->x;
+        dest_region->y = dri_drawable->y;
+        dest_region->width = dri_drawable->width;
+        dest_region->height = dri_drawable->height;
+        dest_region->cpp = buffer->dri2.cpp;
+        dest_region->pitch = buffer->dri2.pitch;
+
+        dest_region->bo = intel_bo_gem_create_from_name(i965->intel.bufmgr, "rendering buffer", buffer->dri2.name);
+        assert(dest_region->bo);
+
+        ret = dri_bo_get_tiling(dest_region->bo, &(dest_region->tiling), &(dest_region->swizzle));
+        assert(ret == 0);
+    }
+
+    if ((flags & VA_FILTER_SCALING_MASK) == VA_FILTER_SCALING_NL_ANAMORPHIC)
+        pp_flag |= I965_PP_FLAG_AVS;
+
+    if (flags & VA_TOP_FIELD) /* top field wins when both field bits are set */
+        pp_flag |= I965_PP_FLAG_TOP_FIELD;
+    else if (flags & VA_BOTTOM_FIELD)
+        pp_flag |= I965_PP_FLAG_BOTTOM_FIELD;
+
+    intel_render_put_surface(ctx, surface, src_rect, dst_rect, pp_flag);
+
+    if(obj_surface->subpic != VA_INVALID_ID) { /* draw the attached subpicture after the surface */
+        intel_render_put_subpicture(ctx, surface, src_rect, dst_rect);
+    }
+
+    dri_vtable->swap_buffer(ctx, dri_drawable);
+    obj_surface->flags |= SURFACE_DISPLAYED;
+
+    if ((obj_surface->flags & SURFACE_ALL_MASK) == SURFACE_DISPLAYED) { /* DISPLAYED is the only bit set within SURFACE_ALL_MASK */
+        dri_bo_unreference(obj_surface->bo); /* drop the surface's buffer object */
+        obj_surface->bo = NULL;
+        obj_surface->flags &= ~SURFACE_REF_DIS_MASK;
+
+        if (obj_surface->free_private_data)
+            obj_surface->free_private_data(&obj_surface->private_data);
+    }
+
+    _i965UnlockMutex(&i965->render_mutex);
+
+    return VA_STATUS_SUCCESS;
+}
diff --git a/src/i965_output_dri.h b/src/i965_output_dri.h
new file mode 100644
index 0000000..cf37b14
--- /dev/null
+++ b/src/i965_output_dri.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (C) 2012 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef I965_OUTPUT_DRI_H
+#define I965_OUTPUT_DRI_H
+
+#include <stdbool.h>
+#include <va/va_backend.h>
+
+bool
+i965_output_dri_init(VADriverContextP ctx);
+
+void
+i965_output_dri_terminate(VADriverContextP ctx);
+
+VAStatus
+i965_put_surface_dri(
+    VADriverContextP    ctx,
+    VASurfaceID         surface,
+    void               *draw,
+    const VARectangle  *src_rect,
+    const VARectangle  *dst_rect,
+    const VARectangle  *cliprects,
+    unsigned int        num_cliprects,
+    unsigned int        flags
+);
+
+#endif /* I965_OUTPUT_DRI_H */
diff --git a/src/i965_output_wayland.c b/src/i965_output_wayland.c
new file mode 100644
index 0000000..30ffb71
--- /dev/null
+++ b/src/i965_output_wayland.c
@@ -0,0 +1,343 @@
+/*
+ * Copyright (C) 2012 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <va/va_backend.h>
+#include <va/va_backend_wayland.h>
+#include <wayland-client.h>
+#include <wayland-drm-client-protocol.h>
+#include "intel_driver.h"
+#include "i965_output_wayland.h"
+#include "i965_drv_video.h"
+#include "i965_defines.h"
+#include "dso_utils.h"
+
+#define LIBEGL_NAME             "libEGL.so.1"
+#define LIBWAYLAND_CLIENT_NAME  "libwayland-client.so.0"
+
+typedef uint32_t (*wl_display_get_global_func)(struct wl_display *display, /* type of the "wl_display_get_global" symbol */
+    const char *interface, uint32_t version);
+typedef void *(*wl_display_bind_func)(struct wl_display *display, /* type of the "wl_display_bind" symbol */
+    uint32_t name, const struct wl_interface *interface);
+typedef void (*wl_display_roundtrip_func)(struct wl_display *display); /* type of the "wl_display_roundtrip" symbol */
+
+typedef struct wl_proxy *(*wl_proxy_create_func)(struct wl_proxy *factory, /* type of the "wl_proxy_create" symbol */
+    const struct wl_interface *interface);
+typedef void (*wl_proxy_destroy_func)(struct wl_proxy *proxy); /* type of the "wl_proxy_destroy" symbol */
+typedef void (*wl_proxy_marshal_func)(struct wl_proxy *p, uint32_t opcode, ...); /* type of the "wl_proxy_marshal" symbol */
+
+struct wl_vtable { /* symbols resolved at run time by i965_output_wayland_init() */
+    const struct wl_interface  *buffer_interface;   /* "wl_buffer_interface" from LIBWAYLAND_CLIENT_NAME */
+    const struct wl_interface  *drm_interface;      /* "wl_drm_interface" from LIBEGL_NAME */
+    wl_display_get_global_func  display_get_global; /* "wl_display_get_global" */
+    wl_display_bind_func        display_bind;       /* "wl_display_bind" */
+    wl_display_roundtrip_func   display_roundtrip;  /* "wl_display_roundtrip" */
+    wl_proxy_create_func        proxy_create;       /* "wl_proxy_create" */
+    wl_proxy_destroy_func       proxy_destroy;      /* "wl_proxy_destroy" */
+    wl_proxy_marshal_func       proxy_marshal;      /* "wl_proxy_marshal" */
+};
+
+struct va_wl_output { /* per-driver VA/Wayland state, stored in i965->wl_output */
+    struct dso_handle  *libegl_handle;          /* from dso_open(LIBEGL_NAME) */
+    struct dso_handle  *libwl_client_handle;    /* from dso_open(LIBWAYLAND_CLIENT_NAME) */
+    struct wl_vtable    vtable;                 /* entries resolved from both libraries */
+    struct wl_drm      *wl_drm;                 /* bound on first use by ensure_wl_output(); NULL until then */
+};
+
+/* Ensure wl_drm instance is created; returns false when the "wl_drm" global cannot be found or bound */
+static bool
+ensure_wl_output(VADriverContextP ctx)
+{
+    struct i965_driver_data * const i965 = i965_driver_data(ctx);
+    struct va_wl_output * const wl_output = i965->wl_output;
+    struct wl_vtable * const wl_vtable = &wl_output->vtable;
+    uint32_t id;
+
+    if (wl_output->wl_drm) /* already bound by an earlier call */
+        return true;
+
+    id = wl_vtable->display_get_global(ctx->native_dpy, "wl_drm", 1);
+    if (!id) { /* not found yet: do one roundtrip, then look it up again */
+        wl_vtable->display_roundtrip(ctx->native_dpy);
+        id = wl_vtable->display_get_global(ctx->native_dpy, "wl_drm", 1);
+        if (!id)
+            return false;
+    }
+
+    wl_output->wl_drm =
+        wl_vtable->display_bind(ctx->native_dpy, id, wl_vtable->drm_interface); /* kept until i965_output_wayland_terminate() */
+    if (!wl_output->wl_drm)
+        return false;
+    return true;
+}
+
+/* Create planar YUV buffer: returns the new proxy cast to wl_buffer, or NULL if proxy creation fails */
+static struct wl_buffer *
+create_planar_buffer(
+    struct va_wl_output *wl_output,     /* supplies the vtable and the bound wl_drm object */
+    uint32_t             name,          /* caller passes the name obtained from drm_intel_bo_flink() */
+    int32_t              width,
+    int32_t              height,
+    uint32_t             format,        /* caller passes a WL_DRM_FORMAT_* value */
+    int32_t              offsets[3],    /* one entry per plane */
+    int32_t              pitches[3]     /* one entry per plane */
+)
+{
+    struct wl_vtable * const wl_vtable = &wl_output->vtable;
+    struct wl_proxy *id;
+
+    id = wl_vtable->proxy_create( /* new proxy using the wl_buffer interface, created from the wl_drm proxy */
+        (struct wl_proxy *)wl_output->wl_drm,
+        wl_vtable->buffer_interface
+    );
+    if (!id)
+        return NULL;
+
+    wl_vtable->proxy_marshal( /* sends WL_DRM_CREATE_PLANAR_BUFFER on wl_drm with the arguments below */
+        (struct wl_proxy *)wl_output->wl_drm,
+        WL_DRM_CREATE_PLANAR_BUFFER,
+        id,
+        name,
+        width, height, format,
+        offsets[0], pitches[0],
+        offsets[1], pitches[1],
+        offsets[2], pitches[2]
+    );
+    return (struct wl_buffer *)id;
+}
+
+/* Hook to return Wayland buffer associated with the VA surface; installed as vtable->vaGetSurfaceBufferWl */
+static VAStatus
+va_GetSurfaceBufferWl(
+    struct VADriverContext *ctx,
+    VASurfaceID             surface,
+    unsigned int            flags,       /* only VA_FRAME_PICTURE is accepted */
+    struct wl_buffer      **out_buffer   /* receives the new buffer on success; must not be NULL */
+)
+{
+    struct i965_driver_data * const i965 = i965_driver_data(ctx);
+    struct object_surface *obj_surface;
+    struct wl_buffer *buffer;
+    uint32_t name, drm_format;
+    int offsets[3], pitches[3];
+
+    obj_surface = SURFACE(surface);
+    if (!obj_surface)
+        return VA_STATUS_ERROR_INVALID_SURFACE;
+
+    if (flags != VA_FRAME_PICTURE)
+        return VA_STATUS_ERROR_FLAG_NOT_SUPPORTED;
+
+    if (!out_buffer)
+        return VA_STATUS_ERROR_INVALID_PARAMETER;
+
+    if (!ensure_wl_output(ctx)) /* binds wl_drm on first use */
+        return VA_STATUS_ERROR_INVALID_DISPLAY;
+
+    if (!obj_surface->bo || drm_intel_bo_flink(obj_surface->bo, &name) != 0)
+        return VA_STATUS_ERROR_INVALID_SURFACE; /* a surface without a buffer object is rejected before flink is called on it */
+
+    switch (obj_surface->fourcc) { /* choose the wl_drm format and per-plane layout */
+    case VA_FOURCC('N','V','1','2'):
+        drm_format = WL_DRM_FORMAT_NV12;
+        offsets[0] = 0;
+        pitches[0] = obj_surface->width;
+        offsets[1] = obj_surface->width * obj_surface->y_cb_offset; /* presumably y_cb_offset is counted in rows -- TODO confirm */
+        pitches[1] = obj_surface->cb_cr_pitch;
+        offsets[2] = 0; /* only two planes are described for NV12; the third entry is zeroed */
+        pitches[2] = 0;
+        break;
+    case VA_FOURCC('Y','V','1','2'):
+    case VA_FOURCC('I','4','2','0'):
+    case VA_FOURCC('I','M','C','1'):
+        switch (obj_surface->subsampling) { /* for these fourccs the format follows the subsampling */
+        case SUBSAMPLE_YUV411:
+            drm_format = WL_DRM_FORMAT_YUV411;
+            break;
+        case SUBSAMPLE_YUV420:
+            drm_format = WL_DRM_FORMAT_YUV420;
+            break;
+        case SUBSAMPLE_YUV422H:
+        case SUBSAMPLE_YUV422V:
+            drm_format = WL_DRM_FORMAT_YUV422;
+            break;
+        case SUBSAMPLE_YUV444:
+            drm_format = WL_DRM_FORMAT_YUV444;
+            break;
+        default:
+            assert(0 && "unsupported subsampling");
+            return VA_STATUS_ERROR_INVALID_IMAGE_FORMAT; /* reached only when asserts are compiled out */
+        }
+        offsets[0] = 0;
+        pitches[0] = obj_surface->width;
+        offsets[1] = obj_surface->width * obj_surface->y_cb_offset;
+        pitches[1] = obj_surface->cb_cr_pitch;
+        offsets[2] = obj_surface->width * obj_surface->y_cr_offset;
+        pitches[2] = obj_surface->cb_cr_pitch; /* both chroma planes use the same pitch */
+        break;
+    default:
+        assert(0 && "unsupported format");
+        return VA_STATUS_ERROR_INVALID_IMAGE_FORMAT;
+    }
+
+    buffer = create_planar_buffer(
+        i965->wl_output,
+        name,
+        obj_surface->orig_width,  /* buffer size uses orig_width/orig_height, while the pitches above use width */
+        obj_surface->orig_height,
+        drm_format,
+        offsets,
+        pitches
+    );
+    if (!buffer)
+        return VA_STATUS_ERROR_ALLOCATION_FAILED;
+
+    *out_buffer = buffer;
+    return VA_STATUS_SUCCESS;
+}
+
+/* Hook to return Wayland buffer associated with the VA image; currently a stub that always fails */
+static VAStatus
+va_GetImageBufferWl(
+    struct VADriverContext *ctx,         /* unused */
+    VAImageID               image,       /* unused */
+    unsigned int            flags,       /* unused */
+    struct wl_buffer      **out_buffer   /* unused; never written */
+)
+{
+    return VA_STATUS_ERROR_UNIMPLEMENTED;
+}
+
+bool /* NOTE(review): external linkage, yet i965_output_wayland.h does not declare it -- confirm whether it should be static */
+ensure_driver_vtable(VADriverContextP ctx) /* installs the two Wayland hooks into ctx->vtable_wayland */
+{
+    struct VADriverVTableWayland * const vtable = ctx->vtable_wayland;
+
+    if (!vtable) /* no Wayland vtable on this context: report failure */
+        return false;
+
+    vtable->vaGetSurfaceBufferWl = va_GetSurfaceBufferWl;
+    vtable->vaGetImageBufferWl   = va_GetImageBufferWl; /* stub returning VA_STATUS_ERROR_UNIMPLEMENTED */
+    return true;
+}
+
+bool /* true on success; on failure after allocation the partial state is torn down and false is returned */
+i965_output_wayland_init(VADriverContextP ctx)
+{
+    struct i965_driver_data * const i965 = i965_driver_data(ctx);
+    struct dso_handle *dso_handle;
+    struct wl_vtable *wl_vtable;
+
+    static const struct dso_symbol libegl_symbols[] = { /* resolved from LIBEGL_NAME */
+        { "wl_drm_interface",
+          offsetof(struct wl_vtable, drm_interface) },
+        { NULL, }
+    };
+
+    static const struct dso_symbol libwl_client_symbols[] = { /* resolved from LIBWAYLAND_CLIENT_NAME */
+        { "wl_buffer_interface",
+          offsetof(struct wl_vtable, buffer_interface) },
+        { "wl_display_get_global",
+          offsetof(struct wl_vtable, display_get_global) },
+        { "wl_display_bind",
+          offsetof(struct wl_vtable, display_bind) },
+        { "wl_display_roundtrip",
+          offsetof(struct wl_vtable, display_roundtrip) },
+        { "wl_proxy_create",
+          offsetof(struct wl_vtable, proxy_create) },
+        { "wl_proxy_destroy",
+          offsetof(struct wl_vtable, proxy_destroy) },
+        { "wl_proxy_marshal",
+          offsetof(struct wl_vtable, proxy_marshal) },
+        { NULL, }
+    };
+
+    if (ctx->display_type != VA_DISPLAY_WAYLAND) /* not a Wayland display: fail before allocating anything */
+        return false;
+
+    i965->wl_output = calloc(1, sizeof(struct va_wl_output)); /* zeroed, so both handles and wl_drm start out NULL */
+    if (!i965->wl_output)
+        goto error;
+
+    i965->wl_output->libegl_handle = dso_open(LIBEGL_NAME);
+    if (!i965->wl_output->libegl_handle)
+        goto error;
+
+    dso_handle = i965->wl_output->libegl_handle;
+    wl_vtable  = &i965->wl_output->vtable;
+    if (!dso_get_symbols(dso_handle, wl_vtable, sizeof(*wl_vtable),
+                         libegl_symbols)) /* first pass: the libEGL symbol */
+        goto error;
+
+    i965->wl_output->libwl_client_handle = dso_open(LIBWAYLAND_CLIENT_NAME);
+    if (!i965->wl_output->libwl_client_handle)
+        goto error;
+
+    dso_handle = i965->wl_output->libwl_client_handle;
+    wl_vtable  = &i965->wl_output->vtable;
+    if (!dso_get_symbols(dso_handle, wl_vtable, sizeof(*wl_vtable),
+                         libwl_client_symbols)) /* second pass: same vtable, the libwayland-client symbols */
+        goto error;
+
+    if (!ensure_driver_vtable(ctx)) /* install the vaGet*BufferWl hooks */
+        goto error;
+    return true;
+
+error:
+    i965_output_wayland_terminate(ctx); /* copes with a NULL wl_output and NULL handles */
+    return false;
+}
+
+void /* releases the VA/Wayland output state; does nothing for non-Wayland displays or when nothing was allocated */
+i965_output_wayland_terminate(VADriverContextP ctx)
+{
+    struct i965_driver_data * const i965 = i965_driver_data(ctx);
+    struct va_wl_output *wl_output;
+
+    if (ctx->display_type != VA_DISPLAY_WAYLAND)
+        return;
+
+    wl_output = i965->wl_output;
+    if (!wl_output) /* never initialised, or already terminated */
+        return;
+
+    if (wl_output->wl_drm) { /* destroy the bound proxy while the client library is still open */
+        wl_output->vtable.proxy_destroy((struct wl_proxy *)wl_output->wl_drm);
+        wl_output->wl_drm = NULL;
+    }
+
+    if (wl_output->libegl_handle) {
+        dso_close(wl_output->libegl_handle);
+        wl_output->libegl_handle = NULL;
+    }
+
+    if (wl_output->libwl_client_handle) {
+        dso_close(wl_output->libwl_client_handle);
+        wl_output->libwl_client_handle = NULL;
+    }
+    free(wl_output);
+    i965->wl_output = NULL; /* a repeated call then returns early above */
+}
diff --git a/src/i965_output_wayland.h b/src/i965_output_wayland.h
new file mode 100644
index 0000000..61ca39f
--- /dev/null
+++ b/src/i965_output_wayland.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2012 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef I965_OUTPUT_WAYLAND_H
+#define I965_OUTPUT_WAYLAND_H
+
+#include <stdbool.h>
+/* Returns true on success; false on failure or for non-Wayland displays. */
+bool
+i965_output_wayland_init(VADriverContextP ctx);
+/* Releases the Wayland output state; no-op for non-Wayland displays. */
+void
+i965_output_wayland_terminate(VADriverContextP ctx);
+
+#endif /* I965_OUTPUT_WAYLAND_H */
diff --git a/src/i965_post_processing.c b/src/i965_post_processing.c
old mode 100644
new mode 100755
index 6e238b4..f10cadd
--- a/src/i965_post_processing.c
+++ b/src/i965_post_processing.c
@@ -36,6 +36,7 @@
 #include "i965_defines.h"
 #include "i965_structs.h"
 #include "i965_drv_video.h"
+#include "gen75_vpp_vebox.h"
 #include "i965_post_processing.h"
 #include "i965_render.h"
 
@@ -43,41 +44,113 @@
                      IS_GEN6((ctx)->intel.device_id) ||         \
                      IS_GEN7((ctx)->intel.device_id))
 
+#define SURFACE_STATE_PADDED_SIZE_0_I965        ALIGN(sizeof(struct i965_surface_state), 32)
+#define SURFACE_STATE_PADDED_SIZE_1_I965        ALIGN(sizeof(struct i965_surface_state2), 32)
+#define SURFACE_STATE_PADDED_SIZE_I965          MAX(SURFACE_STATE_PADDED_SIZE_0_I965, SURFACE_STATE_PADDED_SIZE_1_I965)
+/* Same 32-byte-aligned sizing for the two gen7 surface-state layouts. */
+#define SURFACE_STATE_PADDED_SIZE_0_GEN7        ALIGN(sizeof(struct gen7_surface_state), 32)
+#define SURFACE_STATE_PADDED_SIZE_1_GEN7        ALIGN(sizeof(struct gen7_surface_state2), 32)
+#define SURFACE_STATE_PADDED_SIZE_GEN7          MAX(SURFACE_STATE_PADDED_SIZE_0_GEN7, SURFACE_STATE_PADDED_SIZE_1_GEN7)
+/* A slot fits any of the four layouts; (index) is parenthesized so expression arguments scale as a whole. */
+#define SURFACE_STATE_PADDED_SIZE               MAX(SURFACE_STATE_PADDED_SIZE_I965, SURFACE_STATE_PADDED_SIZE_GEN7)
+#define SURFACE_STATE_OFFSET(index)             (SURFACE_STATE_PADDED_SIZE * (index))
+#define BINDING_TABLE_OFFSET                    SURFACE_STATE_OFFSET(MAX_PP_SURFACES)
+
+#define GPU_ASM_BLOCK_WIDTH         16
+#define GPU_ASM_BLOCK_HEIGHT        8
+#define GPU_ASM_X_OFFSET_ALIGNMENT  4
+
 static const uint32_t pp_null_gen5[][4] = {
-#include "shaders/post_processing/null.g4b.gen5"
+#include "shaders/post_processing/gen5_6/null.g4b.gen5"
+};
+
+static const uint32_t pp_nv12_load_save_nv12_gen5[][4] = {
+#include "shaders/post_processing/gen5_6/nv12_load_save_nv12.g4b.gen5"
+};
+
+static const uint32_t pp_nv12_load_save_pl3_gen5[][4] = {
+#include "shaders/post_processing/gen5_6/nv12_load_save_pl3.g4b.gen5"
 };
 
-static const uint32_t pp_nv12_load_save_gen5[][4] = {
-#include "shaders/post_processing/nv12_load_save_nv12.g4b.gen5"
+static const uint32_t pp_pl3_load_save_nv12_gen5[][4] = {
+#include "shaders/post_processing/gen5_6/pl3_load_save_nv12.g4b.gen5"
+};
+
+static const uint32_t pp_pl3_load_save_pl3_gen5[][4] = {
+#include "shaders/post_processing/gen5_6/pl3_load_save_pl3.g4b.gen5"
 };
 
 static const uint32_t pp_nv12_scaling_gen5[][4] = {
-#include "shaders/post_processing/nv12_scaling_nv12.g4b.gen5"
+#include "shaders/post_processing/gen5_6/nv12_scaling_nv12.g4b.gen5"
 };
 
 static const uint32_t pp_nv12_avs_gen5[][4] = {
-#include "shaders/post_processing/nv12_avs_nv12.g4b.gen5"
+#include "shaders/post_processing/gen5_6/nv12_avs_nv12.g4b.gen5"
 };
 
 static const uint32_t pp_nv12_dndi_gen5[][4] = {
-#include "shaders/post_processing/nv12_dndi_nv12.g4b.gen5"
+#include "shaders/post_processing/gen5_6/nv12_dndi_nv12.g4b.gen5"
+};
+
+static const uint32_t pp_nv12_dn_gen5[][4] = {
+#include "shaders/post_processing/gen5_6/nv12_dn_nv12.g4b.gen5"
+};
+
+static const uint32_t pp_nv12_load_save_pa_gen5[][4] = {
+#include "shaders/post_processing/gen5_6/nv12_load_save_pa.g4b.gen5"
+};
+
+static const uint32_t pp_pl3_load_save_pa_gen5[][4] = {
+#include "shaders/post_processing/gen5_6/pl3_load_save_pa.g4b.gen5"
 };
 
-static void pp_null_initialize(VADriverContextP ctx,
-                               VASurfaceID in_surface_id, VASurfaceID out_surface_id,
-                               const VARectangle *src_rect, const VARectangle *dst_rect);
-static void pp_nv12_avs_initialize(VADriverContextP ctx,
-                                   VASurfaceID in_surface_id, VASurfaceID out_surface_id,
-                                   const VARectangle *src_rect, const VARectangle *dst_rect);
-static void pp_nv12_scaling_initialize(VADriverContextP ctx,
-                                       VASurfaceID in_surface_id, VASurfaceID out_surface_id,
-                                       const VARectangle *src_rect, const VARectangle *dst_rect);
-static void pp_nv12_load_save_initialize(VADriverContextP ctx,
-                                         VASurfaceID in_surface_id, VASurfaceID out_surface_id,
-                                         const VARectangle *src_rect, const VARectangle *dst_rect);
-static void pp_nv12_dndi_initialize(VADriverContextP ctx,
-                                    VASurfaceID in_surface_id, VASurfaceID out_surface_id,
-                                    const VARectangle *src_rect, const VARectangle *dst_rect);
+static const uint32_t pp_pa_load_save_nv12_gen5[][4] = {
+#include "shaders/post_processing/gen5_6/pa_load_save_nv12.g4b.gen5"
+};
+
+static VAStatus pp_null_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
+                               const struct i965_surface *src_surface,
+                               const VARectangle *src_rect,
+                               struct i965_surface *dst_surface,
+                               const VARectangle *dst_rect,
+                               void *filter_param);
+static VAStatus pp_nv12_avs_initialize_nlas(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
+                                            const struct i965_surface *src_surface,
+                                            const VARectangle *src_rect,
+                                            struct i965_surface *dst_surface,
+                                            const VARectangle *dst_rect,
+                                            void *filter_param);
+static VAStatus pp_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
+                                       const struct i965_surface *src_surface,
+                                       const VARectangle *src_rect,
+                                       struct i965_surface *dst_surface,
+                                       const VARectangle *dst_rect,
+                                       void *filter_param);
+static VAStatus gen6_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
+                                             const struct i965_surface *src_surface,
+                                             const VARectangle *src_rect,
+                                             struct i965_surface *dst_surface,
+                                             const VARectangle *dst_rect,
+                                             void *filter_param);
+static VAStatus pp_plx_load_save_plx_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
+                                            const struct i965_surface *src_surface,
+                                            const VARectangle *src_rect,
+                                            struct i965_surface *dst_surface,
+                                            const VARectangle *dst_rect,
+                                            void *filter_param);
+static VAStatus pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
+                                    const struct i965_surface *src_surface,
+                                    const VARectangle *src_rect,
+                                    struct i965_surface *dst_surface,
+                                    const VARectangle *dst_rect,
+                                    void *filter_param);
+
+static VAStatus pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
+                                  const struct i965_surface *src_surface,
+                                  const VARectangle *src_rect,
+                                  struct i965_surface *dst_surface,
+                                  const VARectangle *dst_rect,
+                                  void *filter_param);
 
 static struct pp_module pp_modules_gen5[] = {
     {
@@ -94,14 +167,50 @@ static struct pp_module pp_modules_gen5[] = {
 
     {
         {
-            "NV12 Load & Save module",
-            PP_NV12_LOAD_SAVE,
-            pp_nv12_load_save_gen5,
-            sizeof(pp_nv12_load_save_gen5),
+            "NV12_NV12",
+            PP_NV12_LOAD_SAVE_N12,
+            pp_nv12_load_save_nv12_gen5,
+            sizeof(pp_nv12_load_save_nv12_gen5),
+            NULL,
+        },
+
+        pp_plx_load_save_plx_initialize,
+    },
+
+    {
+        {
+            "NV12_PL3",
+            PP_NV12_LOAD_SAVE_PL3,
+            pp_nv12_load_save_pl3_gen5,
+            sizeof(pp_nv12_load_save_pl3_gen5),
+            NULL,
+        },
+
+        pp_plx_load_save_plx_initialize,
+    },
+
+    {
+        {
+            "PL3_NV12",
+            PP_PL3_LOAD_SAVE_N12,
+            pp_pl3_load_save_nv12_gen5,
+            sizeof(pp_pl3_load_save_nv12_gen5),
             NULL,
         },
 
-        pp_nv12_load_save_initialize,
+        pp_plx_load_save_plx_initialize,
+    },
+
+    {
+        {
+            "PL3_PL3",
+            PP_PL3_LOAD_SAVE_PL3,
+            pp_pl3_load_save_pl3_gen5,
+            sizeof(pp_pl3_load_save_pl3_gen5),
+            NULL,
+        },
+
+        pp_plx_load_save_plx_initialize
     },
 
     {
@@ -125,7 +234,7 @@ static struct pp_module pp_modules_gen5[] = {
             NULL,
         },
 
-        pp_nv12_avs_initialize,
+        pp_nv12_avs_initialize_nlas,
     },
 
     {
@@ -139,26 +248,101 @@ static struct pp_module pp_modules_gen5[] = {
 
         pp_nv12_dndi_initialize,
     },
+
+    {
+        {
+            "NV12 DN module",
+            PP_NV12_DN,
+            pp_nv12_dn_gen5,
+            sizeof(pp_nv12_dn_gen5),
+            NULL,
+        },
+
+        pp_nv12_dn_initialize,
+    },
+
+    {
+        {
+            "NV12_PA module",
+            PP_NV12_LOAD_SAVE_PA,
+            pp_nv12_load_save_pa_gen5,
+            sizeof(pp_nv12_load_save_pa_gen5),
+            NULL,
+        },
+    
+        pp_plx_load_save_plx_initialize,
+    },
+   {
+        {
+            "PL3_PA module",
+            PP_PL3_LOAD_SAVE_PA,
+            pp_pl3_load_save_pa_gen5,
+            sizeof(pp_pl3_load_save_pa_gen5),
+            NULL,
+        },
+    
+        pp_plx_load_save_plx_initialize,
+    },
+    {
+        {
+            "PA_NV12 module",
+            PP_PA_LOAD_SAVE_NV12,
+            pp_pa_load_save_nv12_gen5,
+            sizeof(pp_pa_load_save_nv12_gen5),
+            NULL,
+        },
+    
+        pp_plx_load_save_plx_initialize,
+    },
+ 
 };
 
 static const uint32_t pp_null_gen6[][4] = {
-#include "shaders/post_processing/null.g6b"
+#include "shaders/post_processing/gen5_6/null.g6b"
 };
 
-static const uint32_t pp_nv12_load_save_gen6[][4] = {
-#include "shaders/post_processing/nv12_load_save_nv12.g6b"
+static const uint32_t pp_nv12_load_save_nv12_gen6[][4] = {
+#include "shaders/post_processing/gen5_6/nv12_load_save_nv12.g6b"
+};
+
+static const uint32_t pp_nv12_load_save_pl3_gen6[][4] = {
+#include "shaders/post_processing/gen5_6/nv12_load_save_pl3.g6b"
+};
+
+static const uint32_t pp_pl3_load_save_nv12_gen6[][4] = {
+#include "shaders/post_processing/gen5_6/pl3_load_save_nv12.g6b"
+};
+
+static const uint32_t pp_pl3_load_save_pl3_gen6[][4] = {
+#include "shaders/post_processing/gen5_6/pl3_load_save_pl3.g6b"
 };
 
 static const uint32_t pp_nv12_scaling_gen6[][4] = {
-#include "shaders/post_processing/nv12_scaling_nv12.g6b"
+#include "shaders/post_processing/gen5_6/nv12_avs_nv12.g6b"
 };
 
 static const uint32_t pp_nv12_avs_gen6[][4] = {
-#include "shaders/post_processing/nv12_avs_nv12.g6b"
+#include "shaders/post_processing/gen5_6/nv12_avs_nv12.g6b"
 };
 
 static const uint32_t pp_nv12_dndi_gen6[][4] = {
-#include "shaders/post_processing/nv12_dndi_nv12.g6b"
+#include "shaders/post_processing/gen5_6/nv12_dndi_nv12.g6b"
+};
+
+static const uint32_t pp_nv12_dn_gen6[][4] = {
+#include "shaders/post_processing/gen5_6/nv12_dn_nv12.g6b"
+};
+
+static const uint32_t pp_nv12_load_save_pa_gen6[][4] = {
+#include "shaders/post_processing/gen5_6/nv12_load_save_pa.g6b"
+};
+
+static const uint32_t pp_pa_load_save_nv12_gen6[][4] = {
+#include "shaders/post_processing/gen5_6/pa_load_save_nv12.g6b"
+};
+
+static const uint32_t pp_pl3_load_save_pa_gen6[][4] = {
+#include "shaders/post_processing/gen5_6/pl3_load_save_pa.g6b"
 };
 
 static struct pp_module pp_modules_gen6[] = {
@@ -176,14 +360,50 @@ static struct pp_module pp_modules_gen6[] = {
 
     {
         {
-            "NV12 Load & Save module",
-            PP_NV12_LOAD_SAVE,
-            pp_nv12_load_save_gen6,
-            sizeof(pp_nv12_load_save_gen6),
+            "NV12_NV12",
+            PP_NV12_LOAD_SAVE_N12,
+            pp_nv12_load_save_nv12_gen6,
+            sizeof(pp_nv12_load_save_nv12_gen6),
+            NULL,
+        },
+
+        pp_plx_load_save_plx_initialize,
+    },
+
+    {
+        {
+            "NV12_PL3",
+            PP_NV12_LOAD_SAVE_PL3,
+            pp_nv12_load_save_pl3_gen6,
+            sizeof(pp_nv12_load_save_pl3_gen6),
+            NULL,
+        },
+        
+        pp_plx_load_save_plx_initialize,
+    },
+
+    {
+        {
+            "PL3_NV12",
+            PP_PL3_LOAD_SAVE_N12,
+            pp_pl3_load_save_nv12_gen6,
+            sizeof(pp_pl3_load_save_nv12_gen6),
+            NULL,
+        },
+
+        pp_plx_load_save_plx_initialize,
+    },
+
+    {
+        {
+            "PL3_PL3",
+            PP_PL3_LOAD_SAVE_PL3, /* id of the PL3 -> PL3 kernel held by this entry */
+            pp_pl3_load_save_pl3_gen6,
+            sizeof(pp_pl3_load_save_pl3_gen6),
             NULL,
         },
 
-        pp_nv12_load_save_initialize,
+        pp_plx_load_save_plx_initialize,
     },
 
     {
@@ -195,7 +415,7 @@ static struct pp_module pp_modules_gen6[] = {
             NULL,
         },
 
-        pp_nv12_scaling_initialize,
+        gen6_nv12_scaling_initialize,
     },
 
     {
@@ -207,7 +427,7 @@ static struct pp_module pp_modules_gen6[] = {
             NULL,
         },
 
-        pp_nv12_avs_initialize,
+        pp_nv12_avs_initialize_nlas,
     },
 
     {
@@ -221,185 +441,641 @@ static struct pp_module pp_modules_gen6[] = {
 
         pp_nv12_dndi_initialize,
     },
-};
 
-#define pp_static_parameter     pp_context->pp_static_parameter
-#define pp_inline_parameter     pp_context->pp_inline_parameter
+    {
+        {
+            "NV12 DN module",
+            PP_NV12_DN,
+            pp_nv12_dn_gen6,
+            sizeof(pp_nv12_dn_gen6),
+            NULL,
+        },
 
-static void
-pp_set_surface_tiling(struct i965_surface_state *ss, unsigned int tiling)
-{
-    switch (tiling) {
-    case I915_TILING_NONE:
-        ss->ss3.tiled_surface = 0;
-        ss->ss3.tile_walk = 0;
-        break;
-    case I915_TILING_X:
-        ss->ss3.tiled_surface = 1;
-        ss->ss3.tile_walk = I965_TILEWALK_XMAJOR;
-        break;
-    case I915_TILING_Y:
-        ss->ss3.tiled_surface = 1;
-        ss->ss3.tile_walk = I965_TILEWALK_YMAJOR;
-        break;
-    }
-}
+        pp_nv12_dn_initialize,
+    },
+    {
+        {
+            "NV12_PA module",
+            PP_NV12_LOAD_SAVE_PA,
+            pp_nv12_load_save_pa_gen6,
+            sizeof(pp_nv12_load_save_pa_gen6),
+            NULL,
+        },
+    
+        pp_plx_load_save_plx_initialize,
+    },
+    {
+        {
+            "PL3_PA module",
+            PP_PL3_LOAD_SAVE_PA,
+            pp_pl3_load_save_pa_gen6,
+            sizeof(pp_pl3_load_save_pa_gen6),
+            NULL,
+        },
+    
+        pp_plx_load_save_plx_initialize,
+    },
+    {
+        {
+            "PA_NV12 module",
+            PP_PA_LOAD_SAVE_NV12,
+            pp_pa_load_save_nv12_gen6,
+            sizeof(pp_pa_load_save_nv12_gen6),
+            NULL,
+        },
+    
+        pp_plx_load_save_plx_initialize,
+    },
 
-static void
-pp_set_surface2_tiling(struct i965_surface_state2 *ss, unsigned int tiling)
-{
-    switch (tiling) {
-    case I915_TILING_NONE:
-        ss->ss2.tiled_surface = 0;
-        ss->ss2.tile_walk = 0;
-        break;
-    case I915_TILING_X:
-        ss->ss2.tiled_surface = 1;
-        ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
-        break;
-    case I915_TILING_Y:
-        ss->ss2.tiled_surface = 1;
-        ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
-        break;
-    }
-}
+};
 
-static void
-ironlake_pp_surface_state(struct i965_post_processing_context *pp_context)
-{
+static const uint32_t pp_null_gen7[][4] = {
+};
 
-}
+static const uint32_t pp_nv12_load_save_nv12_gen7[][4] = {
+#include "shaders/post_processing/gen7/pl2_to_pl2.g7b"
+};
 
-static void
-ironlake_pp_interface_descriptor_table(struct i965_post_processing_context *pp_context)
-{
-    struct i965_interface_descriptor *desc;
-    dri_bo *bo;
-    int pp_index = pp_context->current_pp;
+static const uint32_t pp_nv12_load_save_pl3_gen7[][4] = {
+#include "shaders/post_processing/gen7/pl2_to_pl3.g7b"
+};
 
-    bo = pp_context->idrt.bo;
-    dri_bo_map(bo, 1);
-    assert(bo->virtual);
-    desc = bo->virtual;
-    memset(desc, 0, sizeof(*desc));
-    desc->desc0.grf_reg_blocks = 10;
-    desc->desc0.kernel_start_pointer = pp_context->pp_modules[pp_index].kernel.bo->offset >> 6; /* reloc */
-    desc->desc1.const_urb_entry_read_offset = 0;
-    desc->desc1.const_urb_entry_read_len = 4; /* grf 1-4 */
-    desc->desc2.sampler_state_pointer = pp_context->sampler_state_table.bo->offset >> 5;
-    desc->desc2.sampler_count = 0;
-    desc->desc3.binding_table_entry_count = 0;
-    desc->desc3.binding_table_pointer = 
-        pp_context->binding_table.bo->offset >> 5; /*reloc */
+static const uint32_t pp_pl3_load_save_nv12_gen7[][4] = {
+#include "shaders/post_processing/gen7/pl3_to_pl2.g7b"
+};
 
-    dri_bo_emit_reloc(bo,
-                      I915_GEM_DOMAIN_INSTRUCTION, 0,
-                      desc->desc0.grf_reg_blocks,
-                      offsetof(struct i965_interface_descriptor, desc0),
-                      pp_context->pp_modules[pp_index].kernel.bo);
+static const uint32_t pp_pl3_load_save_pl3_gen7[][4] = {
+#include "shaders/post_processing/gen7/pl3_to_pl3.g7b"
+};
 
-    dri_bo_emit_reloc(bo,
-                      I915_GEM_DOMAIN_INSTRUCTION, 0,
-                      desc->desc2.sampler_count << 2,
-                      offsetof(struct i965_interface_descriptor, desc2),
-                      pp_context->sampler_state_table.bo);
+static const uint32_t pp_nv12_scaling_gen7[][4] = {
+#include "shaders/post_processing/gen7/avs.g7b"
+};
 
-    dri_bo_emit_reloc(bo,
-                      I915_GEM_DOMAIN_INSTRUCTION, 0,
-                      desc->desc3.binding_table_entry_count,
-                      offsetof(struct i965_interface_descriptor, desc3),
-                      pp_context->binding_table.bo);
+static const uint32_t pp_nv12_avs_gen7[][4] = {
+#include "shaders/post_processing/gen7/avs.g7b"
+};
 
-    dri_bo_unmap(bo);
-    pp_context->idrt.num_interface_descriptors++;
-}
+static const uint32_t pp_nv12_dndi_gen7[][4] = {
+#include "shaders/post_processing/gen7/dndi.g7b"
+};
 
-static void
-ironlake_pp_binding_table(struct i965_post_processing_context *pp_context)
-{
-    unsigned int *binding_table;
-    dri_bo *bo = pp_context->binding_table.bo;
-    int i;
+static const uint32_t pp_nv12_dn_gen7[][4] = {
+#include "shaders/post_processing/gen7/nv12_dn_nv12.g7b"
+};
 
-    dri_bo_map(bo, 1);
-    assert(bo->virtual);
-    binding_table = bo->virtual;
-    memset(binding_table, 0, bo->size);
-
-    for (i = 0; i < MAX_PP_SURFACES; i++) {
-        if (pp_context->surfaces[i].ss_bo) {
-            assert(pp_context->surfaces[i].s_bo);
-
-            binding_table[i] = pp_context->surfaces[i].ss_bo->offset;
-            dri_bo_emit_reloc(bo,
-                              I915_GEM_DOMAIN_INSTRUCTION, 0,
-                              0,
-                              i * sizeof(*binding_table),
-                              pp_context->surfaces[i].ss_bo);
-        }
-    
-    }
+static const uint32_t pp_nv12_load_save_pa_gen7[][4] = {
+#include "shaders/post_processing/gen7/pl2_to_pa.g7b"
+};
 
-    dri_bo_unmap(bo);
-}
+static const uint32_t pp_pl3_load_save_pa_gen7[][4] = {
+#include "shaders/post_processing/gen7/pl3_to_pa.g7b"
+};
 
-static void
-ironlake_pp_vfe_state(struct i965_post_processing_context *pp_context)
-{
-    struct i965_vfe_state *vfe_state;
-    dri_bo *bo;
+static const uint32_t pp_pa_load_save_nv12_gen7[][4] = {
+#include "shaders/post_processing/gen7/pa_to_pl2.g7b"
+};
 
-    bo = pp_context->vfe_state.bo;
-    dri_bo_map(bo, 1);
-    assert(bo->virtual);
-    vfe_state = bo->virtual;
-    memset(vfe_state, 0, sizeof(*vfe_state));
-    vfe_state->vfe1.max_threads = pp_context->urb.num_vfe_entries - 1;
-    vfe_state->vfe1.urb_entry_alloc_size = pp_context->urb.size_vfe_entry - 1;
-    vfe_state->vfe1.num_urb_entries = pp_context->urb.num_vfe_entries;
-    vfe_state->vfe1.vfe_mode = VFE_GENERIC_MODE;
-    vfe_state->vfe1.children_present = 0;
-    vfe_state->vfe2.interface_descriptor_base = 
-        pp_context->idrt.bo->offset >> 4; /* reloc */
-    dri_bo_emit_reloc(bo,
-                      I915_GEM_DOMAIN_INSTRUCTION, 0,
-                      0,
-                      offsetof(struct i965_vfe_state, vfe2),
-                      pp_context->idrt.bo);
-    dri_bo_unmap(bo);
-}
+static VAStatus gen7_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
+                                           const struct i965_surface *src_surface,
+                                           const VARectangle *src_rect,
+                                           struct i965_surface *dst_surface,
+                                           const VARectangle *dst_rect,
+                                           void *filter_param);
+static VAStatus gen7_pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
+                                             const struct i965_surface *src_surface,
+                                             const VARectangle *src_rect,
+                                             struct i965_surface *dst_surface,
+                                             const VARectangle *dst_rect,
+                                             void *filter_param);
+static VAStatus gen7_pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
+                                           const struct i965_surface *src_surface,
+                                           const VARectangle *src_rect,
+                                           struct i965_surface *dst_surface,
+                                           const VARectangle *dst_rect,
+                                           void *filter_param);
+
+static struct pp_module pp_modules_gen7[] = {
+    {
+        {
+            "NULL module (for testing)",
+            PP_NULL,
+            pp_null_gen7,
+            sizeof(pp_null_gen7),
+            NULL,
+        },
 
-static void
-ironlake_pp_upload_constants(struct i965_post_processing_context *pp_context)
-{
-    unsigned char *constant_buffer;
+        pp_null_initialize,
+    },
 
-    assert(sizeof(pp_static_parameter) == 128);
-    dri_bo_map(pp_context->curbe.bo, 1);
+    {
+        {
+            "NV12_NV12",
+            PP_NV12_LOAD_SAVE_N12,
+            pp_nv12_load_save_nv12_gen7,
+            sizeof(pp_nv12_load_save_nv12_gen7),
+            NULL,
+        },
+
+        gen7_pp_plx_avs_initialize,
+    },
+
+    {
+        {
+            "NV12_PL3",
+            PP_NV12_LOAD_SAVE_PL3,
+            pp_nv12_load_save_pl3_gen7,
+            sizeof(pp_nv12_load_save_pl3_gen7),
+            NULL,
+        },
+        
+        gen7_pp_plx_avs_initialize,
+    },
+
+    {
+        {
+            "PL3_NV12",
+            PP_PL3_LOAD_SAVE_N12,
+            pp_pl3_load_save_nv12_gen7,
+            sizeof(pp_pl3_load_save_nv12_gen7),
+            NULL,
+        },
+
+        gen7_pp_plx_avs_initialize,
+    },
+
+    {
+        {
+            "PL3_PL3",
+            PP_PL3_LOAD_SAVE_PL3, /* id of the PL3 -> PL3 kernel held by this entry */
+            pp_pl3_load_save_pl3_gen7,
+            sizeof(pp_pl3_load_save_pl3_gen7),
+            NULL,
+        },
+
+        gen7_pp_plx_avs_initialize,
+    },
+
+    {
+        {
+            "NV12 Scaling module",
+            PP_NV12_SCALING,
+            pp_nv12_scaling_gen7,
+            sizeof(pp_nv12_scaling_gen7),
+            NULL,
+        },
+
+        gen7_pp_plx_avs_initialize,
+    },
+
+    {
+        {
+            "NV12 AVS module",
+            PP_NV12_AVS,
+            pp_nv12_avs_gen7,
+            sizeof(pp_nv12_avs_gen7),
+            NULL,
+        },
+
+        gen7_pp_plx_avs_initialize,
+    },
+
+    {
+        {
+            "NV12 DNDI module",
+            PP_NV12_DNDI,
+            pp_nv12_dndi_gen7,
+            sizeof(pp_nv12_dndi_gen7),
+            NULL,
+        },
+
+        gen7_pp_nv12_dndi_initialize,
+    },
+
+    {
+        {
+            "NV12 DN module",
+            PP_NV12_DN,
+            pp_nv12_dn_gen7,
+            sizeof(pp_nv12_dn_gen7),
+            NULL,
+        },
+
+        gen7_pp_nv12_dn_initialize,
+    },
+    {
+        {
+            "NV12_PA module",
+            PP_NV12_LOAD_SAVE_PA,
+            pp_nv12_load_save_pa_gen7,
+            sizeof(pp_nv12_load_save_pa_gen7),
+            NULL,
+        },
+    
+        gen7_pp_plx_avs_initialize,
+    },
+    {
+        {
+            "PL3_PA module",
+            PP_PL3_LOAD_SAVE_PA,
+            pp_pl3_load_save_pa_gen7,
+            sizeof(pp_pl3_load_save_pa_gen7),
+            NULL,
+        },
+    
+        gen7_pp_plx_avs_initialize,
+    },
+ 
+    {
+        {
+            "PA_NV12 module",
+            PP_PA_LOAD_SAVE_NV12,
+            pp_pa_load_save_nv12_gen7,
+            sizeof(pp_pa_load_save_nv12_gen7),
+            NULL,
+        },
+    
+        gen7_pp_plx_avs_initialize,
+    },
+};
+
+static const uint32_t pp_null_gen75[][4] = {
+};
+
+static const uint32_t pp_nv12_load_save_nv12_gen75[][4] = {
+#include "shaders/post_processing/gen7/pl2_to_pl2.g75b"
+};
+
+static const uint32_t pp_nv12_load_save_pl3_gen75[][4] = {
+#include "shaders/post_processing/gen7/pl2_to_pl3.g75b"
+};
+
+static const uint32_t pp_pl3_load_save_nv12_gen75[][4] = {
+#include "shaders/post_processing/gen7/pl3_to_pl2.g75b"
+};
+
+static const uint32_t pp_pl3_load_save_pl3_gen75[][4] = {
+#include "shaders/post_processing/gen7/pl3_to_pl3.g75b"
+};
+
+static const uint32_t pp_nv12_scaling_gen75[][4] = {
+#include "shaders/post_processing/gen7/avs.g75b"
+};
+
+static const uint32_t pp_nv12_avs_gen75[][4] = {
+#include "shaders/post_processing/gen7/avs.g75b"
+};
+
+static const uint32_t pp_nv12_dndi_gen75[][4] = {
+// #include "shaders/post_processing/gen7/dndi.g75b"
+};
+
+static const uint32_t pp_nv12_dn_gen75[][4] = {
+// #include "shaders/post_processing/gen7/nv12_dn_nv12.g75b"
+};
+static const uint32_t pp_nv12_load_save_pa_gen75[][4] = {
+#include "shaders/post_processing/gen7/pl2_to_pa.g75b"
+};
+
+static const uint32_t pp_pl3_load_save_pa_gen75[][4] = {
+#include "shaders/post_processing/gen7/pl3_to_pa.g75b"
+};
+
+static const uint32_t pp_pa_load_save_nv12_gen75[][4] = {
+#include "shaders/post_processing/gen7/pa_to_pl2.g75b"
+};
+
+static struct pp_module pp_modules_gen75[] = {
+    {
+        {
+            "NULL module (for testing)",
+            PP_NULL,
+            pp_null_gen75,
+            sizeof(pp_null_gen75),
+            NULL,
+        },
+
+        pp_null_initialize,
+    },
+
+    {
+        {
+            "NV12_NV12",
+            PP_NV12_LOAD_SAVE_N12,
+            pp_nv12_load_save_nv12_gen75,
+            sizeof(pp_nv12_load_save_nv12_gen75),
+            NULL,
+        },
+
+        gen7_pp_plx_avs_initialize,
+    },
+
+    {
+        {
+            "NV12_PL3",
+            PP_NV12_LOAD_SAVE_PL3,
+            pp_nv12_load_save_pl3_gen75,
+            sizeof(pp_nv12_load_save_pl3_gen75),
+            NULL,
+        },
+        
+        gen7_pp_plx_avs_initialize,
+    },
+
+    {
+        {
+            "PL3_NV12",
+            PP_PL3_LOAD_SAVE_N12,
+            pp_pl3_load_save_nv12_gen75,
+            sizeof(pp_pl3_load_save_nv12_gen75),
+            NULL,
+        },
+
+        gen7_pp_plx_avs_initialize,
+    },
+
+    {
+        {
+            "PL3_PL3",
+            PP_PL3_LOAD_SAVE_PL3, /* id of the PL3 -> PL3 kernel held by this entry */
+            pp_pl3_load_save_pl3_gen75,
+            sizeof(pp_pl3_load_save_pl3_gen75),
+            NULL,
+        },
+
+        gen7_pp_plx_avs_initialize,
+    },
+
+    {
+        {
+            "NV12 Scaling module",
+            PP_NV12_SCALING,
+            pp_nv12_scaling_gen75,
+            sizeof(pp_nv12_scaling_gen75),
+            NULL,
+        },
+
+        gen7_pp_plx_avs_initialize,
+    },
+
+    {
+        {
+            "NV12 AVS module",
+            PP_NV12_AVS,
+            pp_nv12_avs_gen75,
+            sizeof(pp_nv12_avs_gen75),
+            NULL,
+        },
+
+        gen7_pp_plx_avs_initialize,
+    },
+
+    {
+        {
+            "NV12 DNDI module",
+            PP_NV12_DNDI,
+            pp_nv12_dndi_gen75,
+            sizeof(pp_nv12_dndi_gen75),
+            NULL,
+        },
+
+        gen7_pp_nv12_dndi_initialize,
+    },
+
+    {
+        {
+            "NV12 DN module",
+            PP_NV12_DN,
+            pp_nv12_dn_gen75,
+            sizeof(pp_nv12_dn_gen75),
+            NULL,
+        },
+
+        gen7_pp_nv12_dn_initialize,
+    },
+    {
+        {
+            "NV12_PA module",
+            PP_NV12_LOAD_SAVE_PA,
+            pp_nv12_load_save_pa_gen75,
+            sizeof(pp_nv12_load_save_pa_gen75),
+            NULL,
+        },
+    
+        gen7_pp_plx_avs_initialize,
+    },
+
+    {
+        {
+            "PL3_PA module",
+            PP_PL3_LOAD_SAVE_PA,
+            pp_pl3_load_save_pa_gen75,
+            sizeof(pp_pl3_load_save_pa_gen75),
+            NULL,
+        },
+    
+        gen7_pp_plx_avs_initialize,
+    },
+
+    {
+        {
+            "PA_NV12 module",
+            PP_PA_LOAD_SAVE_NV12,
+            pp_pa_load_save_nv12_gen75,
+            sizeof(pp_pa_load_save_nv12_gen75),
+            NULL,
+        },
+    
+        gen7_pp_plx_avs_initialize,
+    },
+          
+};
+
+/* Return the fourcc of a post-processing surface, whether it wraps a VA
+ * image or a VA surface. IMAGE()/SURFACE() are project macros; the local
+ * named i965 is kept because they presumably look the object up through it. */
+static int
+pp_get_surface_fourcc(VADriverContextP ctx, const struct i965_surface *surface)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+
+    if (surface->type != I965_SURFACE_TYPE_IMAGE) {
+        struct object_surface *va_surface = SURFACE(surface->id);
+        return va_surface->fourcc;
+    } else {
+        struct object_image *va_image = IMAGE(surface->id);
+        return va_image->image.format.fourcc;
+    }
+}
+
+static void /* set the ss3 tiled_surface/tile_walk fields of `ss` from an I915_TILING_* mode */
+pp_set_surface_tiling(struct i965_surface_state *ss, unsigned int tiling)
+{
+    switch (tiling) { /* no default case: any other tiling value leaves ss unmodified */
+    case I915_TILING_NONE:
+        ss->ss3.tiled_surface = 0;
+        ss->ss3.tile_walk = 0;
+        break;
+    case I915_TILING_X:
+        ss->ss3.tiled_surface = 1;
+        ss->ss3.tile_walk = I965_TILEWALK_XMAJOR;
+        break;
+    case I915_TILING_Y:
+        ss->ss3.tiled_surface = 1;
+        ss->ss3.tile_walk = I965_TILEWALK_YMAJOR;
+        break;
+    }
+}
+
+static void /* same mapping as pp_set_surface_tiling, but for i965_surface_state2 where the fields live in ss2 */
+pp_set_surface2_tiling(struct i965_surface_state2 *ss, unsigned int tiling)
+{
+    switch (tiling) { /* no default case: any other tiling value leaves ss unmodified */
+    case I915_TILING_NONE:
+        ss->ss2.tiled_surface = 0;
+        ss->ss2.tile_walk = 0;
+        break;
+    case I915_TILING_X:
+        ss->ss2.tiled_surface = 1;
+        ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
+        break;
+    case I915_TILING_Y:
+        ss->ss2.tiled_surface = 1;
+        ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
+        break;
+    }
+}
+
+static void /* set the tiling fields of a gen7 surface state; in this layout they live in ss0 */
+gen7_pp_set_surface_tiling(struct gen7_surface_state *ss, unsigned int tiling)
+{
+    switch (tiling) { /* no default case: any other tiling value leaves ss unmodified */
+    case I915_TILING_NONE:
+        ss->ss0.tiled_surface = 0;
+        ss->ss0.tile_walk = 0;
+        break;
+    case I915_TILING_X:
+        ss->ss0.tiled_surface = 1;
+        ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
+        break;
+    case I915_TILING_Y:
+        ss->ss0.tiled_surface = 1;
+        ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
+        break;
+    }
+}
+
+static void /* set the tiling fields of a gen7_surface_state2; in this layout they live in ss2 */
+gen7_pp_set_surface2_tiling(struct gen7_surface_state2 *ss, unsigned int tiling)
+{
+    switch (tiling) { /* no default case: any other tiling value leaves ss unmodified */
+    case I915_TILING_NONE:
+        ss->ss2.tiled_surface = 0;
+        ss->ss2.tile_walk = 0;
+        break;
+    case I915_TILING_X:
+        ss->ss2.tiled_surface = 1;
+        ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
+        break;
+    case I915_TILING_Y:
+        ss->ss2.tiled_surface = 1;
+        ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
+        break;
+    }
+}
+
+
+static void /* write the interface descriptor for the currently selected pp module at the start of idrt.bo */
+ironlake_pp_interface_descriptor_table(struct i965_post_processing_context *pp_context)
+{
+    struct i965_interface_descriptor *desc;
+    dri_bo *bo;
+    int pp_index = pp_context->current_pp;
+    /* the descriptor is always written at offset 0 of the buffer, whatever num_interface_descriptors currently is */
+    bo = pp_context->idrt.bo;
+    dri_bo_map(bo, 1);
+    assert(bo->virtual);
+    desc = bo->virtual;
+    memset(desc, 0, sizeof(*desc)); /* clears exactly one descriptor */
+    desc->desc0.grf_reg_blocks = 10;
+    desc->desc0.kernel_start_pointer = pp_context->pp_modules[pp_index].kernel.bo->offset >> 6; /* reloc */
+    desc->desc1.const_urb_entry_read_offset = 0;
+    desc->desc1.const_urb_entry_read_len = 4; /* grf 1-4 */
+    desc->desc2.sampler_state_pointer = pp_context->sampler_state_table.bo->offset >> 5;
+    desc->desc2.sampler_count = 0;
+    desc->desc3.binding_table_entry_count = 0;
+    desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET >> 5);
+    /* reloc for the desc0 dword: the delta repeats grf_reg_blocks, presumably so those low bits survive the address patch - confirm */
+    dri_bo_emit_reloc(bo,
+                      I915_GEM_DOMAIN_INSTRUCTION, 0,
+                      desc->desc0.grf_reg_blocks,
+                      offsetof(struct i965_interface_descriptor, desc0),
+                      pp_context->pp_modules[pp_index].kernel.bo);
+    /* reloc for the desc2 dword: the delta is sampler_count << 2 (zero here, since sampler_count was set to 0 above) */
+    dri_bo_emit_reloc(bo,
+                      I915_GEM_DOMAIN_INSTRUCTION, 0,
+                      desc->desc2.sampler_count << 2,
+                      offsetof(struct i965_interface_descriptor, desc2),
+                      pp_context->sampler_state_table.bo);
+
+    dri_bo_unmap(bo);
+    pp_context->idrt.num_interface_descriptors++; /* bookkeeping only; the write position above does not depend on it */
+}
+
+static void /* fill vfe_state.bo from the URB settings in pp_context and point it at the interface descriptor buffer */
+ironlake_pp_vfe_state(struct i965_post_processing_context *pp_context)
+{
+    struct i965_vfe_state *vfe_state;
+    dri_bo *bo;
+    /* the state structure is written at offset 0 of the mapped buffer */
+    bo = pp_context->vfe_state.bo;
+    dri_bo_map(bo, 1);
+    assert(bo->virtual);
+    vfe_state = bo->virtual;
+    memset(vfe_state, 0, sizeof(*vfe_state));
+    vfe_state->vfe1.max_threads = pp_context->urb.num_vfe_entries - 1; /* stored as count - 1 */
+    vfe_state->vfe1.urb_entry_alloc_size = pp_context->urb.size_vfe_entry - 1; /* stored as size - 1 */
+    vfe_state->vfe1.num_urb_entries = pp_context->urb.num_vfe_entries;
+    vfe_state->vfe1.vfe_mode = VFE_GENERIC_MODE;
+    vfe_state->vfe1.children_present = 0;
+    vfe_state->vfe2.interface_descriptor_base = 
+        pp_context->idrt.bo->offset >> 4; /* reloc */
+    dri_bo_emit_reloc(bo, /* registers the vfe2 dword as a relocation against idrt.bo with delta 0 */
+                      I915_GEM_DOMAIN_INSTRUCTION, 0,
+                      0,
+                      offsetof(struct i965_vfe_state, vfe2),
+                      pp_context->idrt.bo);
+    dri_bo_unmap(bo);
+}
+
+static void /* copy the static parameter block referenced by pp_context into the mapped curbe buffer */
+ironlake_pp_upload_constants(struct i965_post_processing_context *pp_context)
+{
+    unsigned char *constant_buffer;
+    struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
+    /* the parameter block must be exactly 128 bytes; this is only enforced when assert() is compiled in */
+    assert(sizeof(*pp_static_parameter) == 128);
+    dri_bo_map(pp_context->curbe.bo, 1);
     assert(pp_context->curbe.bo->virtual);
     constant_buffer = pp_context->curbe.bo->virtual;
-    memcpy(constant_buffer, &pp_static_parameter, sizeof(pp_static_parameter));
+    memcpy(constant_buffer, pp_static_parameter, sizeof(*pp_static_parameter)); /* pp_static_parameter is a pointer now: copy what it points to, sized by the pointee */
     dri_bo_unmap(pp_context->curbe.bo);
 }
 
 static void
-ironlake_pp_states_setup(VADriverContextP ctx)
+ironlake_pp_states_setup(VADriverContextP ctx,
+                         struct i965_post_processing_context *pp_context) /* context is passed in by the caller; surface-state and binding-table setup are no longer done here */
 {
-    struct i965_driver_data *i965 = i965_driver_data(ctx);
-    struct i965_post_processing_context *pp_context = i965->pp_context;
-
-    ironlake_pp_surface_state(pp_context);
-    ironlake_pp_binding_table(pp_context);
     ironlake_pp_interface_descriptor_table(pp_context);
     ironlake_pp_vfe_state(pp_context);
     ironlake_pp_upload_constants(pp_context);
 }
 
 static void
-ironlake_pp_pipeline_select(VADriverContextP ctx)
+ironlake_pp_pipeline_select(VADriverContextP ctx,
+                            struct i965_post_processing_context *pp_context)
 {
-    struct i965_driver_data *i965 = i965_driver_data(ctx);
-    struct intel_batchbuffer *batch = i965->batch;
+    struct intel_batchbuffer *batch = pp_context->batch;
 
     BEGIN_BATCH(batch, 1);
     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
@@ -407,10 +1083,10 @@ ironlake_pp_pipeline_select(VADriverContextP ctx)
 }
 
 static void
-ironlake_pp_urb_layout(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
+ironlake_pp_urb_layout(VADriverContextP ctx,
+                       struct i965_post_processing_context *pp_context)
 {
-    struct i965_driver_data *i965 = i965_driver_data(ctx);
-    struct intel_batchbuffer *batch = i965->batch;
+    struct intel_batchbuffer *batch = pp_context->batch;
     unsigned int vfe_fence, cs_fence;
 
     vfe_fence = pp_context->urb.cs_start;
@@ -426,15 +1102,15 @@ ironlake_pp_urb_layout(VADriverContextP ctx, struct i965_post_processing_context
 }
 
 static void
-ironlake_pp_state_base_address(VADriverContextP ctx)
+ironlake_pp_state_base_address(VADriverContextP ctx,
+                               struct i965_post_processing_context *pp_context)
 {
-    struct i965_driver_data *i965 = i965_driver_data(ctx);
-    struct intel_batchbuffer *batch = i965->batch;
+    struct intel_batchbuffer *batch = pp_context->batch;
 
     BEGIN_BATCH(batch, 8);
     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 6);
     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
-    OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
+    OUT_RELOC(batch, pp_context->surface_state_binding_table.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
@@ -444,10 +1120,10 @@ ironlake_pp_state_base_address(VADriverContextP ctx)
 }
 
 static void
-ironlake_pp_state_pointers(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
+ironlake_pp_state_pointers(VADriverContextP ctx,
+                           struct i965_post_processing_context *pp_context)
 {
-    struct i965_driver_data *i965 = i965_driver_data(ctx);
-    struct intel_batchbuffer *batch = i965->batch;
+    struct intel_batchbuffer *batch = pp_context->batch;
 
     BEGIN_BATCH(batch, 3);
     OUT_BATCH(batch, CMD_MEDIA_STATE_POINTERS | 1);
@@ -457,10 +1133,10 @@ ironlake_pp_state_pointers(VADriverContextP ctx, struct i965_post_processing_con
 }
 
 static void 
-ironlake_pp_cs_urb_layout(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
+ironlake_pp_cs_urb_layout(VADriverContextP ctx,
+                          struct i965_post_processing_context *pp_context)
 {
-    struct i965_driver_data *i965 = i965_driver_data(ctx);
-    struct intel_batchbuffer *batch = i965->batch;
+    struct intel_batchbuffer *batch = pp_context->batch;
 
     BEGIN_BATCH(batch, 2);
     OUT_BATCH(batch, CMD_CS_URB_STATE | 0);
@@ -471,10 +1147,10 @@ ironlake_pp_cs_urb_layout(VADriverContextP ctx, struct i965_post_processing_cont
 }
 
 static void
-ironlake_pp_constant_buffer(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
+ironlake_pp_constant_buffer(VADriverContextP ctx,
+                            struct i965_post_processing_context *pp_context)
 {
-    struct i965_driver_data *i965 = i965_driver_data(ctx);
-    struct intel_batchbuffer *batch = i965->batch;
+    struct intel_batchbuffer *batch = pp_context->batch;
 
     BEGIN_BATCH(batch, 2);
     OUT_BATCH(batch, CMD_CONSTANT_BUFFER | (1 << 8) | (2 - 2));
@@ -485,53 +1161,512 @@ ironlake_pp_constant_buffer(VADriverContextP ctx, struct i965_post_processing_co
 }
 
 static void
-ironlake_pp_object_walker(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
+ironlake_pp_object_walker(VADriverContextP ctx,
+                          struct i965_post_processing_context *pp_context) /* emits one media object command per block of the x_steps by y_steps grid */
 {
-    struct i965_driver_data *i965 = i965_driver_data(ctx);
-    struct intel_batchbuffer *batch = i965->batch;
+    struct intel_batchbuffer *batch = pp_context->batch; /* the batch buffer now comes from the pp context */
     int x, x_steps, y, y_steps;
+    struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
 
     x_steps = pp_context->pp_x_steps(&pp_context->private_context);
     y_steps = pp_context->pp_y_steps(&pp_context->private_context);
 
-    for (y = 0; y < y_steps; y++) {
-        for (x = 0; x < x_steps; x++) {
-            if (!pp_context->pp_set_block_parameter(pp_context, x, y)) {
-                BEGIN_BATCH(batch, 20);
-                OUT_BATCH(batch, CMD_MEDIA_OBJECT | 18);
-                OUT_BATCH(batch, 0);
-                OUT_BATCH(batch, 0); /* no indirect data */
-                OUT_BATCH(batch, 0);
+    for (y = 0; y < y_steps; y++) {
+        for (x = 0; x < x_steps; x++) {
+            if (!pp_context->pp_set_block_parameter(pp_context, x, y)) { /* a command is emitted only when the callback returns 0 */
+                BEGIN_BATCH(batch, 20); /* 4 header dwords + 16 dwords (64 bytes) of inline data */
+                OUT_BATCH(batch, CMD_MEDIA_OBJECT | 18);
+                OUT_BATCH(batch, 0);
+                OUT_BATCH(batch, 0); /* no indirect data */
+                OUT_BATCH(batch, 0);
+
+                /* inline data grf 5-6 */
+                assert(sizeof(*pp_inline_parameter) == 64);
+                intel_batchbuffer_data(batch, pp_inline_parameter, sizeof(*pp_inline_parameter)); /* presumably filled in for this (x, y) by the callback above - confirm */
+
+                ADVANCE_BATCH(batch);
+            }
+        }
+    }
+}
+
+static void /* emit the whole post-processing command sequence into pp_context->batch as one atomic section */
+ironlake_pp_pipeline_setup(VADriverContextP ctx,
+                           struct i965_post_processing_context *pp_context)
+{
+    struct intel_batchbuffer *batch = pp_context->batch;
+    /* everything between start_atomic and end_atomic is emitted with 0x1000 passed as the size argument */
+    intel_batchbuffer_start_atomic(batch, 0x1000);
+    intel_batchbuffer_emit_mi_flush(batch);
+    ironlake_pp_pipeline_select(ctx, pp_context);
+    ironlake_pp_state_base_address(ctx, pp_context);
+    ironlake_pp_state_pointers(ctx, pp_context);
+    ironlake_pp_urb_layout(ctx, pp_context);
+    ironlake_pp_cs_urb_layout(ctx, pp_context);
+    ironlake_pp_constant_buffer(ctx, pp_context);
+    ironlake_pp_object_walker(ctx, pp_context); /* last step: emits the per-block media object commands */
+    intel_batchbuffer_end_atomic(batch);
+}
+
+// update the packed y/u/v offsets of the source when the surface format is packed YUV
+static void i965_update_src_surface_uv_offset(
+    VADriverContextP    ctx, 
+    struct i965_post_processing_context *pp_context,
+    const struct i965_surface *surface)
+{
+    struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
+    int fourcc = pp_get_surface_fourcc(ctx, surface);
+    /* only YUY2 and UYVY are handled; fields not assigned below keep their current value, and other fourccs change nothing */
+    if (fourcc == VA_FOURCC('Y', 'U', 'Y', '2')) {
+        pp_static_parameter->grf1.source_packed_u_offset = 1;
+        pp_static_parameter->grf1.source_packed_v_offset = 3;
+    } 
+    else if (fourcc == VA_FOURCC('U', 'Y', 'V', 'Y')) {
+        pp_static_parameter->grf1.source_packed_y_offset = 1;
+        pp_static_parameter->grf1.source_packed_v_offset = 2;
+    }
+    
+}
+
+static void i965_update_dst_surface_uv_offset( /* destination counterpart: sets the packed y/u/v offsets in grf1.r1_2.load_and_save */
+    VADriverContextP    ctx, 
+    struct i965_post_processing_context *pp_context,
+    const struct i965_surface *surface)
+{
+    struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
+    int fourcc = pp_get_surface_fourcc(ctx, surface);
+    /* only YUY2 and UYVY are handled; fields not assigned below keep their current value, and other fourccs change nothing */
+    if (fourcc == VA_FOURCC('Y', 'U', 'Y', '2')) {
+        pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_u_offset = 1;
+        pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_v_offset = 3;
+    } 
+    else if (fourcc == VA_FOURCC('U', 'Y', 'V', 'Y')) {
+        pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_y_offset = 1;
+        pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_v_offset = 2;
+    }
+    
+}
+
+static void /* write a 2D surface state for (surf_bo + surf_bo_offset) into slot `index` and point binding-table entry `index` at it */
+i965_pp_set_surface_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
+                          dri_bo *surf_bo, unsigned long surf_bo_offset,
+                          int width, int height, int pitch, int format, 
+                          int index, int is_target)
+{
+    struct i965_surface_state *ss;
+    dri_bo *ss_bo;
+    unsigned int tiling;
+    unsigned int swizzle; /* queried together with tiling but not used afterwards */
+
+    dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
+    ss_bo = pp_context->surface_state_binding_table.bo; /* one buffer holds both the surface states and the binding table */
+    assert(ss_bo);
+
+    dri_bo_map(ss_bo, True);
+    assert(ss_bo->virtual);
+    ss = (struct i965_surface_state *)((char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index));
+    memset(ss, 0, sizeof(*ss));
+    ss->ss0.surface_type = I965_SURFACE_2D;
+    ss->ss0.surface_format = format;
+    ss->ss1.base_addr = surf_bo->offset + surf_bo_offset;
+    ss->ss2.width = width - 1; /* width, height and pitch are all stored minus one */
+    ss->ss2.height = height - 1;
+    ss->ss3.pitch = pitch - 1;
+    pp_set_surface_tiling(ss, tiling);
+    dri_bo_emit_reloc(ss_bo, /* relocation for ss1.base_addr; the write-domain argument is RENDER only when is_target is non-zero */
+                      I915_GEM_DOMAIN_RENDER, is_target ? I915_GEM_DOMAIN_RENDER : 0,
+                      surf_bo_offset,
+                      SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
+                      surf_bo);
+    ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
+    dri_bo_unmap(ss_bo);
+}
+
+static void /* write an i965_surface_state2 for (surf_bo + surf_bo_offset) into slot `index` and point binding-table entry `index` at it */
+i965_pp_set_surface2_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
+                           dri_bo *surf_bo, unsigned long surf_bo_offset,
+                           int width, int height, int wpitch,
+                           int xoffset, int yoffset,
+                           int format, int interleave_chroma,
+                           int index)
+{
+    struct i965_surface_state2 *ss2;
+    dri_bo *ss2_bo;
+    unsigned int tiling;
+    unsigned int swizzle; /* queried together with tiling but not used afterwards */
+
+    dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
+    ss2_bo = pp_context->surface_state_binding_table.bo; /* same buffer that i965_pp_set_surface_state writes to */
+    assert(ss2_bo);
+
+    dri_bo_map(ss2_bo, True);
+    assert(ss2_bo->virtual);
+    ss2 = (struct i965_surface_state2 *)((char *)ss2_bo->virtual + SURFACE_STATE_OFFSET(index));
+    memset(ss2, 0, sizeof(*ss2));
+    ss2->ss0.surface_base_address = surf_bo->offset + surf_bo_offset;
+    ss2->ss1.cbcr_pixel_offset_v_direction = 0;
+    ss2->ss1.width = width - 1; /* width, height and pitch are all stored minus one */
+    ss2->ss1.height = height - 1;
+    ss2->ss2.pitch = wpitch - 1;
+    ss2->ss2.interleave_chroma = interleave_chroma;
+    ss2->ss2.surface_format = format;
+    ss2->ss3.x_offset_for_cb = xoffset;
+    ss2->ss3.y_offset_for_cb = yoffset;
+    pp_set_surface2_tiling(ss2, tiling);
+    dri_bo_emit_reloc(ss2_bo, /* relocation for ss0.surface_base_address; the write-domain argument is always 0 (there is no is_target parameter) */
+                      I915_GEM_DOMAIN_RENDER, 0,
+                      surf_bo_offset,
+                      SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state2, ss0),
+                      surf_bo);
+    ((unsigned int *)((char *)ss2_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
+    dri_bo_unmap(ss2_bo);
+}
+
+static void /* gen7 layout of i965_pp_set_surface_state: 2D surface state in slot `index` plus its binding-table entry */
+gen7_pp_set_surface_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
+                          dri_bo *surf_bo, unsigned long surf_bo_offset,
+                          int width, int height, int pitch, int format, 
+                          int index, int is_target)
+{
+    struct i965_driver_data * const i965 = i965_driver_data(ctx);  /* needed for the Haswell device-id check below */
+    struct gen7_surface_state *ss;
+    dri_bo *ss_bo;
+    unsigned int tiling;
+    unsigned int swizzle; /* queried together with tiling but not used afterwards */
+
+    dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
+    ss_bo = pp_context->surface_state_binding_table.bo; /* one buffer holds both the surface states and the binding table */
+    assert(ss_bo);
+
+    dri_bo_map(ss_bo, True);
+    assert(ss_bo->virtual);
+    ss = (struct gen7_surface_state *)((char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index));
+    memset(ss, 0, sizeof(*ss));
+    ss->ss0.surface_type = I965_SURFACE_2D;
+    ss->ss0.surface_format = format;
+    ss->ss1.base_addr = surf_bo->offset + surf_bo_offset;
+    ss->ss2.width = width - 1; /* width, height and pitch are all stored minus one */
+    ss->ss2.height = height - 1;
+    ss->ss3.pitch = pitch - 1;
+    gen7_pp_set_surface_tiling(ss, tiling);
+    if (IS_HASWELL(i965->intel.device_id)) /* Haswell only: helper defined elsewhere; presumably sets the shader channel selects - confirm */
+        gen7_render_set_surface_scs(ss);
+    dri_bo_emit_reloc(ss_bo, /* relocation for ss1.base_addr; the write-domain argument is RENDER only when is_target is non-zero */
+                      I915_GEM_DOMAIN_RENDER, is_target ? I915_GEM_DOMAIN_RENDER : 0,
+                      surf_bo_offset,
+                      SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state, ss1),
+                      surf_bo);
+    ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
+    dri_bo_unmap(ss_bo);
+}
+
+static void /* gen7 layout of i965_pp_set_surface2_state: gen7_surface_state2 in slot `index` plus its binding-table entry */
+gen7_pp_set_surface2_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
+                           dri_bo *surf_bo, unsigned long surf_bo_offset,
+                           int width, int height, int wpitch,
+                           int xoffset, int yoffset,
+                           int format, int interleave_chroma,
+                           int index)
+{
+    struct gen7_surface_state2 *ss2;
+    dri_bo *ss2_bo;
+    unsigned int tiling;
+    unsigned int swizzle; /* queried together with tiling but not used afterwards */
+
+    dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
+    ss2_bo = pp_context->surface_state_binding_table.bo; /* same buffer that gen7_pp_set_surface_state writes to */
+    assert(ss2_bo);
+
+    dri_bo_map(ss2_bo, True);
+    assert(ss2_bo->virtual);
+    ss2 = (struct gen7_surface_state2 *)((char *)ss2_bo->virtual + SURFACE_STATE_OFFSET(index));
+    memset(ss2, 0, sizeof(*ss2));
+    ss2->ss0.surface_base_address = surf_bo->offset + surf_bo_offset;
+    ss2->ss1.cbcr_pixel_offset_v_direction = 0;
+    ss2->ss1.width = width - 1; /* width, height and pitch are all stored minus one */
+    ss2->ss1.height = height - 1;
+    ss2->ss2.pitch = wpitch - 1;
+    ss2->ss2.interleave_chroma = interleave_chroma;
+    ss2->ss2.surface_format = format;
+    ss2->ss3.x_offset_for_cb = xoffset;
+    ss2->ss3.y_offset_for_cb = yoffset;
+    gen7_pp_set_surface2_tiling(ss2, tiling);
+    dri_bo_emit_reloc(ss2_bo, /* relocation for ss0.surface_base_address; the write-domain argument is always 0 (there is no is_target parameter) */
+                      I915_GEM_DOMAIN_RENDER, 0,
+                      surf_bo_offset,
+                      SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state2, ss0),
+                      surf_bo);
+    ((unsigned int *)((char *)ss2_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
+    dri_bo_unmap(ss2_bo);
+}
+
+
+static void /* derive per-plane width/height/pitch/offset for `surface` and bind each plane as an R8_UNORM surface starting at base_index */
+pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
+                                const struct i965_surface *surface, 
+                                int base_index, int is_target,
+                                int *width, int *height, int *pitch, int *offset) /* out arrays: [0] is always written, [1]/[2] only for planar formats */
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx); /* presumably consumed by the SURFACE()/IMAGE() lookup macros - confirm */
+    struct object_surface *obj_surface;
+    struct object_image *obj_image;
+    dri_bo *bo;
+    int fourcc = pp_get_surface_fourcc(ctx, surface);
+    const int Y = 0; /* Y, U, V and UV are indices into the width/height/pitch/offset arrays */
+    const int U = fourcc == VA_FOURCC('Y', 'V', '1', '2') ? 2 : 1; /* for YV12 the U and V indices are swapped */
+    const int V = fourcc == VA_FOURCC('Y', 'V', '1', '2') ? 1 : 2;
+    const int UV = 1;
+    int interleaved_uv = fourcc == VA_FOURCC('N', 'V', '1', '2');
+    int packed_yuv = (fourcc == VA_FOURCC('Y', 'U', 'Y', '2') || fourcc == VA_FOURCC('U', 'Y', 'V', 'Y')); 
+ 
+    int full_packed_format = (fourcc == VA_FOURCC('R', 'G', 'B', 'A') || 
+                              fourcc == VA_FOURCC('R', 'G', 'B', 'X') || 
+                              fourcc == VA_FOURCC('B', 'G', 'R', 'A') || 
+                              fourcc == VA_FOURCC('B', 'G', 'R', 'X'));
+    int scale_factor_of_1st_plane_width_in_byte = 1; /* bytes per pixel in the first plane: 4 for the RGB formats above, 2 for packed YUV, else 1 */
+                              
+ 
+    if (surface->type == I965_SURFACE_TYPE_SURFACE) {
+        obj_surface = SURFACE(surface->id);
+        bo = obj_surface->bo;
+        width[0] = obj_surface->orig_width;
+        height[0] = obj_surface->orig_height;
+        pitch[0] = obj_surface->width;
+        offset[0] = 0;
+        /* VA surface: plane pitches and offsets are computed from obj_surface->width and obj_surface->height */
+        if (full_packed_format) {
+            scale_factor_of_1st_plane_width_in_byte = 4; 
+            pitch[0] = obj_surface->width * 4;
+        }
+        else if (packed_yuv ) {
+            scale_factor_of_1st_plane_width_in_byte =  2; 
+            pitch[0] = obj_surface->width * 2;
+        }
+        else if (interleaved_uv) {
+            width[1] = obj_surface->orig_width;
+            height[1] = obj_surface->orig_height / 2;
+            pitch[1] = obj_surface->width;
+            offset[1] = offset[0] + obj_surface->width * obj_surface->height;
+        } else {
+            width[1] = obj_surface->orig_width / 2;
+            height[1] = obj_surface->orig_height / 2;
+            pitch[1] = obj_surface->width / 2;
+            offset[1] = offset[0] + obj_surface->width * obj_surface->height;
+            width[2] = obj_surface->orig_width / 2;
+            height[2] = obj_surface->orig_height / 2;
+            pitch[2] = obj_surface->width / 2;
+            offset[2] = offset[1] + (obj_surface->width / 2) * (obj_surface->height / 2);
+        }
+    } else {
+        obj_image = IMAGE(surface->id);
+        bo = obj_image->bo;
+        width[0] = obj_image->image.width;
+        height[0] = obj_image->image.height;
+        pitch[0] = obj_image->image.pitches[0];
+        offset[0] = obj_image->image.offsets[0];
+        /* VA image: pitches and offsets are taken from the image description instead of being computed */
+        if (full_packed_format) {
+            scale_factor_of_1st_plane_width_in_byte = 4;
+        }
+        else if (packed_yuv ) {
+            scale_factor_of_1st_plane_width_in_byte = 2;
+        }
+        else if (interleaved_uv) {
+            width[1] = obj_image->image.width;
+            height[1] = obj_image->image.height / 2;
+            pitch[1] = obj_image->image.pitches[1];
+            offset[1] = obj_image->image.offsets[1];
+        } else {
+            width[1] = obj_image->image.width / 2;
+            height[1] = obj_image->image.height / 2;
+            pitch[1] = obj_image->image.pitches[1];
+            offset[1] = obj_image->image.offsets[1];
+            width[2] = obj_image->image.width / 2;
+            height[2] = obj_image->image.height / 2;
+            pitch[2] = obj_image->image.pitches[2];
+            offset[2] = obj_image->image.offsets[2];
+        }
+    }
+
+    /* Y surface */
+    i965_pp_set_surface_state(ctx, pp_context, /* the width passed is (bytes per row) / 4, presumably because the kernels access the surface in 4-byte units - confirm */
+                              bo, offset[Y],
+                              width[Y] *scale_factor_of_1st_plane_width_in_byte / 4, height[Y], pitch[Y], I965_SURFACEFORMAT_R8_UNORM,
+                              base_index, is_target);
+
+    if (!packed_yuv && !full_packed_format) { /* only planar formats have chroma planes to bind */
+        if (interleaved_uv) {
+            i965_pp_set_surface_state(ctx, pp_context,
+                                      bo, offset[UV],
+                                      width[UV] / 4, height[UV], pitch[UV], I965_SURFACEFORMAT_R8_UNORM,
+                                      base_index + 1, is_target);
+        } else {
+            /* U surface */
+            i965_pp_set_surface_state(ctx, pp_context,
+                                      bo, offset[U],
+                                      width[U] / 4, height[U], pitch[U], I965_SURFACEFORMAT_R8_UNORM,
+                                      base_index + 1, is_target);
+
+            /* V surface */
+            i965_pp_set_surface_state(ctx, pp_context,
+                                      bo, offset[V],
+                                      width[V] / 4, height[V], pitch[V], I965_SURFACEFORMAT_R8_UNORM,
+                                      base_index + 2, is_target);
+        }
+    }
+
+}
+
+static void /* gen7 variant: targets are bound through gen7_pp_set_surface_state, sources through gen7_pp_set_surface2_state */
+gen7_pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
+                                     const struct i965_surface *surface, 
+                                     int base_index, int is_target,
+                                     int *width, int *height, int *pitch, int *offset) /* out arrays: [1]/[2] are not written for packed-YUV images */
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx); /* presumably consumed by the SURFACE()/IMAGE() lookup macros - confirm */
+    struct object_surface *obj_surface;
+    struct object_image *obj_image;
+    dri_bo *bo;
+    int fourcc = pp_get_surface_fourcc(ctx, surface);
+    const int U = (fourcc == VA_FOURCC('Y', 'V', '1', '2') || /* U and V only index image.pitches[]/image.offsets[] in the image branch */
+                   fourcc == VA_FOURCC('I', 'M', 'C', '1')) ? 2 : 1;
+    const int V = (fourcc == VA_FOURCC('Y', 'V', '1', '2') ||
+                   fourcc == VA_FOURCC('I', 'M', 'C', '1')) ? 1 : 2;
+    int interleaved_uv = fourcc == VA_FOURCC('N', 'V', '1', '2');
+    int packed_yuv = (fourcc == VA_FOURCC('Y', 'U', 'Y', '2') || fourcc == VA_FOURCC('U', 'Y', 'V', 'Y'));
+
+    if (surface->type == I965_SURFACE_TYPE_SURFACE) {
+        obj_surface = SURFACE(surface->id);
+        bo = obj_surface->bo;
+        width[0] = obj_surface->orig_width;
+        height[0] = obj_surface->orig_height;
+        pitch[0] = obj_surface->width;
+        offset[0] = 0;
+
+        if (packed_yuv) {
+            if (is_target)
+                width[0] = obj_surface->orig_width * 2; /* surface format is R8, so double the width */
+            else
+                width[0] = obj_surface->orig_width;     /* surface format is YCBCR, width is specified in units of pixels */
+
+            pitch[0] = obj_surface->width * 2;
+        }
+        /* chroma geometry comes from the surface object; these entries are filled even for packed YUV, where they are not bound below */
+        width[1] = obj_surface->cb_cr_width;
+        height[1] = obj_surface->cb_cr_height;
+        pitch[1] = obj_surface->cb_cr_pitch;
+        offset[1] = obj_surface->y_cb_offset * obj_surface->width;
+
+        width[2] = obj_surface->cb_cr_width;
+        height[2] = obj_surface->cb_cr_height;
+        pitch[2] = obj_surface->cb_cr_pitch;
+        offset[2] = obj_surface->y_cr_offset * obj_surface->width;
+    } else {
+        obj_image = IMAGE(surface->id);
+        bo = obj_image->bo;
+        width[0] = obj_image->image.width;
+        height[0] = obj_image->image.height;
+        pitch[0] = obj_image->image.pitches[0];
+        offset[0] = obj_image->image.offsets[0];
+
+        if (packed_yuv) {
+            if (is_target)
+                width[0] = obj_image->image.width * 2;  /* surface format is R8, so double the width */
+            else
+                width[0] = obj_image->image.width;      /* surface format is YCBCR, width is specified in units of pixels */
+        } else if (interleaved_uv) {
+            width[1] = obj_image->image.width / 2;
+            height[1] = obj_image->image.height / 2;
+            pitch[1] = obj_image->image.pitches[1];
+            offset[1] = obj_image->image.offsets[1];
+        } else {
+            width[1] = obj_image->image.width / 2;
+            height[1] = obj_image->image.height / 2;
+            pitch[1] = obj_image->image.pitches[U];
+            offset[1] = obj_image->image.offsets[U];
+            width[2] = obj_image->image.width / 2;
+            height[2] = obj_image->image.height / 2;
+            pitch[2] = obj_image->image.pitches[V];
+            offset[2] = obj_image->image.offsets[V];
+        }
+    }
+
+    if (is_target) {
+        gen7_pp_set_surface_state(ctx, pp_context,
+                                  bo, 0, /* NOTE(review): passes 0 rather than offset[0], unlike the source path below - confirm image.offsets[0] is always 0 for targets */
+                                  width[0] / 4, height[0], pitch[0],
+                                  I965_SURFACEFORMAT_R8_SINT,
+                                  base_index, 1);
+
+        if (!packed_yuv) {
+            if (interleaved_uv) {
+                gen7_pp_set_surface_state(ctx, pp_context,
+                                          bo, offset[1],
+                                          width[1] / 2, height[1], pitch[1],
+                                          I965_SURFACEFORMAT_R8G8_SINT,
+                                          base_index + 1, 1);
+            } else {
+                gen7_pp_set_surface_state(ctx, pp_context,
+                                          bo, offset[1],
+                                          width[1] / 4, height[1], pitch[1],
+                                          I965_SURFACEFORMAT_R8_SINT,
+                                          base_index + 1, 1);
+                gen7_pp_set_surface_state(ctx, pp_context,
+                                          bo, offset[2],
+                                          width[2] / 4, height[2], pitch[2],
+                                          I965_SURFACEFORMAT_R8_SINT,
+                                          base_index + 2, 1);
+            }
+        }
+    } else {
+        int format0 = SURFACE_FORMAT_Y8_UNORM; /* format of the first plane; replaced below for the two packed-YUV fourccs */
+
+        switch (fourcc) {
+        case VA_FOURCC('Y', 'U', 'Y', '2'):
+            format0 = SURFACE_FORMAT_YCRCB_NORMAL;
+            break;
 
-                /* inline data grf 5-6 */
-                assert(sizeof(pp_inline_parameter) == 64);
-                intel_batchbuffer_data(batch, &pp_inline_parameter, sizeof(pp_inline_parameter));
+        case VA_FOURCC('U', 'Y', 'V', 'Y'):
+            format0 = SURFACE_FORMAT_YCRCB_SWAPY;
+            break;
 
-                ADVANCE_BATCH(batch);
+        default:
+            break;
+        }
+
+        gen7_pp_set_surface2_state(ctx, pp_context, /* source planes get the unscaled width and their own offset */
+                                   bo, offset[0],
+                                   width[0], height[0], pitch[0],
+                                   0, 0,
+                                   format0, 0,
+                                   base_index);
+
+        if (!packed_yuv) {
+            if (interleaved_uv) {
+                gen7_pp_set_surface2_state(ctx, pp_context,
+                                           bo, offset[1],
+                                           width[1], height[1], pitch[1],
+                                           0, 0,
+                                           SURFACE_FORMAT_R8B8_UNORM, 0,
+                                           base_index + 1);
+            } else {
+                gen7_pp_set_surface2_state(ctx, pp_context,
+                                           bo, offset[1],
+                                           width[1], height[1], pitch[1],
+                                           0, 0,
+                                           SURFACE_FORMAT_R8_UNORM, 0,
+                                           base_index + 1);
+                gen7_pp_set_surface2_state(ctx, pp_context,
+                                           bo, offset[2],
+                                           width[2], height[2], pitch[2],
+                                           0, 0,
+                                           SURFACE_FORMAT_R8_UNORM, 0,
+                                           base_index + 2);
             }
         }
     }
 }
 
-static void
-ironlake_pp_pipeline_setup(VADriverContextP ctx)
-{
-    struct i965_driver_data *i965 = i965_driver_data(ctx);
-    struct intel_batchbuffer *batch = i965->batch;
-    struct i965_post_processing_context *pp_context = i965->pp_context;
-
-    intel_batchbuffer_start_atomic(batch, 0x1000);
-    intel_batchbuffer_emit_mi_flush(batch);
-    ironlake_pp_pipeline_select(ctx);
-    ironlake_pp_state_base_address(ctx);
-    ironlake_pp_state_pointers(ctx, pp_context);
-    ironlake_pp_urb_layout(ctx, pp_context);
-    ironlake_pp_cs_urb_layout(ctx, pp_context);
-    ironlake_pp_constant_buffer(ctx, pp_context);
-    ironlake_pp_object_walker(ctx, pp_context);
-    intel_batchbuffer_end_atomic(batch);
-}
-
 static int
 pp_null_x_steps(void *private_context)
 {
@@ -550,18 +1685,22 @@ pp_null_set_block_parameter(struct i965_post_processing_context *pp_context, int
     return 0;
 }
 
-static void
-pp_null_initialize(VADriverContextP ctx, 
-                   VASurfaceID in_surface_id, VASurfaceID out_surface_id,
-                   const VARectangle *src_rect, const VARectangle *dst_rect)
+static VAStatus
+pp_null_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
+                   const struct i965_surface *src_surface,
+                   const VARectangle *src_rect,
+                   struct i965_surface *dst_surface,
+                   const VARectangle *dst_rect,
+                   void *filter_param) /* null-filter setup: ctx, both rectangles and filter_param are not read here */
 {
-    struct i965_driver_data *i965 = i965_driver_data(ctx);
-    struct i965_post_processing_context *pp_context = i965->pp_context;
-
     /* private function & data */
     pp_context->pp_x_steps = pp_null_x_steps;
     pp_context->pp_y_steps = pp_null_y_steps;
     pp_context->pp_set_block_parameter = pp_null_set_block_parameter;
+    /* Hand the source surface's flags to the destination unchanged. */
+    dst_surface->flags = src_surface->flags;
+
+    return VA_STATUS_SUCCESS; /* this initializer has no failure path */
 }
 
 static int
@@ -581,170 +1720,93 @@ pp_load_save_y_steps(void *private_context)
 static int
 pp_load_save_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
 {
-    pp_inline_parameter.grf5.block_vertical_mask = 0xff;
-    pp_inline_parameter.grf5.block_horizontal_mask = 0xffff;
-    pp_inline_parameter.grf5.destination_block_horizontal_origin = x * 16;
-    pp_inline_parameter.grf5.destination_block_vertical_origin = y * 8;
+    struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
+    struct pp_load_save_context *pp_load_save_context = (struct pp_load_save_context *)&pp_context->private_context;
+    /* Block (x, y) origin: 16 per horizontal step and 8 per vertical step, offset by the dest_x/dest_y kept in the load/save context. */
+    pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16 + pp_load_save_context->dest_x;
+    pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8 + pp_load_save_context->dest_y;
 
     return 0;
 }
 
-static void
-pp_nv12_load_save_initialize(VADriverContextP ctx,
-                             VASurfaceID in_surface_id, VASurfaceID out_surface_id,
-                             const VARectangle *src_rect, const VARectangle *dst_rect)
+/* Fill pp_context's left/right horizontal and bottom vertical edge-block masks for the given destination rectangle. */
+static void calculate_boundary_block_mask(struct i965_post_processing_context *pp_context, const VARectangle *dst_rect)
+{
+    const int left_extend = dst_rect->x % GPU_ASM_X_OFFSET_ALIGNMENT;
+    const int right_rem = (dst_rect->width + left_extend) % GPU_ASM_BLOCK_WIDTH;
+    const int bottom_rem = dst_rect->height % GPU_ASM_BLOCK_HEIGHT;
+    int bit;
+
+    /* The x offset of the dest surface must be dword aligned, so the dst
+     * surface is extended on its left edge and the extra pixels are masked
+     * out: only bits left_extend..GPU_ASM_BLOCK_WIDTH-1 stay enabled. */
+    if (left_extend == 0) {
+        pp_context->block_horizontal_mask_left = 0xffff;
+    } else {
+        pp_context->block_horizontal_mask_left = 0;
+        for (bit = left_extend; bit < GPU_ASM_BLOCK_WIDTH; bit++)
+            pp_context->block_horizontal_mask_left |= 1 << bit;
+    }
+
+    /* Right edge: keep only the low right_rem bits when the extended
+     * width is not a multiple of the block width. */
+    if (right_rem)
+        pp_context->block_horizontal_mask_right = (1 << right_rem) - 1;
+    else
+        pp_context->block_horizontal_mask_right = 0xffff;
+
+    /* Bottom edge: same rule applied to the height and block height. */
+    if (bottom_rem)
+        pp_context->block_vertical_mask_bottom = (1 << bottom_rem) - 1;
+    else
+        pp_context->block_vertical_mask_bottom = 0xff;
+}
+
+static VAStatus
+pp_plx_load_save_plx_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
+                                const struct i965_surface *src_surface,
+                                const VARectangle *src_rect,
+                                struct i965_surface *dst_surface,
+                                const VARectangle *dst_rect,
+                                void *filter_param) /* load/save setup; filter_param is not read here; always returns VA_STATUS_SUCCESS */
 {
-    struct i965_driver_data *i965 = i965_driver_data(ctx);
-    struct i965_post_processing_context *pp_context = i965->pp_context;
     struct pp_load_save_context *pp_load_save_context = (struct pp_load_save_context *)&pp_context->private_context;
-    struct object_surface *obj_surface;
-    struct i965_surface_state *ss;
-    dri_bo *bo;
-    int index, w, h;
-    int orig_w, orig_h;
-    unsigned int tiling, swizzle;
+    int width[3], height[3], pitch[3], offset[3]; /* scratch outputs of pp_set_media_rw_message_surface; not read afterwards */
+    struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
+    struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
 
     /* source surface */
-    obj_surface = SURFACE(in_surface_id);
-    orig_w = obj_surface->orig_width;
-    orig_h = obj_surface->orig_height;
-    w = obj_surface->width;
-    h = obj_surface->height;
-    dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
-
-    /* source Y surface index 1 */
-    index = 1;
-    pp_context->surfaces[index].s_bo = obj_surface->bo;
-    dri_bo_reference(pp_context->surfaces[index].s_bo);
-    bo = dri_bo_alloc(i965->intel.bufmgr, 
-                      "surface state", 
-                      sizeof(struct i965_surface_state), 
-                      4096);
-    assert(bo);
-    pp_context->surfaces[index].ss_bo = bo;
-    dri_bo_map(bo, True);
-    assert(bo->virtual);
-    ss = bo->virtual;
-    memset(ss, 0, sizeof(*ss));
-    ss->ss0.surface_type = I965_SURFACE_2D;
-    ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
-    ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset;
-    ss->ss2.width = orig_w / 4 - 1;
-    ss->ss2.height = orig_h - 1;
-    ss->ss3.pitch = w - 1;
-    pp_set_surface_tiling(ss, tiling);
-    dri_bo_emit_reloc(bo,
-                      I915_GEM_DOMAIN_RENDER, 
-                      0,
-                      0,
-                      offsetof(struct i965_surface_state, ss1),
-                      pp_context->surfaces[index].s_bo);
-    dri_bo_unmap(bo);
-
-    /* source UV surface index 2 */
-    index = 2;
-    pp_context->surfaces[index].s_bo = obj_surface->bo;
-    dri_bo_reference(pp_context->surfaces[index].s_bo);
-    bo = dri_bo_alloc(i965->intel.bufmgr, 
-                      "surface state", 
-                      sizeof(struct i965_surface_state), 
-                      4096);
-    assert(bo);
-    pp_context->surfaces[index].ss_bo = bo;
-    dri_bo_map(bo, True);
-    assert(bo->virtual);
-    ss = bo->virtual;
-    memset(ss, 0, sizeof(*ss));
-    ss->ss0.surface_type = I965_SURFACE_2D;
-    ss->ss0.surface_format = I965_SURFACEFORMAT_R8G8_UNORM;
-    ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset + w * h;
-    ss->ss2.width = orig_w / 4 - 1;
-    ss->ss2.height = orig_h / 2 - 1;
-    ss->ss3.pitch = w - 1;
-    pp_set_surface_tiling(ss, tiling);
-    dri_bo_emit_reloc(bo,
-                      I915_GEM_DOMAIN_RENDER, 
-                      0,
-                      w * h,
-                      offsetof(struct i965_surface_state, ss1),
-                      pp_context->surfaces[index].s_bo);
-    dri_bo_unmap(bo);
+    pp_set_media_rw_message_surface(ctx, pp_context, src_surface, 1, 0,
+                                    width, height, pitch, offset);
 
     /* destination surface */
-    obj_surface = SURFACE(out_surface_id);
-    orig_w = obj_surface->orig_width;
-    orig_h = obj_surface->orig_height;
-    w = obj_surface->width;
-    h = obj_surface->height;
-    dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
-
-    /* destination Y surface index 7 */
-    index = 7;
-    pp_context->surfaces[index].s_bo = obj_surface->bo;
-    dri_bo_reference(pp_context->surfaces[index].s_bo);
-    bo = dri_bo_alloc(i965->intel.bufmgr, 
-                      "surface state", 
-                      sizeof(struct i965_surface_state), 
-                      4096);
-    assert(bo);
-    pp_context->surfaces[index].ss_bo = bo;
-    dri_bo_map(bo, True);
-    assert(bo->virtual);
-    ss = bo->virtual;
-    memset(ss, 0, sizeof(*ss));
-    ss->ss0.surface_type = I965_SURFACE_2D;
-    ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
-    ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset;
-    ss->ss2.width = orig_w / 4 - 1;
-    ss->ss2.height = orig_h - 1;
-    ss->ss3.pitch = w - 1;
-    pp_set_surface_tiling(ss, tiling);
-    dri_bo_emit_reloc(bo,
-                      I915_GEM_DOMAIN_RENDER, 
-                      I915_GEM_DOMAIN_RENDER,
-                      0,
-                      offsetof(struct i965_surface_state, ss1),
-                      pp_context->surfaces[index].s_bo);
-    dri_bo_unmap(bo);
-
-    /* destination UV surface index 8 */
-    index = 8;
-    pp_context->surfaces[index].s_bo = obj_surface->bo;
-    dri_bo_reference(pp_context->surfaces[index].s_bo);
-    bo = dri_bo_alloc(i965->intel.bufmgr, 
-                      "surface state", 
-                      sizeof(struct i965_surface_state), 
-                      4096);
-    assert(bo);
-    pp_context->surfaces[index].ss_bo = bo;
-    dri_bo_map(bo, True);
-    assert(bo->virtual);
-    ss = bo->virtual;
-    memset(ss, 0, sizeof(*ss));
-    ss->ss0.surface_type = I965_SURFACE_2D;
-    ss->ss0.surface_format = I965_SURFACEFORMAT_R8G8_UNORM;
-    ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset + w * h;
-    ss->ss2.width = orig_w / 4 - 1;
-    ss->ss2.height = orig_h / 2 - 1;
-    ss->ss3.pitch = w - 1;
-    pp_set_surface_tiling(ss, tiling);
-    dri_bo_emit_reloc(bo,
-                      I915_GEM_DOMAIN_RENDER, 
-                      I915_GEM_DOMAIN_RENDER,
-                      w * h,
-                      offsetof(struct i965_surface_state, ss1),
-                      pp_context->surfaces[index].s_bo);
-    dri_bo_unmap(bo);
+    pp_set_media_rw_message_surface(ctx, pp_context, dst_surface, 7, 1,
+                                    width, height, pitch, offset);
 
     /* private function & data */
     pp_context->pp_x_steps = pp_load_save_x_steps;
     pp_context->pp_y_steps = pp_load_save_y_steps;
     pp_context->pp_set_block_parameter = pp_load_save_set_block_parameter;
-    pp_load_save_context->dest_h = h;
-    pp_load_save_context->dest_w = w;
 
-    pp_inline_parameter.grf5.block_count_x = w / 16;   /* 1 x N */
-    pp_inline_parameter.grf5.number_blocks = w / 16;
+    int dst_left_edge_extend = dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT; /* remainder of dst x against the required x alignment */
+    pp_load_save_context->dest_x = dst_rect->x - dst_left_edge_extend; /* start on the aligned x; the width below grows by the same amount */
+    pp_load_save_context->dest_y = dst_rect->y;
+    pp_load_save_context->dest_h = ALIGN(dst_rect->height, 8);
+    pp_load_save_context->dest_w = ALIGN(dst_rect->width+dst_left_edge_extend, 16);
+    /* Both block counts are the ALIGN()ed destination width expressed in 16-wide blocks. */
+    pp_inline_parameter->grf5.block_count_x = pp_load_save_context->dest_w / 16;   /* 1 x N */
+    pp_inline_parameter->grf5.number_blocks = pp_load_save_context->dest_w / 16;
+    /* The source rectangle origin is stored in the static parameter block (grf3). */
+    pp_static_parameter->grf3.horizontal_origin_offset = src_rect->x;
+    pp_static_parameter->grf3.vertical_origin_offset = src_rect->y;
+
+    // update u/v offset for packed yuv
+    i965_update_src_surface_uv_offset (ctx, pp_context, src_surface);
+    i965_update_dst_surface_uv_offset (ctx, pp_context, dst_surface);
+
+    dst_surface->flags = src_surface->flags; /* destination inherits the source surface flags */
+
+    return VA_STATUS_SUCCESS;
 }
 
 static int
@@ -765,165 +1827,73 @@ static int
 pp_scaling_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
 {
     struct pp_scaling_context *pp_scaling_context = (struct pp_scaling_context *)&pp_context->private_context;
-    float src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
-    float src_y_steping = pp_static_parameter.grf1.r1_6.normalized_video_y_scaling_step;
-
-    pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin = src_x_steping * x * 16 + pp_scaling_context->src_normalized_x;
-    pp_inline_parameter.grf5.source_surface_block_normalized_vertical_origin = src_y_steping * y * 8 + pp_scaling_context->src_normalized_y;
-    pp_inline_parameter.grf5.destination_block_horizontal_origin = x * 16 + pp_scaling_context->dest_x;
-    pp_inline_parameter.grf5.destination_block_vertical_origin = y * 8 + pp_scaling_context->dest_y;
+    struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
+    struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
+    float src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
+    float src_y_steping = pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step;
+    /* Source origin = scaling step * block index * block size (16 wide, 8 high) + the normalized start kept in the scaling context; destination origin = block index * block size + dest_x/dest_y. */
+    pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = src_x_steping * x * 16 + pp_scaling_context->src_normalized_x;
+    pp_inline_parameter->grf5.source_surface_block_normalized_vertical_origin = src_y_steping * y * 8 + pp_scaling_context->src_normalized_y;
+    pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16 + pp_scaling_context->dest_x;
+    pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8 + pp_scaling_context->dest_y;
     
     return 0;
 }
 
-static void
-pp_nv12_scaling_initialize(VADriverContextP ctx,
-                           VASurfaceID in_surface_id, VASurfaceID out_surface_id,
-                           const VARectangle *src_rect, const VARectangle *dst_rect)
+static VAStatus
+pp_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
+                           const struct i965_surface *src_surface,
+                           const VARectangle *src_rect,
+                           struct i965_surface *dst_surface,
+                           const VARectangle *dst_rect,
+                           void *filter_param)
 {
     struct i965_driver_data *i965 = i965_driver_data(ctx);
-    struct i965_post_processing_context *pp_context = i965->pp_context;
     struct pp_scaling_context *pp_scaling_context = (struct pp_scaling_context *)&pp_context->private_context;
+    struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
+    struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
     struct object_surface *obj_surface;
     struct i965_sampler_state *sampler_state;
-    struct i965_surface_state *ss;
-    dri_bo *bo;
-    int index;
     int in_w, in_h, in_wpitch, in_hpitch;
     int out_w, out_h, out_wpitch, out_hpitch;
-    unsigned int tiling, swizzle;
 
     /* source surface */
-    obj_surface = SURFACE(in_surface_id);
+    obj_surface = SURFACE(src_surface->id);
     in_w = obj_surface->orig_width;
     in_h = obj_surface->orig_height;
     in_wpitch = obj_surface->width;
     in_hpitch = obj_surface->height;
-    dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
 
     /* source Y surface index 1 */
-    index = 1;
-    pp_context->surfaces[index].s_bo = obj_surface->bo;
-    dri_bo_reference(pp_context->surfaces[index].s_bo);
-    bo = dri_bo_alloc(i965->intel.bufmgr, 
-                      "surface state", 
-                      sizeof(struct i965_surface_state), 
-                      4096);
-    assert(bo);
-    pp_context->surfaces[index].ss_bo = bo;
-    dri_bo_map(bo, True);
-    assert(bo->virtual);
-    ss = bo->virtual;
-    memset(ss, 0, sizeof(*ss));
-    ss->ss0.surface_type = I965_SURFACE_2D;
-    ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
-    ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset;
-    ss->ss2.width = in_w - 1;
-    ss->ss2.height = in_h - 1;
-    ss->ss3.pitch = in_wpitch - 1;
-    pp_set_surface_tiling(ss, tiling);
-    dri_bo_emit_reloc(bo,
-                      I915_GEM_DOMAIN_RENDER, 
-                      0,
-                      0,
-                      offsetof(struct i965_surface_state, ss1),
-                      pp_context->surfaces[index].s_bo);
-    dri_bo_unmap(bo);
+    i965_pp_set_surface_state(ctx, pp_context,
+                              obj_surface->bo, 0,
+                              in_w, in_h, in_wpitch, I965_SURFACEFORMAT_R8_UNORM,
+                              1, 0);
 
     /* source UV surface index 2 */
-    index = 2;
-    pp_context->surfaces[index].s_bo = obj_surface->bo;
-    dri_bo_reference(pp_context->surfaces[index].s_bo);
-    bo = dri_bo_alloc(i965->intel.bufmgr, 
-                      "surface state", 
-                      sizeof(struct i965_surface_state), 
-                      4096);
-    assert(bo);
-    pp_context->surfaces[index].ss_bo = bo;
-    dri_bo_map(bo, True);
-    assert(bo->virtual);
-    ss = bo->virtual;
-    memset(ss, 0, sizeof(*ss));
-    ss->ss0.surface_type = I965_SURFACE_2D;
-    ss->ss0.surface_format = I965_SURFACEFORMAT_R8G8_UNORM;
-    ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset + in_wpitch * in_hpitch;
-    ss->ss2.width = in_w / 2 - 1;
-    ss->ss2.height = in_h / 2 - 1;
-    ss->ss3.pitch = in_wpitch - 1;
-    pp_set_surface_tiling(ss, tiling);
-    dri_bo_emit_reloc(bo,
-                      I915_GEM_DOMAIN_RENDER, 
-                      0,
-                      in_wpitch * in_hpitch,
-                      offsetof(struct i965_surface_state, ss1),
-                      pp_context->surfaces[index].s_bo);
-    dri_bo_unmap(bo);
+    i965_pp_set_surface_state(ctx, pp_context,
+                              obj_surface->bo, in_wpitch * in_hpitch,
+                              in_w / 2, in_h / 2, in_wpitch, I965_SURFACEFORMAT_R8G8_UNORM,
+                              2, 0);
 
     /* destination surface */
-    obj_surface = SURFACE(out_surface_id);
+    obj_surface = SURFACE(dst_surface->id);
     out_w = obj_surface->orig_width;
     out_h = obj_surface->orig_height;
     out_wpitch = obj_surface->width;
     out_hpitch = obj_surface->height;
-    dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
 
     /* destination Y surface index 7 */
-    index = 7;
-    pp_context->surfaces[index].s_bo = obj_surface->bo;
-    dri_bo_reference(pp_context->surfaces[index].s_bo);
-    bo = dri_bo_alloc(i965->intel.bufmgr, 
-                      "surface state", 
-                      sizeof(struct i965_surface_state), 
-                      4096);
-    assert(bo);
-    pp_context->surfaces[index].ss_bo = bo;
-    dri_bo_map(bo, True);
-    assert(bo->virtual);
-    ss = bo->virtual;
-    memset(ss, 0, sizeof(*ss));
-    ss->ss0.surface_type = I965_SURFACE_2D;
-    ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
-    ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset;
-    ss->ss2.width = out_w / 4 - 1;
-    ss->ss2.height = out_h - 1;
-    ss->ss3.pitch = out_wpitch - 1;
-    pp_set_surface_tiling(ss, tiling);
-    dri_bo_emit_reloc(bo,
-                      I915_GEM_DOMAIN_RENDER, 
-                      I915_GEM_DOMAIN_RENDER,
-                      0,
-                      offsetof(struct i965_surface_state, ss1),
-                      pp_context->surfaces[index].s_bo);
-    dri_bo_unmap(bo);
+    i965_pp_set_surface_state(ctx, pp_context,
+                              obj_surface->bo, 0,
+                              out_w / 4, out_h, out_wpitch, I965_SURFACEFORMAT_R8_UNORM,
+                              7, 1);
 
     /* destination UV surface index 8 */
-    index = 8;
-    pp_context->surfaces[index].s_bo = obj_surface->bo;
-    dri_bo_reference(pp_context->surfaces[index].s_bo);
-    bo = dri_bo_alloc(i965->intel.bufmgr, 
-                      "surface state", 
-                      sizeof(struct i965_surface_state), 
-                      4096);
-    assert(bo);
-    pp_context->surfaces[index].ss_bo = bo;
-    dri_bo_map(bo, True);
-    assert(bo->virtual);
-    ss = bo->virtual;
-    memset(ss, 0, sizeof(*ss));
-    ss->ss0.surface_type = I965_SURFACE_2D;
-    ss->ss0.surface_format = I965_SURFACEFORMAT_R8G8_UNORM;
-    ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset + out_wpitch * out_hpitch;
-    ss->ss2.width = out_w / 4 - 1;
-    ss->ss2.height = out_h / 2 - 1;
-    ss->ss3.pitch = out_wpitch - 1;
-    pp_set_surface_tiling(ss, tiling);
-    dri_bo_emit_reloc(bo,
-                      I915_GEM_DOMAIN_RENDER, 
-                      I915_GEM_DOMAIN_RENDER,
-                      out_wpitch * out_hpitch,
-                      offsetof(struct i965_surface_state, ss1),
-                      pp_context->surfaces[index].s_bo);
-    dri_bo_unmap(bo);
+    i965_pp_set_surface_state(ctx, pp_context,
+                              obj_surface->bo, out_wpitch * out_hpitch,
+                              out_w / 4, out_h / 2, out_wpitch, I965_SURFACEFORMAT_R8G8_UNORM,
+                              8, 1);
 
     /* sampler state */
     dri_bo_map(pp_context->sampler_state_table.bo, True);
@@ -951,20 +1921,24 @@ pp_nv12_scaling_initialize(VADriverContextP ctx,
     pp_context->pp_y_steps = pp_scaling_y_steps;
     pp_context->pp_set_block_parameter = pp_scaling_set_block_parameter;
 
-    pp_scaling_context->dest_x = dst_rect->x;
+    int dst_left_edge_extend = dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT;
+    float src_left_edge_extend = (float)dst_left_edge_extend*src_rect->width/dst_rect->width;
+    pp_scaling_context->dest_x = dst_rect->x - dst_left_edge_extend;
     pp_scaling_context->dest_y = dst_rect->y;
-    pp_scaling_context->dest_w = ALIGN(dst_rect->width, 16);
-    pp_scaling_context->dest_h = ALIGN(dst_rect->height, 16);
-    pp_scaling_context->src_normalized_x = (float)src_rect->x / in_w / out_w;
-    pp_scaling_context->src_normalized_y = (float)src_rect->y / in_h / out_h;
+    pp_scaling_context->dest_w = ALIGN(dst_rect->width + dst_left_edge_extend, 16);
+    pp_scaling_context->dest_h = ALIGN(dst_rect->height, 8);
+    pp_scaling_context->src_normalized_x = (float)(src_rect->x - src_left_edge_extend)/ in_w;
+    pp_scaling_context->src_normalized_y = (float)src_rect->y / in_h;
+
+    pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step = (float) src_rect->height / in_h / dst_rect->height;
+
+    pp_inline_parameter->grf5.normalized_video_x_scaling_step = (float) (src_rect->width + src_left_edge_extend)/ in_w / (dst_rect->width + dst_left_edge_extend);
+    pp_inline_parameter->grf5.block_count_x = pp_scaling_context->dest_w / 16;   /* 1 x N */
+    pp_inline_parameter->grf5.number_blocks = pp_scaling_context->dest_w / 16;
 
-    pp_static_parameter.grf1.r1_6.normalized_video_y_scaling_step = (float) src_rect->height / in_h / out_h;
+    dst_surface->flags = src_surface->flags;
 
-    pp_inline_parameter.grf5.normalized_video_x_scaling_step = (float) src_rect->width / in_w / out_w;
-    pp_inline_parameter.grf5.block_count_x = pp_scaling_context->dest_w / 16;   /* 1 x N */
-    pp_inline_parameter.grf5.number_blocks = pp_scaling_context->dest_w / 16;
-    pp_inline_parameter.grf5.block_vertical_mask = 0xff;
-    pp_inline_parameter.grf5.block_horizontal_mask = 0xffff;
+    return VA_STATUS_SUCCESS;
 }
 
 static int
@@ -985,20 +1959,25 @@ static int
 pp_avs_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
 {
     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
+    struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
+    struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
     float src_x_steping, src_y_steping, video_step_delta;
     int tmp_w = ALIGN(pp_avs_context->dest_h * pp_avs_context->src_w / pp_avs_context->src_h, 16);
 
-    if (tmp_w >= pp_avs_context->dest_w) {
-        pp_inline_parameter.grf5.normalized_video_x_scaling_step = 1.0 / tmp_w;
-        pp_inline_parameter.grf6.video_step_delta = 0;
+    if (pp_static_parameter->grf4.r4_2.avs.nlas == 0) {
+        src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
+        pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = src_x_steping * x * 16 + pp_avs_context->src_normalized_x;
+    } else if (tmp_w >= pp_avs_context->dest_w) {
+        pp_inline_parameter->grf5.normalized_video_x_scaling_step = 1.0 / tmp_w;
+        pp_inline_parameter->grf6.video_step_delta = 0;
         
         if (x == 0) {
-            pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin = (float)(tmp_w - pp_avs_context->dest_w) / tmp_w / 2 +
+            pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = (float)(tmp_w - pp_avs_context->dest_w) / tmp_w / 2 +
                 pp_avs_context->src_normalized_x;
         } else {
-            src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
-            video_step_delta = pp_inline_parameter.grf6.video_step_delta;
-            pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
+            src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
+            video_step_delta = pp_inline_parameter->grf6.video_step_delta;
+            pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
                 16 * 15 * video_step_delta / 2;
         }
     } else {
@@ -1014,15 +1993,15 @@ pp_avs_set_block_parameter(struct i965_post_processing_context *pp_context, int
         f = (float) n2 * 16 / tmp_w;
         
         if (n0 < 5) {
-            pp_inline_parameter.grf6.video_step_delta = 0.0;
+            pp_inline_parameter->grf6.video_step_delta = 0.0;
 
             if (x == 0) {
-                pp_inline_parameter.grf5.normalized_video_x_scaling_step = 1.0 / pp_avs_context->dest_w;
-                pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin = pp_avs_context->src_normalized_x;
+                pp_inline_parameter->grf5.normalized_video_x_scaling_step = 1.0 / pp_avs_context->dest_w;
+                pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = pp_avs_context->src_normalized_x;
             } else {
-                src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
-                video_step_delta = pp_inline_parameter.grf6.video_step_delta;
-                pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
+                src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
+                video_step_delta = pp_inline_parameter->grf6.video_step_delta;
+                pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
                     16 * 15 * video_step_delta / 2;
             }
         } else {
@@ -1031,218 +2010,160 @@ pp_avs_set_block_parameter(struct i965_post_processing_context *pp_context, int
                 float a = f / (nls_left * 16 * factor_b);
                 float b = (f - nls_left * 16 * a) * 2 / (nls_left * 16 * (nls_left * 16 - 1));
                 
-                pp_inline_parameter.grf6.video_step_delta = b;
+                pp_inline_parameter->grf6.video_step_delta = b;
 
                 if (x == 0) {
-                    pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin = pp_avs_context->src_normalized_x;
-                    pp_inline_parameter.grf5.normalized_video_x_scaling_step = a;
+                    pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = pp_avs_context->src_normalized_x;
+                    pp_inline_parameter->grf5.normalized_video_x_scaling_step = a;
                 } else {
-                    src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
-                    video_step_delta = pp_inline_parameter.grf6.video_step_delta;
-                    pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
+                    src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
+                    video_step_delta = pp_inline_parameter->grf6.video_step_delta;
+                    pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
                         16 * 15 * video_step_delta / 2;
-                    pp_inline_parameter.grf5.normalized_video_x_scaling_step += 16 * b;
+                    pp_inline_parameter->grf5.normalized_video_x_scaling_step += 16 * b;
                 }
             } else if (x < (pp_avs_context->dest_w / 16 - nls_right)) {
                 /* scale the center linearly */
-                src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
-                video_step_delta = pp_inline_parameter.grf6.video_step_delta;
-                pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
+                src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
+                video_step_delta = pp_inline_parameter->grf6.video_step_delta;
+                pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
                     16 * 15 * video_step_delta / 2;
-                pp_inline_parameter.grf6.video_step_delta = 0.0;
-                pp_inline_parameter.grf5.normalized_video_x_scaling_step = 1.0 / tmp_w;
+                pp_inline_parameter->grf6.video_step_delta = 0.0;
+                pp_inline_parameter->grf5.normalized_video_x_scaling_step = 1.0 / tmp_w;
             } else {
                 float a = f / (nls_right * 16 * factor_b);
                 float b = (f - nls_right * 16 * a) * 2 / (nls_right * 16 * (nls_right * 16 - 1));
 
-                src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
-                video_step_delta = pp_inline_parameter.grf6.video_step_delta;
-                pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
+                src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
+                video_step_delta = pp_inline_parameter->grf6.video_step_delta;
+                pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
                     16 * 15 * video_step_delta / 2;
-                pp_inline_parameter.grf6.video_step_delta = -b;
+                pp_inline_parameter->grf6.video_step_delta = -b;
 
                 if (x == (pp_avs_context->dest_w / 16 - nls_right))
-                    pp_inline_parameter.grf5.normalized_video_x_scaling_step = a + (nls_right * 16  - 1) * b;
+                    pp_inline_parameter->grf5.normalized_video_x_scaling_step = a + (nls_right * 16  - 1) * b;
                 else
-                    pp_inline_parameter.grf5.normalized_video_x_scaling_step -= b * 16;
+                    pp_inline_parameter->grf5.normalized_video_x_scaling_step -= b * 16;
             }
         }
     }
 
-    src_y_steping = pp_static_parameter.grf1.r1_6.normalized_video_y_scaling_step;
-    pp_inline_parameter.grf5.source_surface_block_normalized_vertical_origin = src_y_steping * y * 8 + pp_avs_context->src_normalized_y;
-    pp_inline_parameter.grf5.destination_block_horizontal_origin = x * 16 + pp_avs_context->dest_x;
-    pp_inline_parameter.grf5.destination_block_vertical_origin = y * 8 + pp_avs_context->dest_y;
+    src_y_steping = pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step;
+    pp_inline_parameter->grf5.source_surface_block_normalized_vertical_origin = src_y_steping * y * 8 + pp_avs_context->src_normalized_y;
+    pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16 + pp_avs_context->dest_x;
+    pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8 + pp_avs_context->dest_y;
 
     return 0;
 }
 
-static void
-pp_nv12_avs_initialize(VADriverContextP ctx,
-                       VASurfaceID in_surface_id, VASurfaceID out_surface_id,
-                       const VARectangle *src_rect, const VARectangle *dst_rect)
+static VAStatus
+pp_nv12_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
+                       const struct i965_surface *src_surface,
+                       const VARectangle *src_rect,
+                       struct i965_surface *dst_surface,
+                       const VARectangle *dst_rect,
+                       void *filter_param,
+                       int nlas)
 {
     struct i965_driver_data *i965 = i965_driver_data(ctx);
-    struct i965_post_processing_context *pp_context = i965->pp_context;
     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
+    struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
+    struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
     struct object_surface *obj_surface;
-    struct i965_surface_state *ss;
     struct i965_sampler_8x8 *sampler_8x8;
     struct i965_sampler_8x8_state *sampler_8x8_state;
-    struct i965_surface_state2 *ss_8x8;
-    dri_bo *bo;
     int index;
     int in_w, in_h, in_wpitch, in_hpitch;
     int out_w, out_h, out_wpitch, out_hpitch;
-    unsigned int tiling, swizzle;
+    int i;
 
     /* surface */
-    obj_surface = SURFACE(in_surface_id);
+    obj_surface = SURFACE(src_surface->id);
     in_w = obj_surface->orig_width;
     in_h = obj_surface->orig_height;
     in_wpitch = obj_surface->width;
     in_hpitch = obj_surface->height;
-    dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
 
     /* source Y surface index 1 */
-    index = 1;
-    pp_context->surfaces[index].s_bo = obj_surface->bo;
-    dri_bo_reference(pp_context->surfaces[index].s_bo);
-    bo = dri_bo_alloc(i965->intel.bufmgr, 
-                      "Y surface state for sample_8x8", 
-                      sizeof(struct i965_surface_state2), 
-                      4096);
-    assert(bo);
-    pp_context->surfaces[index].ss_bo = bo;
-    dri_bo_map(bo, True);
-    assert(bo->virtual);
-    ss_8x8 = bo->virtual;
-    memset(ss_8x8, 0, sizeof(*ss_8x8));
-    ss_8x8->ss0.surface_base_address = pp_context->surfaces[index].s_bo->offset;
-    ss_8x8->ss1.cbcr_pixel_offset_v_direction = 0;
-    ss_8x8->ss1.width = in_w - 1;
-    ss_8x8->ss1.height = in_h - 1;
-    ss_8x8->ss2.half_pitch_for_chroma = 0;
-    ss_8x8->ss2.pitch = in_wpitch - 1;
-    ss_8x8->ss2.interleave_chroma = 0;
-    ss_8x8->ss2.surface_format = SURFACE_FORMAT_Y8_UNORM;
-    ss_8x8->ss3.x_offset_for_cb = 0;
-    ss_8x8->ss3.y_offset_for_cb = 0;
-    pp_set_surface2_tiling(ss_8x8, tiling);
-    dri_bo_emit_reloc(bo,
-                      I915_GEM_DOMAIN_RENDER, 
-                      0,
-                      0,
-                      offsetof(struct i965_surface_state2, ss0),
-                      pp_context->surfaces[index].s_bo);
-    dri_bo_unmap(bo);
+    i965_pp_set_surface2_state(ctx, pp_context,
+                               obj_surface->bo, 0,
+                               in_w, in_h, in_wpitch,
+                               0, 0,
+                               SURFACE_FORMAT_Y8_UNORM, 0,
+                               1);
 
     /* source UV surface index 2 */
-    index = 2;
-    pp_context->surfaces[index].s_bo = obj_surface->bo;
-    dri_bo_reference(pp_context->surfaces[index].s_bo);
-    bo = dri_bo_alloc(i965->intel.bufmgr, 
-                      "UV surface state for sample_8x8", 
-                      sizeof(struct i965_surface_state2), 
-                      4096);
-    assert(bo);
-    pp_context->surfaces[index].ss_bo = bo;
-    dri_bo_map(bo, True);
-    assert(bo->virtual);
-    ss_8x8 = bo->virtual;
-    memset(ss_8x8, 0, sizeof(*ss_8x8));
-    ss_8x8->ss0.surface_base_address = pp_context->surfaces[index].s_bo->offset + in_wpitch * in_hpitch;
-    ss_8x8->ss1.cbcr_pixel_offset_v_direction = 0;
-    ss_8x8->ss1.width = in_w - 1;
-    ss_8x8->ss1.height = in_h - 1;
-    ss_8x8->ss2.half_pitch_for_chroma = 0;
-    ss_8x8->ss2.pitch = in_wpitch - 1;
-    ss_8x8->ss2.interleave_chroma = 1;
-    ss_8x8->ss2.surface_format = SURFACE_FORMAT_PLANAR_420_8;
-    ss_8x8->ss3.x_offset_for_cb = 0;
-    ss_8x8->ss3.y_offset_for_cb = 0;
-    pp_set_surface2_tiling(ss_8x8, tiling);
-    dri_bo_emit_reloc(bo,
-                      I915_GEM_DOMAIN_RENDER, 
-                      0,
-                      in_wpitch * in_hpitch,
-                      offsetof(struct i965_surface_state2, ss0),
-                      pp_context->surfaces[index].s_bo);
-    dri_bo_unmap(bo);
+    i965_pp_set_surface2_state(ctx, pp_context,
+                               obj_surface->bo, in_wpitch * in_hpitch,
+                               in_w / 2, in_h / 2, in_wpitch,
+                               0, 0,
+                               SURFACE_FORMAT_R8B8_UNORM, 0,
+                               2);
 
     /* destination surface */
-    obj_surface = SURFACE(out_surface_id);
+    obj_surface = SURFACE(dst_surface->id);
     out_w = obj_surface->orig_width;
     out_h = obj_surface->orig_height;
     out_wpitch = obj_surface->width;
     out_hpitch = obj_surface->height;
     assert(out_w <= out_wpitch && out_h <= out_hpitch);
-    dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
 
     /* destination Y surface index 7 */
-    index = 7;
-    pp_context->surfaces[index].s_bo = obj_surface->bo;
-    dri_bo_reference(pp_context->surfaces[index].s_bo);
-    bo = dri_bo_alloc(i965->intel.bufmgr, 
-                      "surface state", 
-                      sizeof(struct i965_surface_state), 
-                      4096);
-    assert(bo);
-    pp_context->surfaces[index].ss_bo = bo;
-    dri_bo_map(bo, True);
-    assert(bo->virtual);
-    ss = bo->virtual;
-    memset(ss, 0, sizeof(*ss));
-    ss->ss0.surface_type = I965_SURFACE_2D;
-    ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
-    ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset;
-    ss->ss2.width = out_w / 4 - 1;
-    ss->ss2.height = out_h - 1;
-    ss->ss3.pitch = out_wpitch - 1;
-    pp_set_surface_tiling(ss, tiling);
-    dri_bo_emit_reloc(bo,
-                      I915_GEM_DOMAIN_RENDER, 
-                      I915_GEM_DOMAIN_RENDER,
-                      0,
-                      offsetof(struct i965_surface_state, ss1),
-                      pp_context->surfaces[index].s_bo);
-    dri_bo_unmap(bo);
+    i965_pp_set_surface_state(ctx, pp_context,
+                              obj_surface->bo, 0,
+                              out_w / 4, out_h, out_wpitch, I965_SURFACEFORMAT_R8_UNORM,
+                              7, 1);
 
     /* destination UV surface index 8 */
-    index = 8;
-    pp_context->surfaces[index].s_bo = obj_surface->bo;
-    dri_bo_reference(pp_context->surfaces[index].s_bo);
-    bo = dri_bo_alloc(i965->intel.bufmgr, 
-                      "surface state", 
-                      sizeof(struct i965_surface_state), 
-                      4096);
-    assert(bo);
-    pp_context->surfaces[index].ss_bo = bo;
-    dri_bo_map(bo, True);
-    assert(bo->virtual);
-    ss = bo->virtual;
-    memset(ss, 0, sizeof(*ss));
-    ss->ss0.surface_type = I965_SURFACE_2D;
-    ss->ss0.surface_format = I965_SURFACEFORMAT_R8G8_UNORM;
-    ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset + out_wpitch * out_hpitch;
-    ss->ss2.width = out_w / 4 - 1;
-    ss->ss2.height = out_h / 2 - 1;
-    ss->ss3.pitch = out_wpitch - 1;
-    pp_set_surface_tiling(ss, tiling);
-    dri_bo_emit_reloc(bo,
-                      I915_GEM_DOMAIN_RENDER, 
-                      I915_GEM_DOMAIN_RENDER,
-                      out_wpitch * out_hpitch,
-                      offsetof(struct i965_surface_state, ss1),
-                      pp_context->surfaces[index].s_bo);
-    dri_bo_unmap(bo);
-    
+    i965_pp_set_surface_state(ctx, pp_context,
+                              obj_surface->bo, out_wpitch * out_hpitch,
+                              out_w / 4, out_h / 2, out_wpitch, I965_SURFACEFORMAT_R8G8_UNORM,
+                              8, 1);
+
     /* sampler 8x8 state */
     dri_bo_map(pp_context->sampler_state_table.bo_8x8, True);
     assert(pp_context->sampler_state_table.bo_8x8->virtual);
     assert(sizeof(*sampler_8x8_state) == sizeof(int) * 138);
     sampler_8x8_state = pp_context->sampler_state_table.bo_8x8->virtual;
     memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
+
+    for (i = 0; i < 17; i++) {
+        /* for Y channel, currently ignore */
+        sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c0 = 0x00;
+        sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c1 = 0x00;
+        sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c2 = 0x08;
+        sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c3 = 0x18;
+        sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c4 = 0x18;
+        sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c5 = 0x08;
+        sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c6 = 0x00;
+        sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c7 = 0x00;
+        sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c0 = 0x00;
+        sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c1 = 0x00;
+        sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c2 = 0x10;
+        sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c3 = 0x10;
+        sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c4 = 0x10;
+        sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c5 = 0x10;
+        sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c6 = 0x00;
+        sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c7 = 0x00;
+        /* for U/V channel, 0.25 */
+        sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c0 = 0x0;
+        sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c1 = 0x0;
+        sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c2 = 0x10;
+        sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c3 = 0x10;
+        sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c4 = 0x10;
+        sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c5 = 0x10;
+        sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c6 = 0x0;
+        sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c7 = 0x0;
+        sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c0 = 0x0;
+        sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c1 = 0x0;
+        sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c2 = 0x10;
+        sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c3 = 0x10;
+        sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c4 = 0x10;
+        sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c5 = 0x10;
+        sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c6 = 0x0;
+        sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c7 = 0x0;
+    }
+
     sampler_8x8_state->dw136.default_sharpness_level = 0;
     sampler_8x8_state->dw137.adaptive_filter_for_all_channel = 1;
     sampler_8x8_state->dw137.bypass_y_adaptive_filtering = 1;
@@ -1259,7 +2180,7 @@ pp_nv12_avs_initialize(VADriverContextP ctx,
     index = 1;
     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
     sampler_8x8[index].dw0.avs_filter_type = AVS_FILTER_ADAPTIVE_8_TAP;
-    sampler_8x8[index].dw0.ief_bypass = 0;
+    sampler_8x8[index].dw0.ief_bypass = 1;
     sampler_8x8[index].dw0.ief_filter_type = IEF_FILTER_DETAIL;
     sampler_8x8[index].dw0.ief_filter_size = IEF_FILTER_SIZE_5X5;
     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
@@ -1317,27 +2238,16 @@ pp_nv12_avs_initialize(VADriverContextP ctx,
                       0,
                       0,
                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
-                      pp_context->sampler_state_table.bo_8x8);
-
-    dri_bo_map(pp_context->sampler_state_table.bo_8x8_uv, True);
-    assert(pp_context->sampler_state_table.bo_8x8_uv->virtual);
-    assert(sizeof(*sampler_8x8_state) == sizeof(int) * 138);
-    sampler_8x8_state = pp_context->sampler_state_table.bo_8x8_uv->virtual;
-    memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
-    sampler_8x8_state->dw136.default_sharpness_level = 0;
-    sampler_8x8_state->dw137.adaptive_filter_for_all_channel = 0;
-    sampler_8x8_state->dw137.bypass_y_adaptive_filtering = 1;
-    sampler_8x8_state->dw137.bypass_x_adaptive_filtering = 1;
-    dri_bo_unmap(pp_context->sampler_state_table.bo_8x8_uv);
+                      pp_context->sampler_state_table.bo_8x8);
 
     /* sample_8x8 UV index 2 */
     index = 2;
     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
-    sampler_8x8[index].dw0.avs_filter_type = AVS_FILTER_NEAREST;
-    sampler_8x8[index].dw0.ief_bypass = 0;
+    sampler_8x8[index].dw0.avs_filter_type = AVS_FILTER_ADAPTIVE_8_TAP;
+    sampler_8x8[index].dw0.ief_bypass = 1;
     sampler_8x8[index].dw0.ief_filter_type = IEF_FILTER_DETAIL;
     sampler_8x8[index].dw0.ief_filter_size = IEF_FILTER_SIZE_5X5;
-    sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8_uv->offset >> 5;
+    sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
     sampler_8x8[index].dw2.global_noise_estimation = 22;
     sampler_8x8[index].dw2.strong_edge_threshold = 8;
     sampler_8x8[index].dw2.weak_edge_threshold = 1;
@@ -1392,7 +2302,7 @@ pp_nv12_avs_initialize(VADriverContextP ctx,
                       0,
                       0,
                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
-                      pp_context->sampler_state_table.bo_8x8_uv);
+                      pp_context->sampler_state_table.bo_8x8);
 
     dri_bo_unmap(pp_context->sampler_state_table.bo);
 
@@ -1401,74 +2311,564 @@ pp_nv12_avs_initialize(VADriverContextP ctx,
     pp_context->pp_y_steps = pp_avs_y_steps;
     pp_context->pp_set_block_parameter = pp_avs_set_block_parameter;
 
-    pp_avs_context->dest_x = dst_rect->x;
-    pp_avs_context->dest_y = dst_rect->y;
-    pp_avs_context->dest_w = ALIGN(dst_rect->width, 16);
-    pp_avs_context->dest_h = ALIGN(dst_rect->height, 16);
-    pp_avs_context->src_normalized_x = (float)src_rect->x / in_w / out_w;
-    pp_avs_context->src_normalized_y = (float)src_rect->y / in_h / out_h;
-    pp_avs_context->src_w = src_rect->width;
-    pp_avs_context->src_h = src_rect->height;
+    int dst_left_edge_extend = dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT;
+    float src_left_edge_extend = (float)dst_left_edge_extend*src_rect->width/dst_rect->width;
+    pp_avs_context->dest_x = dst_rect->x - dst_left_edge_extend;
+    pp_avs_context->dest_y = dst_rect->y;
+    pp_avs_context->dest_w = ALIGN(dst_rect->width + dst_left_edge_extend, 16);
+    pp_avs_context->dest_h = ALIGN(dst_rect->height, 8);
+    pp_avs_context->src_normalized_x = (float)(src_rect->x - src_left_edge_extend)/ in_w;
+    pp_avs_context->src_normalized_y = (float)src_rect->y / in_h;
+    pp_avs_context->src_w = src_rect->width + src_left_edge_extend;
+    pp_avs_context->src_h = src_rect->height;
+
+    pp_static_parameter->grf4.r4_2.avs.nlas = nlas;
+    pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step = (float) src_rect->height / in_h / dst_rect->height;
+
+    pp_inline_parameter->grf5.normalized_video_x_scaling_step = (float) (src_rect->width + src_left_edge_extend)/ in_w / (dst_rect->width + dst_left_edge_extend);
+    pp_inline_parameter->grf5.block_count_x = 1;        /* M x 1 */
+    pp_inline_parameter->grf5.number_blocks = pp_avs_context->dest_h / 8;
+    pp_inline_parameter->grf6.video_step_delta = 0.0;
+
+    dst_surface->flags = src_surface->flags;
+
+    return VA_STATUS_SUCCESS;
+}
+
+/*
+ * Variant of the NV12 AVS set-up that requests nlas: every argument is
+ * forwarded unchanged to pp_nv12_avs_initialize(), with 1 passed as the
+ * trailing nlas argument.
+ */
+static VAStatus
+pp_nv12_avs_initialize_nlas(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
+                            const struct i965_surface *src_surface,
+                            const VARectangle *src_rect,
+                            struct i965_surface *dst_surface,
+                            const VARectangle *dst_rect,
+                            void *filter_param)
+{
+    return pp_nv12_avs_initialize(ctx, pp_context, src_surface, src_rect,
+                                  dst_surface, dst_rect, filter_param, 1);
+}
+
+/*
+ * GEN6 NV12 scaling set-up: every argument is forwarded unchanged to
+ * pp_nv12_avs_initialize(), with 0 passed as the trailing nlas argument
+ * (the value that function stores in the static parameter's avs.nlas).
+ */
+static VAStatus
+gen6_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
+                             const struct i965_surface *src_surface,
+                             const VARectangle *src_rect,
+                             struct i965_surface *dst_surface,
+                             const VARectangle *dst_rect,
+                             void *filter_param)
+{
+    return pp_nv12_avs_initialize(ctx, pp_context, src_surface, src_rect,
+                                  dst_surface, dst_rect, filter_param, 0);
+}
+
+/* pp_x_steps callback: the AVS context's dest_w divided into steps of 16. */
+static int
+gen7_pp_avs_x_steps(void *private_context)
+{
+    const struct pp_avs_context *avs = private_context;
+    return avs->dest_w / 16;
+}
+
+/* pp_y_steps callback: the AVS context's dest_h divided into steps of 16. */
+static int
+gen7_pp_avs_y_steps(void *private_context)
+{
+    const struct pp_avs_context *avs = private_context;
+    return avs->dest_h / 16;
+}
+
+/* Fill the grf7 inline parameters for the 16x16 destination block at step (x, y); always returns 0. */
+static int
+gen7_pp_avs_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
+{
+    struct pp_avs_context *avs = (struct pp_avs_context *)&pp_context->private_context;
+    struct gen7_pp_inline_parameter *inline_param = pp_context->pp_inline_parameter;
+
+    inline_param->grf7.destination_block_horizontal_origin = avs->dest_x + x * 16;
+    inline_param->grf7.destination_block_vertical_origin = avs->dest_y + y * 16;
+    inline_param->grf7.constant_0 = 0xffffffff;
+    inline_param->grf7.sampler_load_main_video_x_scaling_step = 1.0 / avs->src_w;
+    return 0;
+}
+
+/* For packed YUY2 / UYVY surfaces, store the Y/U/V component offsets in grf2; otherwise do nothing. */
+static void
+gen7_update_src_surface_uv_offset(VADriverContextP ctx,
+                                  struct i965_post_processing_context *pp_context,
+                                  const struct i965_surface *surface)
+{
+    struct gen7_pp_static_parameter *params = pp_context->pp_static_parameter;
+    const int fourcc = pp_get_surface_fourcc(ctx, surface);
+    const int is_yuy2 = (fourcc == VA_FOURCC('Y', 'U', 'Y', '2'));
+
+    if (!is_yuy2 && fourcc != VA_FOURCC('U', 'Y', 'V', 'Y'))
+        return;
+
+    params->grf2.di_destination_packed_y_component_offset = is_yuy2 ? 0 : 1;
+    params->grf2.di_destination_packed_u_component_offset = is_yuy2 ? 1 : 0;
+    params->grf2.di_destination_packed_v_component_offset = is_yuy2 ? 3 : 2;
+}
+
+static VAStatus
+gen7_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
+                           const struct i965_surface *src_surface,
+                           const VARectangle *src_rect,
+                           struct i965_surface *dst_surface,
+                           const VARectangle *dst_rect,
+                           void *filter_param)
+{
+    struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
+    struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
+    struct gen7_sampler_8x8 *sampler_8x8;
+    struct i965_sampler_8x8_state *sampler_8x8_state;
+    int index, i;
+    int width[3], height[3], pitch[3], offset[3];
+    int src_height;
+
+    /* source surface */
+    gen7_pp_set_media_rw_message_surface(ctx, pp_context, src_surface, 0, 0,
+                                         width, height, pitch, offset);
+    src_height = height[0];
+
+    /* destination surface */
+    gen7_pp_set_media_rw_message_surface(ctx, pp_context, dst_surface, 24, 1,
+                                         width, height, pitch, offset);
+
+    /* sampler 8x8 state */
+    dri_bo_map(pp_context->sampler_state_table.bo_8x8, True);
+    assert(pp_context->sampler_state_table.bo_8x8->virtual);
+    assert(sizeof(*sampler_8x8_state) == sizeof(int) * 138);
+    sampler_8x8_state = pp_context->sampler_state_table.bo_8x8->virtual;
+    memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
+
+    for (i = 0; i < 17; i++) {
+        /* for Y channel, currently ignore */
+        sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c0 = 0x0;
+        sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c1 = 0x0;
+        sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c2 = 0x0;
+        sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c3 = 0x0;
+        sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c4 = 0x0;
+        sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c5 = 0x0;
+        sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c6 = 0x0;
+        sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c7 = 0x0;
+        sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c0 = 0x0;
+        sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c1 = 0x0;
+        sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c2 = 0x0;
+        sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c3 = 0x0;
+        sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c4 = 0x0;
+        sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c5 = 0x0;
+        sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c6 = 0x0;
+        sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c7 = 0x0;
+        /* for U/V channel, 0.25 */
+        sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c0 = 0x0;
+        sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c1 = 0x0;
+        sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c2 = 0x10;
+        sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c3 = 0x10;
+        sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c4 = 0x10;
+        sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c5 = 0x10;
+        sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c6 = 0x0;
+        sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c7 = 0x0;
+        sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c0 = 0x0;
+        sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c1 = 0x0;
+        sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c2 = 0x10;
+        sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c3 = 0x10;
+        sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c4 = 0x10;
+        sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c5 = 0x10;
+        sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c6 = 0x0;
+        sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c7 = 0x0;
+    }
+
+    sampler_8x8_state->dw136.default_sharpness_level = 0;
+    sampler_8x8_state->dw137.adaptive_filter_for_all_channel = 1;
+    sampler_8x8_state->dw137.bypass_y_adaptive_filtering = 1;
+    sampler_8x8_state->dw137.bypass_x_adaptive_filtering = 1;
+    dri_bo_unmap(pp_context->sampler_state_table.bo_8x8);
+
+    /* sampler 8x8 */
+    dri_bo_map(pp_context->sampler_state_table.bo, True);
+    assert(pp_context->sampler_state_table.bo->virtual);
+    assert(sizeof(*sampler_8x8) == sizeof(int) * 4);
+    sampler_8x8 = pp_context->sampler_state_table.bo->virtual;
+
+    /* sample_8x8 Y index 4 */
+    index = 4;
+    memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
+    sampler_8x8[index].dw0.global_noise_estimation = 255;
+    sampler_8x8[index].dw0.ief_bypass = 1;
+
+    sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
+
+    sampler_8x8[index].dw2.weak_edge_threshold = 1;
+    sampler_8x8[index].dw2.strong_edge_threshold = 8;
+    sampler_8x8[index].dw2.r5x_coefficient = 9;
+    sampler_8x8[index].dw2.r5cx_coefficient = 8;
+    sampler_8x8[index].dw2.r5c_coefficient = 3;
+
+    sampler_8x8[index].dw3.r3x_coefficient = 27;
+    sampler_8x8[index].dw3.r3c_coefficient = 5;
+    sampler_8x8[index].dw3.gain_factor = 40;
+    sampler_8x8[index].dw3.non_edge_weight = 1;
+    sampler_8x8[index].dw3.regular_weight = 2;
+    sampler_8x8[index].dw3.strong_edge_weight = 7;
+    sampler_8x8[index].dw3.ief4_smooth_enable = 0;
+
+    dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
+                      I915_GEM_DOMAIN_RENDER, 
+                      0,
+                      0,
+                      sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
+                      pp_context->sampler_state_table.bo_8x8);
+
+    /* sample_8x8 UV index 8 */
+    index = 8;
+    memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
+    sampler_8x8[index].dw0.disable_8x8_filter = 0;
+    sampler_8x8[index].dw0.global_noise_estimation = 255;
+    sampler_8x8[index].dw0.ief_bypass = 1;
+    sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
+    sampler_8x8[index].dw2.weak_edge_threshold = 1;
+    sampler_8x8[index].dw2.strong_edge_threshold = 8;
+    sampler_8x8[index].dw2.r5x_coefficient = 9;
+    sampler_8x8[index].dw2.r5cx_coefficient = 8;
+    sampler_8x8[index].dw2.r5c_coefficient = 3;
+    sampler_8x8[index].dw3.r3x_coefficient = 27;
+    sampler_8x8[index].dw3.r3c_coefficient = 5;
+    sampler_8x8[index].dw3.gain_factor = 40;
+    sampler_8x8[index].dw3.non_edge_weight = 1;
+    sampler_8x8[index].dw3.regular_weight = 2;
+    sampler_8x8[index].dw3.strong_edge_weight = 7;
+    sampler_8x8[index].dw3.ief4_smooth_enable = 0;
+
+    dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
+                      I915_GEM_DOMAIN_RENDER, 
+                      0,
+                      0,
+                      sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
+                      pp_context->sampler_state_table.bo_8x8);
+
+    /* sampler_8x8 V, index 12 */
+    index = 12;
+    memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
+    sampler_8x8[index].dw0.disable_8x8_filter = 0;
+    sampler_8x8[index].dw0.global_noise_estimation = 255;
+    sampler_8x8[index].dw0.ief_bypass = 1;
+    sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
+    sampler_8x8[index].dw2.weak_edge_threshold = 1;
+    sampler_8x8[index].dw2.strong_edge_threshold = 8;
+    sampler_8x8[index].dw2.r5x_coefficient = 9;
+    sampler_8x8[index].dw2.r5cx_coefficient = 8;
+    sampler_8x8[index].dw2.r5c_coefficient = 3;
+    sampler_8x8[index].dw3.r3x_coefficient = 27;
+    sampler_8x8[index].dw3.r3c_coefficient = 5;
+    sampler_8x8[index].dw3.gain_factor = 40;
+    sampler_8x8[index].dw3.non_edge_weight = 1;
+    sampler_8x8[index].dw3.regular_weight = 2;
+    sampler_8x8[index].dw3.strong_edge_weight = 7;
+    sampler_8x8[index].dw3.ief4_smooth_enable = 0;
+
+    dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
+                      I915_GEM_DOMAIN_RENDER, 
+                      0,
+                      0,
+                      sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
+                      pp_context->sampler_state_table.bo_8x8);
+
+    dri_bo_unmap(pp_context->sampler_state_table.bo);
+
+    /* private function & data */
+    pp_context->pp_x_steps = gen7_pp_avs_x_steps;
+    pp_context->pp_y_steps = gen7_pp_avs_y_steps;
+    pp_context->pp_set_block_parameter = gen7_pp_avs_set_block_parameter;
+
+    pp_avs_context->dest_x = dst_rect->x;
+    pp_avs_context->dest_y = dst_rect->y;
+    pp_avs_context->dest_w = ALIGN(dst_rect->width, 16);
+    pp_avs_context->dest_h = ALIGN(dst_rect->height, 16);
+    pp_avs_context->src_w = src_rect->width;
+    pp_avs_context->src_h = src_rect->height;
+
+    int dw = (pp_avs_context->src_w - 1) / 16 + 1;
+    dw = MAX(dw, pp_avs_context->dest_w);
+
+    pp_static_parameter->grf1.pointer_to_inline_parameter = 7;
+    pp_static_parameter->grf2.avs_wa_enable = 1; /* must be set for GEN7 */
+    pp_static_parameter->grf2.avs_wa_width = dw;
+    pp_static_parameter->grf2.avs_wa_one_div_256_width = (float) 1.0 / (256 * dw);
+    pp_static_parameter->grf2.avs_wa_five_div_256_width = (float) 5.0 / (256 * dw);
+
+    pp_static_parameter->grf3.sampler_load_horizontal_scaling_step_ratio = (float) pp_avs_context->src_w / pp_avs_context->dest_w;
+    pp_static_parameter->grf4.sampler_load_vertical_scaling_step = (float) src_rect->height / src_height / pp_avs_context->dest_h;
+    pp_static_parameter->grf5.sampler_load_vertical_frame_origin = -(float)pp_avs_context->dest_y / pp_avs_context->dest_h;
+    pp_static_parameter->grf6.sampler_load_horizontal_frame_origin = -(float)pp_avs_context->dest_x / pp_avs_context->dest_w;
+
+    gen7_update_src_surface_uv_offset(ctx, pp_context, dst_surface);
+
+    dst_surface->flags = src_surface->flags;
+
+    return VA_STATUS_SUCCESS;
+}
+
+
+static int /* hook stored in pp_context->pp_x_steps by pp_nv12_dndi_initialize */
+pp_dndi_x_steps(void *private_context) /* private_context is not read here */
+{
+    return 1; /* always one step in x; presumably a step covers a whole row (block_count_x = w / 16) - confirm against the caller */
+}
+
+static int /* hook stored in pp_context->pp_y_steps by pp_nv12_dndi_initialize */
+pp_dndi_y_steps(void *private_context) /* private_context is treated as a struct pp_dndi_context */
+{
+    struct pp_dndi_context *pp_dndi_context = private_context;
+
+    return pp_dndi_context->dest_h / 4; /* dest_h divided by 4, truncating; pairs with the y * 4 origin in pp_dndi_set_block_parameter */
+}
+
+static int /* hook stored in pp_context->pp_set_block_parameter by pp_nv12_dndi_initialize */
+pp_dndi_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y) /* x, y: step indices */
+{
+    struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
+
+    pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16; /* x step scaled by 16 */
+    pp_inline_parameter->grf5.destination_block_vertical_origin = y * 4; /* y step scaled by 4, matching dest_h / 4 in pp_dndi_y_steps */
+
+    return 0; /* unconditional; there is no failure path */
+}
+
+static 
+VAStatus pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
+                             const struct i965_surface *src_surface,
+                             const VARectangle *src_rect,
+                             struct i965_surface *dst_surface,
+                             const VARectangle *dst_rect,
+                             void *filter_param)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct pp_dndi_context *pp_dndi_context = (struct pp_dndi_context *)&pp_context->private_context;
+    struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
+    struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
+    struct object_surface *obj_surface;
+    struct i965_sampler_dndi *sampler_dndi;
+    int index;
+    int w, h;
+    int orig_w, orig_h;
+    int dndi_top_first = 1;
+
+    if (src_surface->flags == I965_SURFACE_FLAG_FRAME)
+        return VA_STATUS_ERROR_FLAG_NOT_SUPPORTED;
+
+    if (src_surface->flags == I965_SURFACE_FLAG_TOP_FIELD_FIRST)
+        dndi_top_first = 1;
+    else
+        dndi_top_first = 0;
+
+    /* surface */
+    obj_surface = SURFACE(src_surface->id);
+    orig_w = obj_surface->orig_width;
+    orig_h = obj_surface->orig_height;
+    w = obj_surface->width;
+    h = obj_surface->height;
+
+    if (pp_context->stmm.bo == NULL) {
+        pp_context->stmm.bo = dri_bo_alloc(i965->intel.bufmgr,
+                                           "STMM surface",
+                                           w * h,
+                                           4096);
+        assert(pp_context->stmm.bo);
+    }
+
+    /* source UV surface index 2 */
+    i965_pp_set_surface_state(ctx, pp_context,
+                              obj_surface->bo, w * h,
+                              orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
+                              2, 0);
+
+    /* source YUV surface index 4 */
+    i965_pp_set_surface2_state(ctx, pp_context,
+                               obj_surface->bo, 0,
+                               orig_w, orig_h, w,
+                               0, h,
+                               SURFACE_FORMAT_PLANAR_420_8, 1,
+                               4);
+
+    /* source STMM surface index 20 */
+    i965_pp_set_surface_state(ctx, pp_context,
+                              pp_context->stmm.bo, 0,
+                              orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
+                              20, 1);
+
+    /* destination surface */
+    obj_surface = SURFACE(dst_surface->id);
+    orig_w = obj_surface->orig_width;
+    orig_h = obj_surface->orig_height;
+    w = obj_surface->width;
+    h = obj_surface->height;
+
+    /* destination Y surface index 7 */
+    i965_pp_set_surface_state(ctx, pp_context,
+                              obj_surface->bo, 0,
+                              orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
+                              7, 1);
+
+    /* destination UV surface index 8 */
+    i965_pp_set_surface_state(ctx, pp_context,
+                              obj_surface->bo, w * h,
+                              orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
+                              8, 1);
+    /* sampler dndi */
+    dri_bo_map(pp_context->sampler_state_table.bo, True);
+    assert(pp_context->sampler_state_table.bo->virtual);
+    assert(sizeof(*sampler_dndi) == sizeof(int) * 8);
+    sampler_dndi = pp_context->sampler_state_table.bo->virtual;
+
+    /* sampler dndi index 0 */
+    index = 0;
+    sampler_dndi[index].dw0.denoise_asd_threshold = 0;
+    sampler_dndi[index].dw0.denoise_history_delta = 8;          // 0-15, default is 8
+    sampler_dndi[index].dw0.denoise_maximum_history = 128;      // 128-240
+    sampler_dndi[index].dw0.denoise_stad_threshold = 0;
+
+    sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
+    sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 4;
+    sampler_dndi[index].dw1.stmm_c2 = 1;
+    sampler_dndi[index].dw1.low_temporal_difference_threshold = 8;
+    sampler_dndi[index].dw1.temporal_difference_threshold = 16;
+
+    sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = 15;   // 0-31
+    sampler_dndi[index].dw2.block_noise_estimate_edge_threshold = 7;    // 0-15
+    sampler_dndi[index].dw2.denoise_edge_threshold = 7;                 // 0-15
+    sampler_dndi[index].dw2.good_neighbor_threshold = 4;                // 0-63
+
+    sampler_dndi[index].dw3.maximum_stmm = 128;
+    sampler_dndi[index].dw3.multipler_for_vecm = 2;
+    sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
+    sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
+    sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
+
+    sampler_dndi[index].dw4.sdi_delta = 8;
+    sampler_dndi[index].dw4.sdi_threshold = 128;
+    sampler_dndi[index].dw4.stmm_output_shift = 7;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
+    sampler_dndi[index].dw4.stmm_shift_up = 0;
+    sampler_dndi[index].dw4.stmm_shift_down = 0;
+    sampler_dndi[index].dw4.minimum_stmm = 0;
+
+    sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 8;
+    sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 32;
+    sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 64;
+    sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 32;
+
+    sampler_dndi[index].dw6.dn_enable = 1;
+    sampler_dndi[index].dw6.di_enable = 1;
+    sampler_dndi[index].dw6.di_partial = 0;
+    sampler_dndi[index].dw6.dndi_top_first = dndi_top_first;
+    sampler_dndi[index].dw6.dndi_stream_id = 0;
+    sampler_dndi[index].dw6.dndi_first_frame = 1;
+    sampler_dndi[index].dw6.progressive_dn = 0;
+    sampler_dndi[index].dw6.fmd_tear_threshold = 63;
+    sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 32;
+    sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 32;
+
+    sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 0;
+    sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 0;
+    sampler_dndi[index].dw7.vdi_walker_enable = 0;
+    sampler_dndi[index].dw7.column_width_minus1 = 0;
+
+    dri_bo_unmap(pp_context->sampler_state_table.bo);
+
+    /* private function & data */
+    pp_context->pp_x_steps = pp_dndi_x_steps;
+    pp_context->pp_y_steps = pp_dndi_y_steps;
+    pp_context->pp_set_block_parameter = pp_dndi_set_block_parameter;
+
+    pp_static_parameter->grf1.statistics_surface_picth = w / 2;
+    pp_static_parameter->grf1.r1_6.di.top_field_first = dndi_top_first;
+    pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m2 = 0;
+    pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m1 = 0;
+
+    pp_inline_parameter->grf5.block_count_x = w / 16;   /* 1 x N */
+    pp_inline_parameter->grf5.number_blocks = w / 16;
+    pp_inline_parameter->grf5.block_vertical_mask = 0xff;
+    pp_inline_parameter->grf5.block_horizontal_mask = 0xffff;
 
-    pp_static_parameter.grf4.r4_2.avs.nlas = 1;
-    pp_static_parameter.grf1.r1_6.normalized_video_y_scaling_step = (float) src_rect->height / in_h / out_h;
+    pp_dndi_context->dest_w = w;
+    pp_dndi_context->dest_h = h;
 
-    pp_inline_parameter.grf5.normalized_video_x_scaling_step = (float) src_rect->width / in_w / out_w;
-    pp_inline_parameter.grf5.block_count_x = 1;        /* M x 1 */
-    pp_inline_parameter.grf5.number_blocks = pp_avs_context->dest_h / 8;
-    pp_inline_parameter.grf5.block_vertical_mask = 0xff;
-    pp_inline_parameter.grf5.block_horizontal_mask = 0xffff;
-    pp_inline_parameter.grf6.video_step_delta = 0.0;
+    dst_surface->flags = I965_SURFACE_FLAG_FRAME;
+
+    return VA_STATUS_SUCCESS;
 }
 
 static int
-pp_dndi_x_steps(void *private_context)
+pp_dn_x_steps(void *private_context) /* later stored in pp_context->pp_x_steps by this patch; always returns 1 and ignores private_context */
 {
     return 1;
 }
 
 static int
-pp_dndi_y_steps(void *private_context)
+pp_dn_y_steps(void *private_context) /* later stored in pp_context->pp_y_steps by this patch */
 {
-    struct pp_dndi_context *pp_dndi_context = private_context;
+    struct pp_dn_context *pp_dn_context = private_context; /* private_context is treated as a struct pp_dn_context */
 
-    return pp_dndi_context->dest_h / 4;
+    return pp_dn_context->dest_h / 8; /* dest_h divided by 8, truncating; pairs with the y * 8 origin in pp_dn_set_block_parameter */
 }
 
 static int
-pp_dndi_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
+pp_dn_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y) /* x, y: step indices */
 {
-    pp_inline_parameter.grf5.destination_block_horizontal_origin = x * 16;
-    pp_inline_parameter.grf5.destination_block_vertical_origin = y * 4;
+    struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter; /* per-context parameters replace the old global */
+
+    pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16; /* x step scaled by 16 */
+    pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8; /* y step scaled by 8, matching dest_h / 8 in pp_dn_y_steps */
 
     return 0;
 }
 
 static 
-void pp_nv12_dndi_initialize(VADriverContextP ctx,
-                             VASurfaceID in_surface_id, VASurfaceID out_surface_id,
-                             const VARectangle *src_rect, const VARectangle *dst_rect)
+VAStatus pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
+                           const struct i965_surface *src_surface,
+                           const VARectangle *src_rect,
+                           struct i965_surface *dst_surface,
+                           const VARectangle *dst_rect,
+                           void *filter_param)
 {
     struct i965_driver_data *i965 = i965_driver_data(ctx);
-    struct i965_post_processing_context *pp_context = i965->pp_context;
-    struct pp_dndi_context *pp_dndi_context = (struct pp_dndi_context *)&pp_context->private_context;
+    struct pp_dn_context *pp_dn_context = (struct pp_dn_context *)&pp_context->private_context;
     struct object_surface *obj_surface;
-    struct i965_surface_state *ss;
-    struct i965_surface_state2 *ss_dndi;
     struct i965_sampler_dndi *sampler_dndi;
-    dri_bo *bo;
+    struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
+    struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
+ 
     int index;
     int w, h;
     int orig_w, orig_h;
-    unsigned int tiling, swizzle;
+    int dn_strength = 15;
+    int dndi_top_first = 1;
+    int dn_progressive = 0;
+
+    if (src_surface->flags == I965_SURFACE_FLAG_FRAME) {
+        dndi_top_first = 1;
+        dn_progressive = 1;
+    } else if (src_surface->flags == I965_SURFACE_FLAG_TOP_FIELD_FIRST) {
+        dndi_top_first = 1;
+        dn_progressive = 0;
+    } else {
+        dndi_top_first = 0;
+        dn_progressive = 0;
+    }
 
     /* surface */
-    obj_surface = SURFACE(in_surface_id);
+    obj_surface = SURFACE(src_surface->id);
     orig_w = obj_surface->orig_width;
     orig_h = obj_surface->orig_height;
     w = obj_surface->width;
     h = obj_surface->height;
-    dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
 
     if (pp_context->stmm.bo == NULL) {
         pp_context->stmm.bo = dri_bo_alloc(i965->intel.bufmgr,
@@ -1479,165 +2879,44 @@ void pp_nv12_dndi_initialize(VADriverContextP ctx,
     }
 
     /* source UV surface index 2 */
-    index = 2;
-    pp_context->surfaces[index].s_bo = obj_surface->bo;
-    dri_bo_reference(pp_context->surfaces[index].s_bo);
-    bo = dri_bo_alloc(i965->intel.bufmgr, 
-                      "surface state", 
-                      sizeof(struct i965_surface_state), 
-                      4096);
-    assert(bo);
-    pp_context->surfaces[index].ss_bo = bo;
-    dri_bo_map(bo, True);
-    assert(bo->virtual);
-    ss = bo->virtual;
-    memset(ss, 0, sizeof(*ss));
-    ss->ss0.surface_type = I965_SURFACE_2D;
-    ss->ss0.surface_format = I965_SURFACEFORMAT_R8G8_UNORM;
-    ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset + w * h;
-    ss->ss2.width = orig_w / 4 - 1;
-    ss->ss2.height = orig_h / 2 - 1;
-    ss->ss3.pitch = w - 1;
-    pp_set_surface_tiling(ss, tiling);
-    dri_bo_emit_reloc(bo,
-                      I915_GEM_DOMAIN_RENDER, 
-                      0,
-                      w * h,
-                      offsetof(struct i965_surface_state, ss1),
-                      pp_context->surfaces[index].s_bo);
-    dri_bo_unmap(bo);
+    i965_pp_set_surface_state(ctx, pp_context,
+                              obj_surface->bo, w * h,
+                              orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
+                              2, 0);
 
     /* source YUV surface index 4 */
-    index = 4;
-    pp_context->surfaces[index].s_bo = obj_surface->bo;
-    dri_bo_reference(pp_context->surfaces[index].s_bo);
-    bo = dri_bo_alloc(i965->intel.bufmgr, 
-                      "YUV surface state for deinterlace ", 
-                      sizeof(struct i965_surface_state2), 
-                      4096);
-    assert(bo);
-    pp_context->surfaces[index].ss_bo = bo;
-    dri_bo_map(bo, True);
-    assert(bo->virtual);
-    ss_dndi = bo->virtual;
-    memset(ss_dndi, 0, sizeof(*ss_dndi));
-    ss_dndi->ss0.surface_base_address = pp_context->surfaces[index].s_bo->offset;
-    ss_dndi->ss1.cbcr_pixel_offset_v_direction = 0;
-    ss_dndi->ss1.width = w - 1;
-    ss_dndi->ss1.height = h - 1;
-    ss_dndi->ss1.cbcr_pixel_offset_v_direction = 1;
-    ss_dndi->ss2.half_pitch_for_chroma = 0;
-    ss_dndi->ss2.pitch = w - 1;
-    ss_dndi->ss2.interleave_chroma = 1;
-    ss_dndi->ss2.surface_format = SURFACE_FORMAT_PLANAR_420_8;
-    ss_dndi->ss2.half_pitch_for_chroma = 0;
-    ss_dndi->ss2.tiled_surface = 0;
-    ss_dndi->ss3.x_offset_for_cb = 0;
-    ss_dndi->ss3.y_offset_for_cb = h;
-    pp_set_surface2_tiling(ss_dndi, tiling);
-    dri_bo_emit_reloc(bo,
-                      I915_GEM_DOMAIN_RENDER, 
-                      0,
-                      0,
-                      offsetof(struct i965_surface_state2, ss0),
-                      pp_context->surfaces[index].s_bo);
-    dri_bo_unmap(bo);
+    i965_pp_set_surface2_state(ctx, pp_context,
+                               obj_surface->bo, 0,
+                               orig_w, orig_h, w,
+                               0, h,
+                               SURFACE_FORMAT_PLANAR_420_8, 1,
+                               4);
 
     /* source STMM surface index 20 */
-    index = 20;
-    pp_context->surfaces[index].s_bo = pp_context->stmm.bo;
-    dri_bo_reference(pp_context->surfaces[index].s_bo);
-    bo = dri_bo_alloc(i965->intel.bufmgr, 
-                      "STMM surface state for deinterlace ", 
-                      sizeof(struct i965_surface_state2), 
-                      4096);
-    assert(bo);
-    pp_context->surfaces[index].ss_bo = bo;
-    dri_bo_map(bo, True);
-    assert(bo->virtual);
-    ss = bo->virtual;
-    memset(ss, 0, sizeof(*ss));
-    ss->ss0.surface_type = I965_SURFACE_2D;
-    ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
-    ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset;
-    ss->ss2.width = w - 1;
-    ss->ss2.height = h - 1;
-    ss->ss3.pitch = w - 1;
-    dri_bo_emit_reloc(bo,
-                      I915_GEM_DOMAIN_RENDER, 
-                      I915_GEM_DOMAIN_RENDER,
-                      0,
-                      offsetof(struct i965_surface_state, ss1),
-                      pp_context->surfaces[index].s_bo);
-    dri_bo_unmap(bo);
+    i965_pp_set_surface_state(ctx, pp_context,
+                              pp_context->stmm.bo, 0,
+                              orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
+                              20, 1);
 
     /* destination surface */
-    obj_surface = SURFACE(out_surface_id);
+    obj_surface = SURFACE(dst_surface->id);
     orig_w = obj_surface->orig_width;
     orig_h = obj_surface->orig_height;
     w = obj_surface->width;
     h = obj_surface->height;
-    dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
 
     /* destination Y surface index 7 */
-    index = 7;
-    pp_context->surfaces[index].s_bo = obj_surface->bo;
-    dri_bo_reference(pp_context->surfaces[index].s_bo);
-    bo = dri_bo_alloc(i965->intel.bufmgr, 
-                      "surface state", 
-                      sizeof(struct i965_surface_state), 
-                      4096);
-    assert(bo);
-    pp_context->surfaces[index].ss_bo = bo;
-    dri_bo_map(bo, True);
-    assert(bo->virtual);
-    ss = bo->virtual;
-    memset(ss, 0, sizeof(*ss));
-    ss->ss0.surface_type = I965_SURFACE_2D;
-    ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
-    ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset;
-    ss->ss2.width = orig_w / 4 - 1;
-    ss->ss2.height = orig_h - 1;
-    ss->ss3.pitch = w - 1;
-    pp_set_surface_tiling(ss, tiling);
-    dri_bo_emit_reloc(bo,
-                      I915_GEM_DOMAIN_RENDER, 
-                      I915_GEM_DOMAIN_RENDER,
-                      0,
-                      offsetof(struct i965_surface_state, ss1),
-                      pp_context->surfaces[index].s_bo);
-    dri_bo_unmap(bo);
+    i965_pp_set_surface_state(ctx, pp_context,
+                              obj_surface->bo, 0,
+                              orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
+                              7, 1);
 
     /* destination UV surface index 8 */
-    index = 8;
-    pp_context->surfaces[index].s_bo = obj_surface->bo;
-    dri_bo_reference(pp_context->surfaces[index].s_bo);
-    bo = dri_bo_alloc(i965->intel.bufmgr, 
-                      "surface state", 
-                      sizeof(struct i965_surface_state), 
-                      4096);
-    assert(bo);
-    pp_context->surfaces[index].ss_bo = bo;
-    dri_bo_map(bo, True);
-    assert(bo->virtual);
-    ss = bo->virtual;
-    memset(ss, 0, sizeof(*ss));
-    ss->ss0.surface_type = I965_SURFACE_2D;
-    ss->ss0.surface_format = I965_SURFACEFORMAT_R8G8_UNORM;
-    ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset + w * h;
-    ss->ss2.width = orig_w / 4 - 1;
-    ss->ss2.height = orig_h / 2 - 1;
-    ss->ss3.pitch = w - 1;
-    pp_set_surface_tiling(ss, tiling);
-    dri_bo_emit_reloc(bo,
-                      I915_GEM_DOMAIN_RENDER, 
-                      I915_GEM_DOMAIN_RENDER,
-                      w * h,
-                      offsetof(struct i965_surface_state, ss1),
-                      pp_context->surfaces[index].s_bo);
-    dri_bo_unmap(bo);
-
-    /* sampler dndi */
+    i965_pp_set_surface_state(ctx, pp_context,
+                              obj_surface->bo, w * h,
+                              orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
+                              8, 1);
+    /* sampler dn */
     dri_bo_map(pp_context->sampler_state_table.bo, True);
     assert(pp_context->sampler_state_table.bo->virtual);
     assert(sizeof(*sampler_dndi) == sizeof(int) * 8);
@@ -1656,7 +2935,7 @@ void pp_nv12_dndi_initialize(VADriverContextP ctx,
     sampler_dndi[index].dw1.low_temporal_difference_threshold = 8;
     sampler_dndi[index].dw1.temporal_difference_threshold = 16;
 
-    sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = 15;   // 0-31
+    sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = dn_strength;   // 0-31
     sampler_dndi[index].dw2.block_noise_estimate_edge_threshold = 7;    // 0-15
     sampler_dndi[index].dw2.denoise_edge_threshold = 7;                 // 0-15
     sampler_dndi[index].dw2.good_neighbor_threshold = 7;                // 0-63
@@ -1680,57 +2959,514 @@ void pp_nv12_dndi_initialize(VADriverContextP ctx,
     sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 0;
 
     sampler_dndi[index].dw6.dn_enable = 1;
+    sampler_dndi[index].dw6.di_enable = 0;
+    sampler_dndi[index].dw6.di_partial = 0;
+    sampler_dndi[index].dw6.dndi_top_first = dndi_top_first;
+    sampler_dndi[index].dw6.dndi_stream_id = 1;
+    sampler_dndi[index].dw6.dndi_first_frame = 1;
+    sampler_dndi[index].dw6.progressive_dn = dn_progressive;
+    sampler_dndi[index].dw6.fmd_tear_threshold = 32;
+    sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 32;
+    sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 32;
+
+    sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 2;
+    sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 1;
+    sampler_dndi[index].dw7.vdi_walker_enable = 0;
+    sampler_dndi[index].dw7.column_width_minus1 = w / 16;
+
+    dri_bo_unmap(pp_context->sampler_state_table.bo);
+
+    /* private function & data */
+    pp_context->pp_x_steps = pp_dn_x_steps;
+    pp_context->pp_y_steps = pp_dn_y_steps;
+    pp_context->pp_set_block_parameter = pp_dn_set_block_parameter;
+
+    pp_static_parameter->grf1.statistics_surface_picth = w / 2;
+    pp_static_parameter->grf1.r1_6.di.top_field_first = 0;
+    pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m2 = 64;
+    pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m1 = 192;
+
+    pp_inline_parameter->grf5.block_count_x = w / 16;   /* 1 x N */
+    pp_inline_parameter->grf5.number_blocks = w / 16;
+    pp_inline_parameter->grf5.block_vertical_mask = 0xff;
+    pp_inline_parameter->grf5.block_horizontal_mask = 0xffff;
+
+    pp_dn_context->dest_w = w;
+    pp_dn_context->dest_h = h;
+ 
+    return VA_STATUS_SUCCESS;
+}
+
+static int /* hook stored in pp_context->pp_x_steps by gen7_pp_nv12_dndi_initialize */
+gen7_pp_dndi_x_steps(void *private_context) /* private_context is treated as a struct pp_dndi_context */
+{
+    struct pp_dndi_context *pp_dndi_context = private_context;
+
+    return pp_dndi_context->dest_w / 16; /* dest_w divided by 16, truncating; pairs with the x * 16 origin in gen7_pp_dndi_set_block_parameter */
+}
+
+static int /* hook stored in pp_context->pp_y_steps by gen7_pp_nv12_dndi_initialize */
+gen7_pp_dndi_y_steps(void *private_context) /* private_context is treated as a struct pp_dndi_context */
+{
+    struct pp_dndi_context *pp_dndi_context = private_context;
+
+    return pp_dndi_context->dest_h / 4; /* dest_h divided by 4, truncating; pairs with the y * 4 origin in gen7_pp_dndi_set_block_parameter */
+}
+
+static int /* hook stored in pp_context->pp_set_block_parameter by gen7_pp_nv12_dndi_initialize */
+gen7_pp_dndi_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y) /* x, y: step indices */
+{
+    struct gen7_pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter; /* gen7 layout: origins live in grf7, not grf5 */
+
+    pp_inline_parameter->grf7.destination_block_horizontal_origin = x * 16; /* x step scaled by 16, matching dest_w / 16 in gen7_pp_dndi_x_steps */
+    pp_inline_parameter->grf7.destination_block_vertical_origin = y * 4; /* y step scaled by 4, matching dest_h / 4 in gen7_pp_dndi_y_steps */
+
+    return 0; /* unconditional; there is no failure path */
+}
+
+static VAStatus
+gen7_pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
+                             const struct i965_surface *src_surface,
+                             const VARectangle *src_rect,
+                             struct i965_surface *dst_surface,
+                             const VARectangle *dst_rect,
+                             void *filter_param)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct pp_dndi_context *pp_dndi_context = (struct pp_dndi_context *)&pp_context->private_context;
+    struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
+    struct object_surface *obj_surface;
+    struct gen7_sampler_dndi *sampler_dndi;
+    int index;
+    int w, h;
+    int orig_w, orig_h;
+    int dndi_top_first = 1;
+
+    if (src_surface->flags == I965_SURFACE_FLAG_FRAME)
+        return VA_STATUS_ERROR_FLAG_NOT_SUPPORTED;
+
+    if (src_surface->flags == I965_SURFACE_FLAG_TOP_FIELD_FIRST)
+        dndi_top_first = 1;
+    else
+        dndi_top_first = 0;
+
+    /* surface */
+    obj_surface = SURFACE(src_surface->id);
+    orig_w = obj_surface->orig_width;
+    orig_h = obj_surface->orig_height;
+    w = obj_surface->width;
+    h = obj_surface->height;
+
+    if (pp_context->stmm.bo == NULL) {
+        pp_context->stmm.bo = dri_bo_alloc(i965->intel.bufmgr,
+                                           "STMM surface",
+                                           w * h,
+                                           4096);
+        assert(pp_context->stmm.bo);
+    }
+
+    /* source UV surface index 1 */
+    gen7_pp_set_surface_state(ctx, pp_context,
+                              obj_surface->bo, w * h,
+                              orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
+                              1, 0);
+
+    /* source YUV surface index 3 */
+    gen7_pp_set_surface2_state(ctx, pp_context,
+                               obj_surface->bo, 0,
+                               orig_w, orig_h, w,
+                               0, h,
+                               SURFACE_FORMAT_PLANAR_420_8, 1,
+                               3);
+
+    /* source (temporal reference) YUV surface index 4 */
+    gen7_pp_set_surface2_state(ctx, pp_context,
+                               obj_surface->bo, 0,
+                               orig_w, orig_h, w,
+                               0, h,
+                               SURFACE_FORMAT_PLANAR_420_8, 1,
+                               4);
+
+    /* STMM / History Statistics input surface, index 5 */
+    gen7_pp_set_surface_state(ctx, pp_context,
+                              pp_context->stmm.bo, 0,
+                              orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
+                              5, 1);
+
+    /* destination surface */
+    obj_surface = SURFACE(dst_surface->id);
+    orig_w = obj_surface->orig_width;
+    orig_h = obj_surface->orig_height;
+    w = obj_surface->width;
+    h = obj_surface->height;
+
+    /* destination(Previous frame) Y surface index 27 */
+    gen7_pp_set_surface_state(ctx, pp_context,
+                              obj_surface->bo, 0,
+                              orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
+                              27, 1);
+
+    /* destination(Previous frame) UV surface index 28 */
+    gen7_pp_set_surface_state(ctx, pp_context,
+                              obj_surface->bo, w * h,
+                              orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
+                              28, 1);
+
+    /* destination(Current frame) Y surface index 30 */
+    gen7_pp_set_surface_state(ctx, pp_context,
+                              obj_surface->bo, 0,
+                              orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
+                              30, 1);
+
+    /* destination(Current frame) UV surface index 31 */
+    gen7_pp_set_surface_state(ctx, pp_context,
+                              obj_surface->bo, w * h,
+                              orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
+                              31, 1);
+
+    /* STMM output surface, index 33 */
+    gen7_pp_set_surface_state(ctx, pp_context,
+                              pp_context->stmm.bo, 0,
+                              orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
+                              33, 1);
+
+
+    /* sampler dndi */
+    dri_bo_map(pp_context->sampler_state_table.bo, True);
+    assert(pp_context->sampler_state_table.bo->virtual);
+    assert(sizeof(*sampler_dndi) == sizeof(int) * 8);
+    sampler_dndi = pp_context->sampler_state_table.bo->virtual;
+
+    /* sampler dndi index 0 */
+    index = 0;
+    sampler_dndi[index].dw0.denoise_asd_threshold = 0;
+    sampler_dndi[index].dw0.dnmh_delt = 8;
+    sampler_dndi[index].dw0.vdi_walker_y_stride = 0;
+    sampler_dndi[index].dw0.vdi_walker_frame_sharing_enable = 0;
+    sampler_dndi[index].dw0.denoise_maximum_history = 128;      // 128-240
+    sampler_dndi[index].dw0.denoise_stad_threshold = 0;
+
+    sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
+    sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 0;
+    sampler_dndi[index].dw1.stmm_c2 = 0;
+    sampler_dndi[index].dw1.low_temporal_difference_threshold = 8;
+    sampler_dndi[index].dw1.temporal_difference_threshold = 16;
+
+    sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = 15;   // 0-31
+    sampler_dndi[index].dw2.bne_edge_th = 1;
+    sampler_dndi[index].dw2.smooth_mv_th = 0;
+    sampler_dndi[index].dw2.sad_tight_th = 5;
+    sampler_dndi[index].dw2.cat_slope_minus1 = 9;
+    sampler_dndi[index].dw2.good_neighbor_th = 4;
+
+    sampler_dndi[index].dw3.maximum_stmm = 128;
+    sampler_dndi[index].dw3.multipler_for_vecm = 2;
+    sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
+    sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
+    sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
+
+    sampler_dndi[index].dw4.sdi_delta = 8;
+    sampler_dndi[index].dw4.sdi_threshold = 128;
+    sampler_dndi[index].dw4.stmm_output_shift = 7;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
+    sampler_dndi[index].dw4.stmm_shift_up = 0;
+    sampler_dndi[index].dw4.stmm_shift_down = 0;
+    sampler_dndi[index].dw4.minimum_stmm = 0;
+
+    sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 0;
+    sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 0;
+    sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 0;
+    sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 0;
+
+    sampler_dndi[index].dw6.dn_enable = 0;
     sampler_dndi[index].dw6.di_enable = 1;
     sampler_dndi[index].dw6.di_partial = 0;
-    sampler_dndi[index].dw6.dndi_top_first = 1;
+    sampler_dndi[index].dw6.dndi_top_first = dndi_top_first;
     sampler_dndi[index].dw6.dndi_stream_id = 1;
     sampler_dndi[index].dw6.dndi_first_frame = 1;
     sampler_dndi[index].dw6.progressive_dn = 0;
+    sampler_dndi[index].dw6.mcdi_enable = 0;
     sampler_dndi[index].dw6.fmd_tear_threshold = 32;
+    sampler_dndi[index].dw6.cat_th1 = 0;
     sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 32;
     sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 32;
 
-    sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 2;
-    sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 1;
-    sampler_dndi[index].dw7.vdi_walker_enable = 0;
-    sampler_dndi[index].dw7.column_width_minus1 = w / 16;
+    sampler_dndi[index].dw7.sad_tha = 5;
+    sampler_dndi[index].dw7.sad_thb = 10;
+    sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 0;
+    sampler_dndi[index].dw7.mc_pixel_consistency_th = 25;
+    sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 0;
+    sampler_dndi[index].dw7.vdi_walker_enable = 0;
+    sampler_dndi[index].dw7.neighborpixel_th = 10;
+    sampler_dndi[index].dw7.column_width_minus1 = w / 16;
+
+    dri_bo_unmap(pp_context->sampler_state_table.bo);
+
+    /* private function & data */
+    pp_context->pp_x_steps = gen7_pp_dndi_x_steps;
+    pp_context->pp_y_steps = gen7_pp_dndi_y_steps;
+    pp_context->pp_set_block_parameter = gen7_pp_dndi_set_block_parameter;
+
+    pp_static_parameter->grf1.di_statistics_surface_pitch_div2 = w / 2;
+    pp_static_parameter->grf1.di_statistics_surface_height_div4 = h / 4;
+    pp_static_parameter->grf1.di_top_field_first = 0;
+    pp_static_parameter->grf1.pointer_to_inline_parameter = 7;
+
+    pp_static_parameter->grf2.di_destination_packed_y_component_offset = 0;
+    pp_static_parameter->grf2.di_destination_packed_u_component_offset = 1;
+    pp_static_parameter->grf2.di_destination_packed_v_component_offset = 3;
+
+    pp_static_parameter->grf4.di_hoffset_svf_from_dvf = 0;
+    pp_static_parameter->grf4.di_voffset_svf_from_dvf = 0;
+
+    pp_dndi_context->dest_w = w;
+    pp_dndi_context->dest_h = h;
+
+    dst_surface->flags = I965_SURFACE_FLAG_FRAME;
+
+    return VA_STATUS_SUCCESS;
+}
+
+static int
+gen7_pp_dn_x_steps(void *private_context)
+{
+    struct pp_dn_context *pp_dn_context = private_context;
+    /* Number of horizontal steps: dest_w divided by 16 (integer division, remainder dropped). */
+    return pp_dn_context->dest_w / 16;
+}
+
+static int
+gen7_pp_dn_y_steps(void *private_context)
+{
+    struct pp_dn_context *pp_dn_context = private_context;
+    /* Number of vertical steps: dest_h divided by 4 (integer division, remainder dropped). */
+    return pp_dn_context->dest_h / 4;
+}
+
+static int
+gen7_pp_dn_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
+{
+    struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
+    /* Map step (x, y) to destination origin (x * 16, y * 4); same 16 and 4 divisors as the x/y step callbacks. */
+    pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16;
+    pp_inline_parameter->grf5.destination_block_vertical_origin = y * 4;
+
+    return 0;   /* always returns 0 */
+}
+
+static VAStatus
+gen7_pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
+                           const struct i965_surface *src_surface,
+                           const VARectangle *src_rect,
+                           struct i965_surface *dst_surface,
+                           const VARectangle *dst_rect,
+                           void *filter_param)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct pp_dn_context *pp_dn_context = (struct pp_dn_context *)&pp_context->private_context;
+    struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
+    struct object_surface *obj_surface;
+    struct gen7_sampler_dndi *sampler_dn;
+    /* Sets up the gen7 denoise (DN) pass: surface states, sampler state entry, step callbacks and static parameters. */
+    int index;
+    int w, h;
+    int orig_w, orig_h;
+    int dn_strength = 15;       /* fixed; written to dw2.block_noise_estimate_noise_threshold below */
+    int dndi_top_first = 1;
+    int dn_progressive = 0;
+    /* Field order / progressive mode come from the source flags; src_rect, dst_rect and filter_param are not read here. */
+    if (src_surface->flags == I965_SURFACE_FLAG_FRAME) {
+        dndi_top_first = 1;
+        dn_progressive = 1;
+    } else if (src_surface->flags == I965_SURFACE_FLAG_TOP_FIELD_FIRST) {
+        dndi_top_first = 1;
+        dn_progressive = 0;
+    } else {
+        dndi_top_first = 0;
+        dn_progressive = 0;
+    }
+
+    /* source surface: w/h/orig_w/orig_h below refer to it until the destination surface is looked up */
+    obj_surface = SURFACE(src_surface->id);
+    orig_w = obj_surface->orig_width;
+    orig_h = obj_surface->orig_height;
+    w = obj_surface->width;
+    h = obj_surface->height;
+
+    if (pp_context->stmm.bo == NULL) {      /* allocate the STMM buffer only when none exists yet */
+        pp_context->stmm.bo = dri_bo_alloc(i965->intel.bufmgr,
+                                           "STMM surface",
+                                           w * h,
+                                           4096);
+        assert(pp_context->stmm.bo);
+    }
+
+    /* source UV surface index 1 (starts at byte offset w * h in the source bo) */
+    gen7_pp_set_surface_state(ctx, pp_context,
+                              obj_surface->bo, w * h,
+                              orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
+                              1, 0);
+
+    /* source YUV surface index 3 */
+    gen7_pp_set_surface2_state(ctx, pp_context,
+                               obj_surface->bo, 0,
+                               orig_w, orig_h, w,
+                               0, h,
+                               SURFACE_FORMAT_PLANAR_420_8, 1,
+                               3);
+
+    /* source (temporal reference) YUV surface index 4; bound to the same bo as index 3 */
+    gen7_pp_set_surface2_state(ctx, pp_context,
+                               obj_surface->bo, 0,
+                               orig_w, orig_h, w,
+                               0, h,
+                               SURFACE_FORMAT_PLANAR_420_8, 1,
+                               4);
+
+    /* STMM / History Statistics input surface, index 5 */
+    gen7_pp_set_surface_state(ctx, pp_context,
+                              pp_context->stmm.bo, 0,
+                              orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
+                              5, 1);
+
+    /* destination surface: from here on w/h/orig_w/orig_h describe the destination */
+    obj_surface = SURFACE(dst_surface->id);
+    orig_w = obj_surface->orig_width;
+    orig_h = obj_surface->orig_height;
+    w = obj_surface->width;
+    h = obj_surface->height;
+
+    /* destination Y surface index 24 */
+    gen7_pp_set_surface_state(ctx, pp_context,
+                              obj_surface->bo, 0,
+                              orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
+                              24, 1);
+
+    /* destination UV surface index 25 (starts at byte offset w * h in the destination bo) */
+    gen7_pp_set_surface_state(ctx, pp_context,
+                              obj_surface->bo, w * h,
+                              orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
+                              25, 1);
+
+    /* sampler dn: map the sampler state table for writing */
+    dri_bo_map(pp_context->sampler_state_table.bo, True);
+    assert(pp_context->sampler_state_table.bo->virtual);
+    assert(sizeof(*sampler_dn) == sizeof(int) * 8);
+    sampler_dn = pp_context->sampler_state_table.bo->virtual;
+
+    /* sampler dn state is written to entry 0 of the table */
+    index = 0;
+    sampler_dn[index].dw0.denoise_asd_threshold = 0;
+    sampler_dn[index].dw0.dnmh_delt = 8;
+    sampler_dn[index].dw0.vdi_walker_y_stride = 0;
+    sampler_dn[index].dw0.vdi_walker_frame_sharing_enable = 0;
+    sampler_dn[index].dw0.denoise_maximum_history = 128;      // 128-240
+    sampler_dn[index].dw0.denoise_stad_threshold = 0;
+
+    sampler_dn[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
+    sampler_dn[index].dw1.denoise_moving_pixel_threshold = 0;
+    sampler_dn[index].dw1.stmm_c2 = 0;
+    sampler_dn[index].dw1.low_temporal_difference_threshold = 8;
+    sampler_dn[index].dw1.temporal_difference_threshold = 16;
+
+    sampler_dn[index].dw2.block_noise_estimate_noise_threshold = dn_strength;   // 0-31
+    sampler_dn[index].dw2.bne_edge_th = 1;
+    sampler_dn[index].dw2.smooth_mv_th = 0;
+    sampler_dn[index].dw2.sad_tight_th = 5;
+    sampler_dn[index].dw2.cat_slope_minus1 = 9;
+    sampler_dn[index].dw2.good_neighbor_th = 4;
+
+    sampler_dn[index].dw3.maximum_stmm = 128;
+    sampler_dn[index].dw3.multipler_for_vecm = 2;
+    sampler_dn[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
+    sampler_dn[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
+    sampler_dn[index].dw3.stmm_blending_constant_select = 0;
+
+    sampler_dn[index].dw4.sdi_delta = 8;
+    sampler_dn[index].dw4.sdi_threshold = 128;
+    sampler_dn[index].dw4.stmm_output_shift = 7;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
+    sampler_dn[index].dw4.stmm_shift_up = 0;
+    sampler_dn[index].dw4.stmm_shift_down = 0;
+    sampler_dn[index].dw4.minimum_stmm = 0;
+
+    sampler_dn[index].dw5.fmd_temporal_difference_threshold = 0;
+    sampler_dn[index].dw5.sdi_fallback_mode_2_constant = 0;
+    sampler_dn[index].dw5.sdi_fallback_mode_1_t2_constant = 0;
+    sampler_dn[index].dw5.sdi_fallback_mode_1_t1_constant = 0;
+
+    sampler_dn[index].dw6.dn_enable = 1;        /* denoise enabled ... */
+    sampler_dn[index].dw6.di_enable = 0;        /* ... deinterlace disabled for this pass */
+    sampler_dn[index].dw6.di_partial = 0;
+    sampler_dn[index].dw6.dndi_top_first = dndi_top_first;
+    sampler_dn[index].dw6.dndi_stream_id = 1;
+    sampler_dn[index].dw6.dndi_first_frame = 1;
+    sampler_dn[index].dw6.progressive_dn = dn_progressive;
+    sampler_dn[index].dw6.mcdi_enable = 0;
+    sampler_dn[index].dw6.fmd_tear_threshold = 32;
+    sampler_dn[index].dw6.cat_th1 = 0;
+    sampler_dn[index].dw6.fmd2_vertical_difference_threshold = 32;
+    sampler_dn[index].dw6.fmd1_vertical_difference_threshold = 32;
+
+    sampler_dn[index].dw7.sad_tha = 5;
+    sampler_dn[index].dw7.sad_thb = 10;
+    sampler_dn[index].dw7.fmd_for_1st_field_of_current_frame = 2;
+    sampler_dn[index].dw7.mc_pixel_consistency_th = 25;
+    sampler_dn[index].dw7.fmd_for_2nd_field_of_previous_frame = 1;
+    sampler_dn[index].dw7.vdi_walker_enable = 0;
+    sampler_dn[index].dw7.neighborpixel_th = 10;
+    sampler_dn[index].dw7.column_width_minus1 = w / 16;    /* w is the destination surface width at this point */
 
     dri_bo_unmap(pp_context->sampler_state_table.bo);
 
     /* private function & data */
-    pp_context->pp_x_steps = pp_dndi_x_steps;
-    pp_context->pp_y_steps = pp_dndi_y_steps;
-    pp_context->pp_set_block_parameter = pp_dndi_set_block_parameter;
+    pp_context->pp_x_steps = gen7_pp_dn_x_steps;
+    pp_context->pp_y_steps = gen7_pp_dn_y_steps;
+    pp_context->pp_set_block_parameter = gen7_pp_dn_set_block_parameter;
 
-    pp_static_parameter.grf1.statistics_surface_picth = w / 2;
-    pp_static_parameter.grf1.r1_6.di.top_field_first = 0;
-    pp_static_parameter.grf4.r4_2.di.motion_history_coefficient_m2 = 64;
-    pp_static_parameter.grf4.r4_2.di.motion_history_coefficient_m1 = 192;
+    pp_static_parameter->grf1.di_statistics_surface_pitch_div2 = w / 2;
+    pp_static_parameter->grf1.di_statistics_surface_height_div4 = h / 4;
+    pp_static_parameter->grf1.di_top_field_first = 0;
+    pp_static_parameter->grf1.pointer_to_inline_parameter = 7;
 
-    pp_inline_parameter.grf5.block_count_x = w / 16;   /* 1 x N */
-    pp_inline_parameter.grf5.number_blocks = w / 16;
-    pp_inline_parameter.grf5.block_vertical_mask = 0xff;
-    pp_inline_parameter.grf5.block_horizontal_mask = 0xffff;
+    pp_static_parameter->grf2.di_destination_packed_y_component_offset = 0;
+    pp_static_parameter->grf2.di_destination_packed_u_component_offset = 1;
+    pp_static_parameter->grf2.di_destination_packed_v_component_offset = 3;
 
-    pp_dndi_context->dest_w = w;
-    pp_dndi_context->dest_h = h;
+    pp_static_parameter->grf4.di_hoffset_svf_from_dvf = 0;
+    pp_static_parameter->grf4.di_voffset_svf_from_dvf = 0;
+
+    pp_dn_context->dest_w = w;      /* destination dimensions, stored in the private DN context */
+    pp_dn_context->dest_h = h;
+
+    dst_surface->flags = src_surface->flags;    /* output inherits the source frame/field flags */
+
+    return VA_STATUS_SUCCESS;
 }
 
-static void
+static VAStatus
 ironlake_pp_initialize(
     VADriverContextP   ctx,
-    VASurfaceID        in_surface_id,
-    VASurfaceID        out_surface_id,
+    struct i965_post_processing_context *pp_context,
+    const struct i965_surface *src_surface,
     const VARectangle *src_rect,
+    struct i965_surface *dst_surface,
     const VARectangle *dst_rect,
-    int                pp_index
+    int                pp_index,
+    void  *filter_param
 )
 {
+    VAStatus va_status;
     struct i965_driver_data *i965 = i965_driver_data(ctx);
-    struct i965_post_processing_context *pp_context = i965->pp_context;
     struct pp_module *pp_module;
     dri_bo *bo;
-    int i;
+    int static_param_size, inline_param_size;
+
+    dri_bo_unreference(pp_context->surface_state_binding_table.bo);
+    bo = dri_bo_alloc(i965->intel.bufmgr,
+                      "surface state & binding table",
+                      (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_PP_SURFACES,
+                      4096);
+    assert(bo);
+    pp_context->surface_state_binding_table.bo = bo;
 
     dri_bo_unreference(pp_context->curbe.bo);
     bo = dri_bo_alloc(i965->intel.bufmgr,
@@ -1740,14 +3476,6 @@ ironlake_pp_initialize(
     assert(bo);
     pp_context->curbe.bo = bo;
 
-    dri_bo_unreference(pp_context->binding_table.bo);
-    bo = dri_bo_alloc(i965->intel.bufmgr, 
-                      "binding table",
-                      sizeof(unsigned int), 
-                      4096);
-    assert(bo);
-    pp_context->binding_table.bo = bo;
-
     dri_bo_unreference(pp_context->idrt.bo);
     bo = dri_bo_alloc(i965->intel.bufmgr, 
                       "interface discriptor", 
@@ -1791,56 +3519,85 @@ ironlake_pp_initialize(
                       4096);
     assert(bo);
     pp_context->vfe_state.bo = bo;
-    
-    for (i = 0; i < MAX_PP_SURFACES; i++) {
-        dri_bo_unreference(pp_context->surfaces[i].ss_bo);
-        pp_context->surfaces[i].ss_bo = NULL;
-
-        dri_bo_unreference(pp_context->surfaces[i].s_bo);
-        pp_context->surfaces[i].s_bo = NULL;
-    }
 
-    memset(&pp_static_parameter, 0, sizeof(pp_static_parameter));
-    memset(&pp_inline_parameter, 0, sizeof(pp_inline_parameter));
+    static_param_size = sizeof(struct pp_static_parameter);
+    inline_param_size = sizeof(struct pp_inline_parameter);
+    
+    memset(pp_context->pp_static_parameter, 0, static_param_size);
+    memset(pp_context->pp_inline_parameter, 0, inline_param_size);
+    
     assert(pp_index >= PP_NULL && pp_index < NUM_PP_MODULES);
     pp_context->current_pp = pp_index;
     pp_module = &pp_context->pp_modules[pp_index];
     
     if (pp_module->initialize)
-        pp_module->initialize(ctx, in_surface_id, out_surface_id,
-                              src_rect, dst_rect);
+        va_status = pp_module->initialize(ctx, pp_context,
+                                          src_surface,
+                                          src_rect,
+                                          dst_surface,
+                                          dst_rect,
+                                          filter_param);
+    else
+       va_status = VA_STATUS_ERROR_UNIMPLEMENTED;
+ 
+    return va_status;
 }
 
-static void
+static VAStatus
 ironlake_post_processing(
     VADriverContextP   ctx,
-    VASurfaceID        in_surface_id,
-    VASurfaceID        out_surface_id,
+    struct i965_post_processing_context *pp_context,
+    const struct i965_surface *src_surface,
     const VARectangle *src_rect,
+    struct i965_surface *dst_surface,
     const VARectangle *dst_rect,
-    int                pp_index
+    int                pp_index,
+    void *filter_param
 )
 {
-    ironlake_pp_initialize(ctx, in_surface_id, out_surface_id, src_rect, dst_rect, pp_index);
-    ironlake_pp_states_setup(ctx);
-    ironlake_pp_pipeline_setup(ctx);
+    VAStatus va_status;
+    /* Initialize the selected module, then set up states and pipeline; returns the initialization status. */
+    va_status = ironlake_pp_initialize(ctx, pp_context,
+                                       src_surface,
+                                       src_rect,
+                                       dst_surface,
+                                       dst_rect,
+                                       pp_index,
+                                       filter_param);
+    /* State and pipeline setup are skipped when initialization did not return VA_STATUS_SUCCESS. */
+    if (va_status == VA_STATUS_SUCCESS) {
+        ironlake_pp_states_setup(ctx, pp_context);
+        ironlake_pp_pipeline_setup(ctx, pp_context);
+    }
+
+    return va_status;
 }
 
-static void
+static VAStatus
 gen6_pp_initialize(
     VADriverContextP   ctx,
-    VASurfaceID        in_surface_id,
-    VASurfaceID        out_surface_id,
+    struct i965_post_processing_context *pp_context,
+    const struct i965_surface *src_surface,
     const VARectangle *src_rect,
+    struct i965_surface *dst_surface,
     const VARectangle *dst_rect,
-    int                pp_index
+    int                pp_index,
+    void * filter_param
 )
 {
+    VAStatus va_status;
     struct i965_driver_data *i965 = i965_driver_data(ctx);
-    struct i965_post_processing_context *pp_context = i965->pp_context;
     struct pp_module *pp_module;
     dri_bo *bo;
-    int i;
+    int static_param_size, inline_param_size;
+
+    dri_bo_unreference(pp_context->surface_state_binding_table.bo);
+    bo = dri_bo_alloc(i965->intel.bufmgr,
+                      "surface state & binding table",
+                      (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_PP_SURFACES,
+                      4096);
+    assert(bo);
+    pp_context->surface_state_binding_table.bo = bo;
 
     dri_bo_unreference(pp_context->curbe.bo);
     bo = dri_bo_alloc(i965->intel.bufmgr,
@@ -1850,14 +3607,6 @@ gen6_pp_initialize(
     assert(bo);
     pp_context->curbe.bo = bo;
 
-    dri_bo_unreference(pp_context->binding_table.bo);
-    bo = dri_bo_alloc(i965->intel.bufmgr, 
-                      "binding table",
-                      sizeof(unsigned int), 
-                      4096);
-    assert(bo);
-    pp_context->binding_table.bo = bo;
-
     dri_bo_unreference(pp_context->idrt.bo);
     bo = dri_bo_alloc(i965->intel.bufmgr, 
                       "interface discriptor", 
@@ -1902,57 +3651,41 @@ gen6_pp_initialize(
     assert(bo);
     pp_context->vfe_state.bo = bo;
     
-    for (i = 0; i < MAX_PP_SURFACES; i++) {
-        dri_bo_unreference(pp_context->surfaces[i].ss_bo);
-        pp_context->surfaces[i].ss_bo = NULL;
-
-        dri_bo_unreference(pp_context->surfaces[i].s_bo);
-        pp_context->surfaces[i].s_bo = NULL;
+    if (IS_GEN7(i965->intel.device_id)) {
+        static_param_size = sizeof(struct gen7_pp_static_parameter);
+        inline_param_size = sizeof(struct gen7_pp_inline_parameter);
+    } else {
+        static_param_size = sizeof(struct pp_static_parameter);
+        inline_param_size = sizeof(struct pp_inline_parameter);
     }
 
-    memset(&pp_static_parameter, 0, sizeof(pp_static_parameter));
-    memset(&pp_inline_parameter, 0, sizeof(pp_inline_parameter));
+    memset(pp_context->pp_static_parameter, 0, static_param_size);
+    memset(pp_context->pp_inline_parameter, 0, inline_param_size);
+
     assert(pp_index >= PP_NULL && pp_index < NUM_PP_MODULES);
     pp_context->current_pp = pp_index;
     pp_module = &pp_context->pp_modules[pp_index];
     
     if (pp_module->initialize)
-        pp_module->initialize(ctx, in_surface_id, out_surface_id,
-                              src_rect, dst_rect);
-}
-
-static void
-gen6_pp_binding_table(struct i965_post_processing_context *pp_context)
-{
-    unsigned int *binding_table;
-    dri_bo *bo = pp_context->binding_table.bo;
-    int i;
-
-    dri_bo_map(bo, 1);
-    assert(bo->virtual);
-    binding_table = bo->virtual;
-    memset(binding_table, 0, bo->size);
-
-    for (i = 0; i < MAX_PP_SURFACES; i++) {
-        if (pp_context->surfaces[i].ss_bo) {
-            assert(pp_context->surfaces[i].s_bo);
-
-            binding_table[i] = pp_context->surfaces[i].ss_bo->offset;
-            dri_bo_emit_reloc(bo,
-                              I915_GEM_DOMAIN_INSTRUCTION, 0,
-                              0,
-                              i * sizeof(*binding_table),
-                              pp_context->surfaces[i].ss_bo);
-        }
-    
-    }
+        va_status = pp_module->initialize(ctx, pp_context,
+                                          src_surface,
+                                          src_rect,
+                                          dst_surface,
+                                          dst_rect,
+                                          filter_param);
+    else
+        va_status = VA_STATUS_ERROR_UNIMPLEMENTED;
+ 
+    calculate_boundary_block_mask(pp_context, dst_rect);
 
-    dri_bo_unmap(bo);
+    return va_status;
 }
 
 static void
-gen6_pp_interface_descriptor_table(struct i965_post_processing_context *pp_context)
+gen6_pp_interface_descriptor_table(VADriverContextP   ctx,
+                                   struct i965_post_processing_context *pp_context)
 {
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
     struct gen6_interface_descriptor_data *desc;
     dri_bo *bo;
     int pp_index = pp_context->current_pp;
@@ -1970,10 +3703,13 @@ gen6_pp_interface_descriptor_table(struct i965_post_processing_context *pp_conte
     desc->desc2.sampler_state_pointer = 
         pp_context->sampler_state_table.bo->offset >> 5;
     desc->desc3.binding_table_entry_count = 0;
-    desc->desc3.binding_table_pointer = 
-        pp_context->binding_table.bo->offset >> 5; /*reloc */
+    desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET >> 5);
     desc->desc4.constant_urb_entry_read_offset = 0;
-    desc->desc4.constant_urb_entry_read_length = 4; /* grf 1-4 */
+    
+    if (IS_GEN7(i965->intel.device_id))
+        desc->desc4.constant_urb_entry_read_length = 6; /* grf 1-6 */
+    else
+        desc->desc4.constant_urb_entry_read_length = 4; /* grf 1-4 */
 
     dri_bo_emit_reloc(bo,
                       I915_GEM_DOMAIN_INSTRUCTION, 0,
@@ -1987,45 +3723,46 @@ gen6_pp_interface_descriptor_table(struct i965_post_processing_context *pp_conte
                       offsetof(struct gen6_interface_descriptor_data, desc2),
                       pp_context->sampler_state_table.bo);
 
-    dri_bo_emit_reloc(bo,
-                      I915_GEM_DOMAIN_INSTRUCTION, 0,
-                      desc->desc3.binding_table_entry_count,
-                      offsetof(struct gen6_interface_descriptor_data, desc3),
-                      pp_context->binding_table.bo);
-
     dri_bo_unmap(bo);
     pp_context->idrt.num_interface_descriptors++;
 }
 
 static void
-gen6_pp_upload_constants(struct i965_post_processing_context *pp_context)
+gen6_pp_upload_constants(VADriverContextP ctx,
+                         struct i965_post_processing_context *pp_context)
 {
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
     unsigned char *constant_buffer;
+    int param_size;
+    /* Copies pp_context->pp_static_parameter into the mapped CURBE buffer object. */
+    assert(sizeof(struct pp_static_parameter) == 128);
+    assert(sizeof(struct gen7_pp_static_parameter) == 192);
+    /* The number of bytes copied depends on the device: the gen7 struct (192 bytes) or the older one (128 bytes). */
+    if (IS_GEN7(i965->intel.device_id))
+        param_size = sizeof(struct gen7_pp_static_parameter);
+    else
+        param_size = sizeof(struct pp_static_parameter);
 
-    assert(sizeof(pp_static_parameter) == 128);
     dri_bo_map(pp_context->curbe.bo, 1);
     assert(pp_context->curbe.bo->virtual);
     constant_buffer = pp_context->curbe.bo->virtual;
-    memcpy(constant_buffer, &pp_static_parameter, sizeof(pp_static_parameter));
+    memcpy(constant_buffer, pp_context->pp_static_parameter, param_size);
     dri_bo_unmap(pp_context->curbe.bo);
 }
 
 static void
-gen6_pp_states_setup(VADriverContextP ctx)
+gen6_pp_states_setup(VADriverContextP ctx,
+                     struct i965_post_processing_context *pp_context)
 {
-    struct i965_driver_data *i965 = i965_driver_data(ctx);
-    struct i965_post_processing_context *pp_context = i965->pp_context;
-
-    gen6_pp_binding_table(pp_context);
-    gen6_pp_interface_descriptor_table(pp_context);
-    gen6_pp_upload_constants(pp_context);
+    gen6_pp_interface_descriptor_table(ctx, pp_context);    /* no separate binding-table pass; descriptor uses BINDING_TABLE_OFFSET */
+    gen6_pp_upload_constants(ctx, pp_context);              /* copy the static parameters into the CURBE bo */
 }
 
 static void
-gen6_pp_pipeline_select(VADriverContextP ctx)
+gen6_pp_pipeline_select(VADriverContextP ctx,
+                        struct i965_post_processing_context *pp_context)
 {
-    struct i965_driver_data *i965 = i965_driver_data(ctx);
-    struct intel_batchbuffer *batch = i965->batch;
+    struct intel_batchbuffer *batch = pp_context->batch;
 
     BEGIN_BATCH(batch, 1);
     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
@@ -2033,15 +3770,15 @@ gen6_pp_pipeline_select(VADriverContextP ctx)
 }
 
 static void
-gen6_pp_state_base_address(VADriverContextP ctx)
+gen6_pp_state_base_address(VADriverContextP ctx,
+                           struct i965_post_processing_context *pp_context)
 {
-    struct i965_driver_data *i965 = i965_driver_data(ctx);
-    struct intel_batchbuffer *batch = i965->batch;
+    struct intel_batchbuffer *batch = pp_context->batch;
 
     BEGIN_BATCH(batch, 10);
     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
-    OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
+    OUT_RELOC(batch, pp_context->surface_state_binding_table.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
@@ -2053,10 +3790,10 @@ gen6_pp_state_base_address(VADriverContextP ctx)
 }
 
 static void
-gen6_pp_vfe_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
+gen6_pp_vfe_state(VADriverContextP ctx,
+                  struct i965_post_processing_context *pp_context)
 {
-    struct i965_driver_data *i965 = i965_driver_data(ctx);
-    struct intel_batchbuffer *batch = i965->batch;
+    struct intel_batchbuffer *batch = pp_context->batch;
 
     BEGIN_BATCH(batch, 8);
     OUT_BATCH(batch, CMD_MEDIA_VFE_STATE | (8 - 2));
@@ -2066,8 +3803,8 @@ gen6_pp_vfe_state(VADriverContextP ctx, struct i965_post_processing_context *pp_
               pp_context->urb.num_vfe_entries << 8);
     OUT_BATCH(batch, 0);
     OUT_BATCH(batch,
-              (pp_context->urb.size_vfe_entry * 2) << 16 |  /* in 256 bits unit */
-              (pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 2 - 1));            /* in 256 bits unit */
+              (pp_context->urb.size_vfe_entry * 2) << 16 |  /* URB Entry Allocation Size, in 256 bits unit */
+              (pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 2)); /* CURBE Allocation Size, in 256 bits unit */
     OUT_BATCH(batch, 0);
     OUT_BATCH(batch, 0);
     OUT_BATCH(batch, 0);
@@ -2075,18 +3812,18 @@ gen6_pp_vfe_state(VADriverContextP ctx, struct i965_post_processing_context *pp_
 }
 
 static void
-gen6_pp_curbe_load(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
+gen6_pp_curbe_load(VADriverContextP ctx,
+                   struct i965_post_processing_context *pp_context)
 {
-    struct i965_driver_data *i965 = i965_driver_data(ctx);
-    struct intel_batchbuffer *batch = i965->batch;
+    struct intel_batchbuffer *batch = pp_context->batch;
 
-    assert(pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 512 <= pp_context->curbe.bo->size);
+    assert(pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 2 * 32 <= pp_context->curbe.bo->size);
 
     BEGIN_BATCH(batch, 4);
     OUT_BATCH(batch, CMD_MEDIA_CURBE_LOAD | (4 - 2));
     OUT_BATCH(batch, 0);
     OUT_BATCH(batch,
-              pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 512);
+              pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 2 * 32);
     OUT_RELOC(batch, 
               pp_context->curbe.bo,
               I915_GEM_DOMAIN_INSTRUCTION, 0,
@@ -2095,10 +3832,10 @@ gen6_pp_curbe_load(VADriverContextP ctx, struct i965_post_processing_context *pp
 }
 
 static void
-gen6_interface_descriptor_load(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
+gen6_interface_descriptor_load(VADriverContextP ctx,
+                               struct i965_post_processing_context *pp_context)
 {
-    struct i965_driver_data *i965 = i965_driver_data(ctx);
-    struct intel_batchbuffer *batch = i965->batch;
+    struct intel_batchbuffer *batch = pp_context->batch;
 
     BEGIN_BATCH(batch, 4);
     OUT_BATCH(batch, CMD_MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2));
@@ -2112,87 +3849,215 @@ gen6_interface_descriptor_load(VADriverContextP ctx, struct i965_post_processing
     ADVANCE_BATCH(batch);
 }
 
+static void update_block_mask_parameter(struct i965_post_processing_context *pp_context, int x, int y, int x_steps, int y_steps) 
+{
+    struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
+    /* Fill the grf5/grf6 edge masks of the inline parameter for block (x, y) of an x_steps x y_steps walk. */
+    pp_inline_parameter->grf5.block_vertical_mask = 0xff;
+    pp_inline_parameter->grf6.block_vertical_mask_bottom = pp_context->block_vertical_mask_bottom;
+    // The first block is always on the left edge; the second block will reload horizontal_mask from grf6.block_horizontal_mask_middle.
+    pp_inline_parameter->grf5.block_horizontal_mask = pp_context->block_horizontal_mask_left;
+    pp_inline_parameter->grf6.block_horizontal_mask_middle = 0xffff;
+    pp_inline_parameter->grf6.block_horizontal_mask_right = pp_context->block_horizontal_mask_right;
+
+    /* 1 x N: a single column, so only the last row (y == y_steps-1) takes the bottom mask */
+    if (x_steps == 1) {
+        if (y == y_steps-1) {
+            pp_inline_parameter->grf5.block_vertical_mask = pp_context->block_vertical_mask_bottom;
+        }
+        else {
+            pp_inline_parameter->grf6.block_vertical_mask_bottom = 0xff;
+        }
+    }
+
+    /* M x 1: a single row; the x == 0 test comes first, so it also wins when x_steps == 1 */
+    if (y_steps == 1) {
+        if (x == 0) { // all blocks in this group are on the left edge
+            pp_inline_parameter->grf6.block_horizontal_mask_middle = pp_context->block_horizontal_mask_left;
+            pp_inline_parameter->grf6.block_horizontal_mask_right = pp_context->block_horizontal_mask_left; 
+        }
+        else if (x == x_steps-1) { // last step: every mask is the right-edge mask
+            pp_inline_parameter->grf5.block_horizontal_mask = pp_context->block_horizontal_mask_right;
+            pp_inline_parameter->grf6.block_horizontal_mask_middle = pp_context->block_horizontal_mask_right;
+        }
+        else { // interior step: nothing is masked horizontally
+            pp_inline_parameter->grf5.block_horizontal_mask = 0xffff;
+            pp_inline_parameter->grf6.block_horizontal_mask_middle = 0xffff;
+            pp_inline_parameter->grf6.block_horizontal_mask_right = 0xffff;
+        }
+    }
+
+}
+
 static void
-gen6_pp_object_walker(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
+gen6_pp_object_walker(VADriverContextP ctx,
+                      struct i965_post_processing_context *pp_context)
 {
     struct i965_driver_data *i965 = i965_driver_data(ctx);
-    struct intel_batchbuffer *batch = i965->batch;
+    struct intel_batchbuffer *batch = pp_context->batch;
     int x, x_steps, y, y_steps;
+    int param_size, command_length_in_dws;
+    dri_bo *command_buffer;
+    unsigned int *command_ptr;
+
+    if (IS_GEN7(i965->intel.device_id))
+        param_size = sizeof(struct gen7_pp_inline_parameter);
+    else
+        param_size = sizeof(struct pp_inline_parameter);
 
     x_steps = pp_context->pp_x_steps(&pp_context->private_context);
     y_steps = pp_context->pp_y_steps(&pp_context->private_context);
+    command_length_in_dws = 6 + (param_size >> 2);
+    command_buffer = dri_bo_alloc(i965->intel.bufmgr,
+                                  "command objects buffer",
+                                  command_length_in_dws * 4 * x_steps * y_steps + 8,
+                                  4096);
+
+    dri_bo_map(command_buffer, 1);
+    command_ptr = command_buffer->virtual;
 
     for (y = 0; y < y_steps; y++) {
         for (x = 0; x < x_steps; x++) {
             if (!pp_context->pp_set_block_parameter(pp_context, x, y)) {
-                BEGIN_BATCH(batch, 22);
-                OUT_BATCH(batch, CMD_MEDIA_OBJECT | 20);
-                OUT_BATCH(batch, 0);
-                OUT_BATCH(batch, 0); /* no indirect data */
-                OUT_BATCH(batch, 0);
-                OUT_BATCH(batch, 0); /* scoreboard */
-                OUT_BATCH(batch, 0);
-
-                /* inline data grf 5-6 */
-                assert(sizeof(pp_inline_parameter) == 64);
-                intel_batchbuffer_data(batch, &pp_inline_parameter, sizeof(pp_inline_parameter));
-
-                ADVANCE_BATCH(batch);
+                // some common block parameter update goes here, apply to all pp functions
+                if (IS_GEN6(i965->intel.device_id))
+                    update_block_mask_parameter (pp_context, x, y, x_steps, y_steps);
+                
+                *command_ptr++ = (CMD_MEDIA_OBJECT | (command_length_in_dws - 2));
+                *command_ptr++ = 0;
+                *command_ptr++ = 0;
+                *command_ptr++ = 0;
+                *command_ptr++ = 0;
+                *command_ptr++ = 0;
+                memcpy(command_ptr, pp_context->pp_inline_parameter, param_size);
+                command_ptr += (param_size >> 2);
             }
         }
     }
-}
 
+    if (command_length_in_dws * x_steps * y_steps % 2 == 0)
+        *command_ptr++ = 0;
+
+    *command_ptr = MI_BATCH_BUFFER_END;
+
+    dri_bo_unmap(command_buffer);
+
+    BEGIN_BATCH(batch, 2);
+    OUT_BATCH(batch, MI_BATCH_BUFFER_START | (2 << 6));
+    OUT_RELOC(batch, command_buffer, 
+              I915_GEM_DOMAIN_COMMAND, 0, 
+              0);
+    ADVANCE_BATCH(batch);
+    
+    dri_bo_unreference(command_buffer);
+
+    /* Have to execute the batch buffer here because MI_BATCH_BUFFER_END
+     * will cause control to pass back to the ring buffer.
+     */
+    intel_batchbuffer_end_atomic(batch);
+    intel_batchbuffer_flush(batch);
+    intel_batchbuffer_start_atomic(batch, 0x1000);
+}
 static void
-gen6_pp_pipeline_setup(VADriverContextP ctx)
+gen6_pp_pipeline_setup(VADriverContextP ctx,
+                       struct i965_post_processing_context *pp_context)
 {
-    struct i965_driver_data *i965 = i965_driver_data(ctx);
-    struct intel_batchbuffer *batch = i965->batch;
-    struct i965_post_processing_context *pp_context = i965->pp_context;
+    struct intel_batchbuffer *batch = pp_context->batch;
 
     intel_batchbuffer_start_atomic(batch, 0x1000);
     intel_batchbuffer_emit_mi_flush(batch);
-    gen6_pp_pipeline_select(ctx);
+    gen6_pp_pipeline_select(ctx, pp_context);
+    gen6_pp_state_base_address(ctx, pp_context);
+    gen6_pp_vfe_state(ctx, pp_context);
     gen6_pp_curbe_load(ctx, pp_context);
     gen6_interface_descriptor_load(ctx, pp_context);
-    gen6_pp_state_base_address(ctx);
     gen6_pp_vfe_state(ctx, pp_context);
     gen6_pp_object_walker(ctx, pp_context);
     intel_batchbuffer_end_atomic(batch);
 }
 
-static void
+static VAStatus
 gen6_post_processing(
     VADriverContextP   ctx,
-    VASurfaceID        in_surface_id,
-    VASurfaceID        out_surface_id,
+    struct i965_post_processing_context *pp_context,
+    const struct i965_surface *src_surface,
+    const VARectangle *src_rect,
+    struct i965_surface *dst_surface,
+    const VARectangle *dst_rect,
+    int                pp_index,
+    void *filter_param
+)
+{
+    VAStatus va_status;
+    
+    va_status = gen6_pp_initialize(ctx, pp_context,
+                                   src_surface,
+                                   src_rect,
+                                   dst_surface,
+                                   dst_rect,
+                                   pp_index,
+                                   filter_param);
+
+    if (va_status == VA_STATUS_SUCCESS) {
+        gen6_pp_states_setup(ctx, pp_context);
+        gen6_pp_pipeline_setup(ctx, pp_context);
+    }
+
+    return va_status;
+}
+
+static VAStatus
+gen75_post_processing(
+    VADriverContextP   ctx,
+    struct i965_post_processing_context *pp_context,
+    const struct i965_surface *src_surface,
     const VARectangle *src_rect,
+    struct i965_surface *dst_surface,
     const VARectangle *dst_rect,
-    int                pp_index
+    int                pp_index,
+    void *filter_param
 )
 {
-    gen6_pp_initialize(ctx, in_surface_id, out_surface_id, src_rect, dst_rect, pp_index);
-    gen6_pp_states_setup(ctx);
-    gen6_pp_pipeline_setup(ctx);
+   VAStatus va_status;
+   struct intel_vebox_context * vebox_ctx = pp_context->pp_vebox_context;
+
+    assert(pp_index == PP_NV12_DNDI);
+    
+    vebox_ctx->filters_mask    = VPP_DNDI_DI;
+    vebox_ctx->surface_input   = src_surface->id;
+    vebox_ctx->surface_output  = dst_surface->id;
+  
+    va_status = gen75_vebox_process_picture(ctx, vebox_ctx);
+     
+    return va_status;
 }
 
-static void
+static VAStatus
 i965_post_processing_internal(
     VADriverContextP   ctx,
-    VASurfaceID        in_surface_id,
-    VASurfaceID        out_surface_id,
+    struct i965_post_processing_context *pp_context,
+    const struct i965_surface *src_surface,
     const VARectangle *src_rect,
+    struct i965_surface *dst_surface,
     const VARectangle *dst_rect,
-    int                pp_index
+    int                pp_index,
+    void *filter_param
 )
 {
+    VAStatus va_status;
     struct i965_driver_data *i965 = i965_driver_data(ctx);
 
-    if (IS_GEN6(i965->intel.device_id) ||
-        IS_GEN7(i965->intel.device_id))
-        gen6_post_processing(ctx, in_surface_id, out_surface_id, src_rect, dst_rect, pp_index);
-    else
-        ironlake_post_processing(ctx, in_surface_id, out_surface_id, src_rect, dst_rect, pp_index);
+    if(IS_HASWELL(i965->intel.device_id) && 
+        pp_index == PP_NV12_DNDI){
+        va_status = gen75_post_processing(ctx, pp_context, src_surface, src_rect, dst_surface, dst_rect, pp_index, filter_param);
+    }else if (IS_GEN6(i965->intel.device_id) ||
+              IS_GEN7(i965->intel.device_id)){
+        va_status = gen6_post_processing(ctx, pp_context, src_surface, src_rect, dst_surface, dst_rect, pp_index, filter_param);
+    }else{
+        va_status = ironlake_post_processing(ctx, pp_context, src_surface, src_rect, dst_surface, dst_rect, pp_index, filter_param);
+    }
+
+    return va_status;
 }
 
 VAStatus 
@@ -2206,6 +4071,103 @@ i965_CreateSurfaces(VADriverContextP ctx,
                     int format,
                     int num_surfaces,
                     VASurfaceID *surfaces);
+/* Split a packed 0xAARRGGBB color into its alpha byte and Y/U/V bytes using integer weights scaled by 1000. */
+static void
+rgb_to_yuv(unsigned int argb,
+           unsigned char *y,
+           unsigned char *u,
+           unsigned char *v,
+           unsigned char *a)
+{
+    int r = ((argb >> 16) & 0xff);
+    int g = ((argb >> 8) & 0xff);
+    int b = ((argb >> 0) & 0xff);
+    /* +16 luma and +128 chroma offsets; the weights look like BT.601 studio range -- TODO confirm */
+    *y = (257 * r + 504 * g + 98 * b) / 1000 + 16;
+    *v = (439 * r - 368 * g - 71 * b) / 1000 + 128;
+    *u = (-148 * r - 291 * g + 439 * b) / 1000 + 128;
+    *a = ((argb >> 24) & 0xff);
+}
+/* Fill an NV12 surface with one color via two XY_COLOR_BLT commands; does nothing unless the surface is NV12 and alpha != 0. */
+static void 
+i965_vpp_clear_surface(VADriverContextP ctx,
+                       struct i965_post_processing_context *pp_context,
+                       VASurfaceID surface,
+                       unsigned int color)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct intel_batchbuffer *batch = pp_context->batch;
+    struct object_surface *obj_surface = SURFACE(surface);
+    unsigned int blt_cmd, br13;
+    unsigned int tiling = 0, swizzle = 0;
+    int pitch;
+    unsigned char y, u, v, a = 0;
+
+    /* Currently only support NV12 surface */
+    if (!obj_surface || obj_surface->fourcc != VA_FOURCC('N', 'V', '1', '2'))
+        return;
+
+    rgb_to_yuv(color, &y, &u, &v, &a);
+    /* A zero alpha byte means "nothing to clear"; callers passing color 0 therefore return here. */
+    if (a == 0)
+        return;
+
+    dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
+    blt_cmd = XY_COLOR_BLT_CMD;
+    pitch = obj_surface->width;
+
+    if (tiling != I915_TILING_NONE) {
+        blt_cmd |= XY_COLOR_BLT_DST_TILED;
+        pitch >>= 2; /* presumably tiled pitch is programmed in dwords -- confirm against the blitter docs */
+    }
+    /* BR13 for the first fill: 8 bits per pixel plus the pitch computed above. */
+    br13 = 0xf0 << 16; /* presumably the raster-operation field -- TODO confirm */
+    br13 |= BR13_8;
+    br13 |= pitch;
+
+    if (IS_GEN6(i965->intel.device_id) ||
+        IS_GEN7(i965->intel.device_id)) {
+        intel_batchbuffer_start_atomic_blt(batch, 48);
+        BEGIN_BLT_BATCH(batch, 12);
+    } else {
+        intel_batchbuffer_start_atomic(batch, 48);
+        BEGIN_BATCH(batch, 12);
+    }
+    /* First fill (6 dwords): width x height rectangle at offset 0 of the bo, filled with y. */
+    OUT_BATCH(batch, blt_cmd);
+    OUT_BATCH(batch, br13);
+    OUT_BATCH(batch,
+              0 << 16 |
+              0);
+    OUT_BATCH(batch,
+              obj_surface->height << 16 |
+              obj_surface->width);
+    OUT_RELOC(batch, obj_surface->bo, 
+              I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+              0);
+    OUT_BATCH(batch, y);
+    /* Second fill (6 dwords): BR13_565 format, (width/2) x (height/2), starting at width * y_cb_offset. */
+    br13 = 0xf0 << 16;
+    br13 |= BR13_565;
+    br13 |= pitch;
+
+    OUT_BATCH(batch, blt_cmd);
+    OUT_BATCH(batch, br13);
+    OUT_BATCH(batch,
+              0 << 16 |
+              0);
+    OUT_BATCH(batch,
+              obj_surface->height / 2 << 16 |
+              obj_surface->width / 2);
+    OUT_RELOC(batch, obj_surface->bo, 
+              I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+              obj_surface->width * obj_surface->y_cb_offset);
+    OUT_BATCH(batch, v << 8 | u); /* one 16-bit value per interleaved pair: u in the low byte, v in the high byte */
+
+    ADVANCE_BATCH(batch);
+    intel_batchbuffer_end_atomic(batch);
+}
+
 VASurfaceID
 i965_post_processing(
     VADriverContextP   ctx,
@@ -2225,6 +4187,8 @@ i965_post_processing(
         if (i965->render_state.interleaved_uv) {
             struct object_surface *obj_surface;
             VAStatus status;
+            struct i965_surface src_surface;
+            struct i965_surface dst_surface;
 
             if (flags & I965_PP_FLAG_DEINTERLACING) {
                 obj_surface = SURFACE(in_surface_id);
@@ -2236,11 +4200,26 @@ i965_post_processing(
                                              &out_surface_id);
                 assert(status == VA_STATUS_SUCCESS);
                 obj_surface = SURFACE(out_surface_id);
-                i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
-                i965_post_processing_internal(ctx,
-                                              in_surface_id, out_surface_id,
-                                              src_rect, dst_rect,
-                                              PP_NV12_DNDI);
+                i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
+
+                i965_vpp_clear_surface(ctx, i965->pp_context, out_surface_id, 0); 
+
+                src_surface.id = in_surface_id;
+                src_surface.type = I965_SURFACE_TYPE_SURFACE;
+                src_surface.flags = (flags & I965_PP_FLAG_TOP_FIELD) ? 
+                    I965_SURFACE_FLAG_TOP_FIELD_FIRST : I965_SURFACE_FLAG_BOTTOM_FIELD_FIRST;
+                dst_surface.id = out_surface_id;
+                dst_surface.type = I965_SURFACE_TYPE_SURFACE;
+                dst_surface.flags = I965_SURFACE_FLAG_FRAME;
+
+                i965_post_processing_internal(ctx, i965->pp_context,
+                                              &src_surface,
+                                              src_rect,
+                                              &dst_surface,
+                                              dst_rect,
+                                              PP_NV12_DNDI,
+                                              NULL);
+               printf("Deinterlace is executed here\n");
             }
 
             if (flags & I965_PP_FLAG_AVS) {
@@ -2259,10 +4238,22 @@ i965_post_processing(
                 assert(status == VA_STATUS_SUCCESS);
                 obj_surface = SURFACE(out_surface_id);
                 i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
-                i965_post_processing_internal(ctx,
-                                              in_surface_id, out_surface_id,
-                                              src_rect, dst_rect,
-                                              PP_NV12_AVS);
+                i965_vpp_clear_surface(ctx, i965->pp_context, out_surface_id, 0); 
+
+                src_surface.id = in_surface_id;
+                src_surface.type = I965_SURFACE_TYPE_SURFACE;
+                src_surface.flags = I965_SURFACE_FLAG_FRAME;
+                dst_surface.id = out_surface_id;
+                dst_surface.type = I965_SURFACE_TYPE_SURFACE;
+                dst_surface.flags = I965_SURFACE_FLAG_FRAME;
+
+                i965_post_processing_internal(ctx, i965->pp_context,
+                                              &src_surface,
+                                              src_rect,
+                                              &dst_surface,
+                                              dst_rect,
+                                              PP_NV12_AVS,
+                                              NULL);
 
                 if (in_surface_id != surface)
                     i965_DestroySurfaces(ctx, &in_surface_id, 1);
@@ -2275,108 +4266,329 @@ i965_post_processing(
     return out_surface_id;
 }       
 
-Bool
-i965_post_processing_terminate(VADriverContextP ctx)
+/*
+ * Convert/copy a 3-plane source (the caller dispatches YV12, I420, IMC1 and
+ * IMC3 here) into dst_surface, picking the PP kernel from the destination
+ * fourcc.  Returns the status of i965_post_processing_internal() instead of
+ * discarding it; an unsupported destination still trips assert(0) in debug
+ * builds and yields VA_STATUS_ERROR_UNIMPLEMENTED when asserts are disabled.
+ */
+static VAStatus
+i965_image_pl3_processing(VADriverContextP ctx,
+                          const struct i965_surface *src_surface,
+                          const VARectangle *src_rect,
+                          struct i965_surface *dst_surface,
+                          const VARectangle *dst_rect)
 {
     struct i965_driver_data *i965 = i965_driver_data(ctx);
     struct i965_post_processing_context *pp_context = i965->pp_context;
-    int i;
+    int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
+    VAStatus status;
+    int pp_index;
+
+    if (fourcc == VA_FOURCC('N', 'V', '1', '2')) {
+        pp_index = PP_PL3_LOAD_SAVE_N12;
+    } else if (fourcc == VA_FOURCC('I', 'M', 'C', '1') ||
+               fourcc == VA_FOURCC('I', 'M', 'C', '3') ||
+               fourcc == VA_FOURCC('Y', 'V', '1', '2') ||
+               fourcc == VA_FOURCC('I', '4', '2', '0')) {
+        pp_index = PP_PL3_LOAD_SAVE_PL3;
+    } else if (fourcc == VA_FOURCC('Y', 'U', 'Y', '2') ||
+               fourcc == VA_FOURCC('U', 'Y', 'V', 'Y')) {
+        pp_index = PP_PL3_LOAD_SAVE_PA;
+    } else {
+        /* No kernel for this destination: fail loudly instead of reporting success. */
+        assert(0);
+        return VA_STATUS_ERROR_UNIMPLEMENTED;
+    }
+
+    status = i965_post_processing_internal(ctx, pp_context,
+                                           src_surface,
+                                           src_rect,
+                                           dst_surface,
+                                           dst_rect,
+                                           pp_index,
+                                           NULL);
+
+    /* Submit the batch whether or not the kernel setup succeeded, as before. */
+    intel_batchbuffer_flush(pp_context->batch);
+
+    return status;
+}
+
+/*
+ * Convert/copy an NV12 source into dst_surface, selecting the PP kernel from
+ * the destination fourcc.  Returns the status reported by
+ * i965_post_processing_internal(), or VA_STATUS_ERROR_UNIMPLEMENTED when the
+ * destination format has no matching kernel, so that callers never see
+ * VA_STATUS_SUCCESS for an image that was not actually processed.
+ */
+static VAStatus
+i965_image_pl2_processing(VADriverContextP ctx,
+                          const struct i965_surface *src_surface,
+                          const VARectangle *src_rect,
+                          struct i965_surface *dst_surface,
+                          const VARectangle *dst_rect)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct i965_post_processing_context *pp_context = i965->pp_context;
+    int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
+    VAStatus status;
+    int pp_index;
+
+    if (fourcc == VA_FOURCC('N', 'V', '1', '2')) {
+        pp_index = PP_NV12_LOAD_SAVE_N12;
+    } else if (fourcc == VA_FOURCC('I', 'M', 'C', '1') ||
+               fourcc == VA_FOURCC('I', 'M', 'C', '3') ||
+               fourcc == VA_FOURCC('Y', 'V', '1', '2') ||
+               fourcc == VA_FOURCC('I', '4', '2', '0')) {
+        pp_index = PP_NV12_LOAD_SAVE_PL3;
+    } else if (fourcc == VA_FOURCC('Y', 'U', 'Y', '2') ||
+               fourcc == VA_FOURCC('U', 'Y', 'V', 'Y')) {
+        pp_index = PP_NV12_LOAD_SAVE_PA;
+    } else {
+        /* No kernel for this destination format: report it to the caller. */
+        return VA_STATUS_ERROR_UNIMPLEMENTED;
+    }
+
+    status = i965_post_processing_internal(ctx, pp_context,
+                                           src_surface, src_rect,
+                                           dst_surface, dst_rect,
+                                           pp_index, NULL);
+
+    /* Submit the batch whether or not the kernel setup succeeded, as before. */
+    intel_batchbuffer_flush(pp_context->batch);
+
+    return status;
+}
+
+/*
+ * Convert a packed source (the caller dispatches YUY2/UYVY here) into an NV12
+ * dst_surface.  Returns VA_STATUS_ERROR_UNKNOWN for any other destination;
+ * otherwise the status of i965_post_processing_internal(), no longer dropped.
+ */
+static VAStatus
+i965_image_pl1_processing(VADriverContextP ctx,
+                          const struct i965_surface *src_surface,
+                          const VARectangle *src_rect,
+                          struct i965_surface *dst_surface,
+                          const VARectangle *dst_rect)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct i965_post_processing_context *pp_context = i965->pp_context;
+    VAStatus status;
+
+    if (pp_get_surface_fourcc(ctx, dst_surface) != VA_FOURCC('N', 'V', '1', '2'))
+        return VA_STATUS_ERROR_UNKNOWN;
+
+    status = i965_post_processing_internal(ctx, pp_context,
+                                           src_surface, src_rect,
+                                           dst_surface, dst_rect,
+                                           PP_PA_LOAD_SAVE_NV12, NULL);
+
+    intel_batchbuffer_flush(pp_context->batch);
+
+    return status;
+}
+
+VAStatus
+i965_image_processing(VADriverContextP ctx,
+                      const struct i965_surface *src_surface,
+                      const VARectangle *src_rect,
+                      struct i965_surface *dst_surface,
+                      const VARectangle *dst_rect)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    VAStatus status = VA_STATUS_ERROR_UNIMPLEMENTED;
 
     if (HAS_PP(i965)) {
-        if (pp_context) {
-            dri_bo_unreference(pp_context->curbe.bo);
-            pp_context->curbe.bo = NULL;
+        int fourcc = pp_get_surface_fourcc(ctx, src_surface);
+
+        switch (fourcc) {
+        case VA_FOURCC('Y', 'V', '1', '2'):
+        case VA_FOURCC('I', '4', '2', '0'):
+        case VA_FOURCC('I', 'M', 'C', '1'):
+        case VA_FOURCC('I', 'M', 'C', '3'):
+            status = i965_image_pl3_processing(ctx,
+                                               src_surface,
+                                               src_rect,
+                                               dst_surface,
+                                               dst_rect);
+            break;
+
+        case  VA_FOURCC('N', 'V', '1', '2'):
+            status = i965_image_pl2_processing(ctx,
+                                               src_surface,
+                                               src_rect,
+                                               dst_surface,
+                                               dst_rect);
+            break;
+        case  VA_FOURCC('Y', 'U', 'Y', '2'):
+        case VA_FOURCC('U', 'Y', 'V', 'Y'):
+            status = i965_image_pl1_processing(ctx,
+                                               src_surface,
+                                               src_rect,
+                                               dst_surface,
+                                               dst_rect);
+            break;
+
+        default:
+            status = VA_STATUS_ERROR_UNIMPLEMENTED;
+            break;
+        }
+    }
 
-            for (i = 0; i < MAX_PP_SURFACES; i++) {
-                dri_bo_unreference(pp_context->surfaces[i].ss_bo);
-                pp_context->surfaces[i].ss_bo = NULL;
+    return status;
+}     
 
-                dri_bo_unreference(pp_context->surfaces[i].s_bo);
-                pp_context->surfaces[i].s_bo = NULL;
-            }
 
-            dri_bo_unreference(pp_context->sampler_state_table.bo);
-            pp_context->sampler_state_table.bo = NULL;
+static void
+i965_post_processing_context_finalize(struct i965_post_processing_context *pp_context)
+{
+    int i;
 
-            dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
-            pp_context->sampler_state_table.bo_8x8 = NULL;
+    dri_bo_unreference(pp_context->surface_state_binding_table.bo);
+    pp_context->surface_state_binding_table.bo = NULL;
 
-            dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
-            pp_context->sampler_state_table.bo_8x8_uv = NULL;
+    dri_bo_unreference(pp_context->curbe.bo);
+    pp_context->curbe.bo = NULL;
 
-            dri_bo_unreference(pp_context->binding_table.bo);
-            pp_context->binding_table.bo = NULL;
+    dri_bo_unreference(pp_context->sampler_state_table.bo);
+    pp_context->sampler_state_table.bo = NULL;
 
-            dri_bo_unreference(pp_context->idrt.bo);
-            pp_context->idrt.bo = NULL;
-            pp_context->idrt.num_interface_descriptors = 0;
+    dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
+    pp_context->sampler_state_table.bo_8x8 = NULL;
 
-            dri_bo_unreference(pp_context->vfe_state.bo);
-            pp_context->vfe_state.bo = NULL;
+    dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
+    pp_context->sampler_state_table.bo_8x8_uv = NULL;
 
-            dri_bo_unreference(pp_context->stmm.bo);
-            pp_context->stmm.bo = NULL;
+    dri_bo_unreference(pp_context->idrt.bo);
+    pp_context->idrt.bo = NULL;
+    pp_context->idrt.num_interface_descriptors = 0;
 
-            for (i = 0; i < NUM_PP_MODULES; i++) {
-                struct pp_module *pp_module = &pp_context->pp_modules[i];
+    dri_bo_unreference(pp_context->vfe_state.bo);
+    pp_context->vfe_state.bo = NULL;
 
-                dri_bo_unreference(pp_module->kernel.bo);
-                pp_module->kernel.bo = NULL;
-            }
+    dri_bo_unreference(pp_context->stmm.bo);
+    pp_context->stmm.bo = NULL;
 
-            free(pp_context);
-        }
+    for (i = 0; i < NUM_PP_MODULES; i++) {
+        struct pp_module *pp_module = &pp_context->pp_modules[i];
+
+        dri_bo_unreference(pp_module->kernel.bo);
+        pp_module->kernel.bo = NULL;
+    }
+
+    free(pp_context->pp_static_parameter);
+    free(pp_context->pp_inline_parameter);
+    pp_context->pp_static_parameter = NULL;
+    pp_context->pp_inline_parameter = NULL;
+}
+
+Bool
+i965_post_processing_terminate(VADriverContextP ctx)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct i965_post_processing_context *pp_context = i965->pp_context;
 
-        i965->pp_context = NULL;
+    if (pp_context) {
+       if(IS_HASWELL(i965->intel.device_id)){
+           gen75_vebox_context_destroy(ctx, pp_context->pp_vebox_context);
+        }
+ 
+        i965_post_processing_context_finalize(pp_context);
+        free(pp_context);
     }
 
+    i965->pp_context = NULL;
+
     return True;
 }
 
+static void
+i965_post_processing_context_init(VADriverContextP ctx,
+                                  struct i965_post_processing_context *pp_context,
+                                  struct intel_batchbuffer *batch)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    int i;
+    /* Set up the URB layout, per-generation kernels and parameter blocks of pp_context, then bind it to batch. */
+    pp_context->urb.size = URB_SIZE((&i965->intel));
+    pp_context->urb.num_vfe_entries = 32;
+    pp_context->urb.size_vfe_entry = 1;     /* in 512 bits unit */
+    pp_context->urb.num_cs_entries = 1;
+
+    if (IS_GEN7(i965->intel.device_id))
+        pp_context->urb.size_cs_entry = 4;      /* in 512 bits unit */
+    else
+        pp_context->urb.size_cs_entry = 2;
+
+   pp_context->urb.vfe_start = 0;
+    pp_context->urb.cs_start = pp_context->urb.vfe_start + 
+        pp_context->urb.num_vfe_entries * pp_context->urb.size_vfe_entry;
+    assert(pp_context->urb.cs_start + 
+           pp_context->urb.num_cs_entries * pp_context->urb.size_cs_entry <= URB_SIZE((&i965->intel)));
+    /* Every per-generation kernel table must provide exactly NUM_PP_MODULES entries. */
+    assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen5));
+    assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen6));
+    assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen7));
+    assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen75));
+    /* Haswell is tested before GEN7 -- presumably because Haswell ids also satisfy IS_GEN7; TODO confirm. */
+    if (IS_HASWELL(i965->intel.device_id))
+        memcpy(pp_context->pp_modules, pp_modules_gen75, sizeof(pp_context->pp_modules));
+    else if (IS_GEN7(i965->intel.device_id))
+        memcpy(pp_context->pp_modules, pp_modules_gen7, sizeof(pp_context->pp_modules));
+    else if (IS_GEN6(i965->intel.device_id))
+        memcpy(pp_context->pp_modules, pp_modules_gen6, sizeof(pp_context->pp_modules));
+    else if (IS_IRONLAKE(i965->intel.device_id))
+        memcpy(pp_context->pp_modules, pp_modules_gen5, sizeof(pp_context->pp_modules));
+    /* Upload each kernel binary into its own buffer object; modules without a binary or size get a NULL bo. */
+    for (i = 0; i < NUM_PP_MODULES; i++) {
+        struct pp_module *pp_module = &pp_context->pp_modules[i];
+        dri_bo_unreference(pp_module->kernel.bo);
+        if (pp_module->kernel.bin && pp_module->kernel.size) {
+            pp_module->kernel.bo = dri_bo_alloc(i965->intel.bufmgr,
+                                                pp_module->kernel.name,
+                                                pp_module->kernel.size,
+                                                4096);
+            assert(pp_module->kernel.bo);
+            dri_bo_subdata(pp_module->kernel.bo, 0, pp_module->kernel.size, pp_module->kernel.bin);
+        } else {
+            pp_module->kernel.bo = NULL;
+        }
+    }
+
+    /* static & inline parameters; NOTE(review): the calloc() results below are not checked for NULL */
+    if (IS_GEN7(i965->intel.device_id)) {
+        pp_context->pp_static_parameter = calloc(sizeof(struct gen7_pp_static_parameter), 1);
+        pp_context->pp_inline_parameter = calloc(sizeof(struct gen7_pp_inline_parameter), 1);
+    } else {
+        pp_context->pp_static_parameter = calloc(sizeof(struct pp_static_parameter), 1);
+        pp_context->pp_inline_parameter = calloc(sizeof(struct pp_inline_parameter), 1);
+    }
+
+    pp_context->batch = batch;
+}
+
 Bool
 i965_post_processing_init(VADriverContextP ctx)
 {
     struct i965_driver_data *i965 = i965_driver_data(ctx);
     struct i965_post_processing_context *pp_context = i965->pp_context;
-    int i;
 
     if (HAS_PP(i965)) {
         if (pp_context == NULL) {
             pp_context = calloc(1, sizeof(*pp_context));
+            i965_post_processing_context_init(ctx, pp_context, i965->batch);
             i965->pp_context = pp_context;
 
-            pp_context->urb.size = URB_SIZE((&i965->intel));
-            pp_context->urb.num_vfe_entries = 32;
-            pp_context->urb.size_vfe_entry = 1;     /* in 512 bits unit */
-            pp_context->urb.num_cs_entries = 1;
-            pp_context->urb.size_cs_entry = 2;      /* in 512 bits unit */
-            pp_context->urb.vfe_start = 0;
-            pp_context->urb.cs_start = pp_context->urb.vfe_start + 
-                pp_context->urb.num_vfe_entries * pp_context->urb.size_vfe_entry;
-            assert(pp_context->urb.cs_start + 
-                   pp_context->urb.num_cs_entries * pp_context->urb.size_cs_entry <= URB_SIZE((&i965->intel)));
-
-            assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen5));
-            assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen6));
-
-            if (IS_GEN6(i965->intel.device_id) ||
-                IS_GEN7(i965->intel.device_id))
-                memcpy(pp_context->pp_modules, pp_modules_gen6, sizeof(pp_context->pp_modules));
-            else if (IS_IRONLAKE(i965->intel.device_id))
-                memcpy(pp_context->pp_modules, pp_modules_gen5, sizeof(pp_context->pp_modules));
-
-            for (i = 0; i < NUM_PP_MODULES; i++) {
-                struct pp_module *pp_module = &pp_context->pp_modules[i];
-                dri_bo_unreference(pp_module->kernel.bo);
-                pp_module->kernel.bo = dri_bo_alloc(i965->intel.bufmgr,
-                                                    pp_module->kernel.name,
-                                                    pp_module->kernel.size,
-                                                    4096);
-                assert(pp_module->kernel.bo);
-                dri_bo_subdata(pp_module->kernel.bo, 0, pp_module->kernel.size, pp_module->kernel.bin);
+            if(IS_HASWELL(i965->intel.device_id)){
+                pp_context->pp_vebox_context = gen75_vebox_context_init(ctx);
             }
         }
     }
 
     return True;
 }
+
+
diff --git a/src/i965_post_processing.h b/src/i965_post_processing.h
old mode 100644
new mode 100755
index 5f4e949..e55d1f6
--- a/src/i965_post_processing.h
+++ b/src/i965_post_processing.h
@@ -29,26 +29,35 @@
 #ifndef __I965_POST_PROCESSING_H__
 #define __I965_POST_PROCESSING_H__
 
-#define MAX_PP_SURFACES 32
+#define MAX_PP_SURFACES  48
 
-#define I965_PP_FLAG_TOP_FIELD          1
-#define I965_PP_FLAG_BOTTOM_FIELD       2
-#define I965_PP_FLAG_DEINTERLACING      4 /* XXX: don't support MCDI yet */
-#define I965_PP_FLAG_AVS                8
+#define I965_PP_FLAG_TOP_FIELD                 1
+#define I965_PP_FLAG_BOTTOM_FIELD              2
+
+#define I965_PP_FLAG_AVS                       4
+#define I965_PP_FLAG_DEINTERLACING             8
 
 enum
 {
     PP_NULL = 0,
-    PP_NV12_LOAD_SAVE,
+    PP_NV12_LOAD_SAVE_N12,
+    PP_NV12_LOAD_SAVE_PL3,
+    PP_PL3_LOAD_SAVE_N12,
+    PP_PL3_LOAD_SAVE_PL3,
     PP_NV12_SCALING,
     PP_NV12_AVS,
     PP_NV12_DNDI,
+    PP_NV12_DN,
+    PP_NV12_LOAD_SAVE_PA,
+    PP_PL3_LOAD_SAVE_PA,
+    PP_PA_LOAD_SAVE_NV12,
+    NUM_PP_MODULES,
 };
 
-#define NUM_PP_MODULES                  5
-
 struct pp_load_save_context
 {
+    int dest_x;
+    int dest_y;
     int dest_w;
     int dest_h;
 };
@@ -81,14 +90,25 @@ struct pp_dndi_context
     int dest_h;
 };
 
+struct pp_dn_context
+{
+    int dest_w;
+    int dest_h;
+};
+
+struct i965_post_processing_context;
+ 
 struct pp_module
 {
     struct i965_kernel kernel;
     
     /* others */
-    void (*initialize)(VADriverContextP ctx, 
-                       VASurfaceID in_surface_id, VASurfaceID out_surface_id,
-                       const VARectangle *src_rect, const VARectangle *dst_rect);
+    VAStatus (*initialize)(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
+                       const struct i965_surface *src_surface,
+                       const VARectangle *src_rect,
+                       struct i965_surface *dst_surface,
+                       const VARectangle *dst_rect,
+                       void *filter_param);
 };
 
 struct pp_static_parameter
@@ -278,6 +298,9 @@ struct pp_inline_parameter
         unsigned int block_count_x:8;
 
         /* r5.6 */
+        /* We only support M*1 or 1*N block partitioning for now,
+         *   -- so the asm code only needs to update this mask from grf6 for the last block.
+         */
         unsigned int block_horizontal_mask:16;
         unsigned int block_vertical_mask:8;
         unsigned int number_blocks:8;
@@ -290,30 +313,126 @@ struct pp_inline_parameter
         /* AVS r6.0 */
         float video_step_delta;
 
-        /* r6.1-r6.7 */
+        /* r6.1 -- NOTE: this layout assumes sizeof(unsigned int) == 4 */
+        unsigned int block_horizontal_mask_right:16;
+        unsigned int block_vertical_mask_bottom:8;
+        unsigned int pad1:8;
+
+        /* r6.2 */
+        unsigned int block_horizontal_mask_middle:16;
+        unsigned int pad2:16;
+
+        /* r6.3-r6.7 */
+        unsigned int padx[5];
+    } grf6;
+};
+
+struct gen7_pp_static_parameter
+{
+    struct {
+        /* r1.0-r1.5 */
+        unsigned int padx[6];
+        /* r1.6 */
+        unsigned int di_statistics_surface_pitch_div2:16;
+        unsigned int di_statistics_surface_height_div4:16;
+        /* r1.7 */
+        unsigned int di_top_field_first:8;
+        unsigned int pad0:16;
+        unsigned int pointer_to_inline_parameter:8; /* value: 7 */
+    } grf1;
+
+    struct {
+        /* r2.0 */
+        unsigned int pad3;
+
+        /* r2.1 */
+        unsigned int pad2:16;
+        unsigned int save_avs_rgb_swap:1; /* 0: RGB, 1: BGR */
+        unsigned int avs_wa_enable:1; /* must be enabled for GEN7 */
+        unsigned int pad1:1;
+        unsigned int avs_wa_width:13;
+
+        /* r2.2 */
+        float avs_wa_one_div_256_width;
+
+        /* r2.3 */
+        float avs_wa_five_div_256_width;
+        
+        /* r2.4-r2.6 */
+        unsigned int padx[3];
+
+        /* r2.7 */
+        unsigned int di_destination_packed_y_component_offset:8;
+        unsigned int di_destination_packed_u_component_offset:8;
+        unsigned int di_destination_packed_v_component_offset:8;
+        unsigned int pad0:8;
+    } grf2;
+
+    struct {
+        float sampler_load_horizontal_scaling_step_ratio; /* r3.0 */
+        unsigned int padx[7]; /* r3.1-r3.7 */
+    } grf3;
+
+    struct {
+        float sampler_load_vertical_scaling_step; /* r4.0 */
+        unsigned int pad0; /* r4.1 */
+        unsigned int di_hoffset_svf_from_dvf:16; /* r4.2, first 16-bit field */
+        unsigned int di_voffset_svf_from_dvf:16; /* r4.2, second 16-bit field */
+        unsigned int padx[5]; /* r4.3-r4.7 */
+    } grf4;
+
+    struct {
+        float sampler_load_vertical_frame_origin; /* r5.0 */
+        unsigned int padx[7]; /* r5.1-r5.7 */
+    } grf5;
+
+    struct {
+        float sampler_load_horizontal_frame_origin; /* r6.0 */
         unsigned int padx[7];
     } grf6;
 };
 
+struct gen7_pp_inline_parameter
+{
+    struct {
+        /* r7.0 */
+        unsigned int destination_block_horizontal_origin:16;
+        unsigned int destination_block_vertical_origin:16;
+        /* r7.1: 0xffffffff */
+        unsigned int constant_0;
+        /* r7.2 */
+        unsigned int pad0;
+        /* r7.3 */
+        unsigned int pad1;
+        /* r7.4 */
+        float sampler_load_main_video_x_scaling_step;
+        /* r7.5 */
+        unsigned int pad2;
+        /* r7.6: must be zero */
+        unsigned int avs_vertical_block_number;
+        /* r7.7: 0 */
+        unsigned int group_id_number;
+    } grf7;
+
+    struct {
+        unsigned int padx[8];
+    } grf8;
+};
+
 struct i965_post_processing_context
 {
     int current_pp;
     struct pp_module pp_modules[NUM_PP_MODULES];
-    struct pp_static_parameter pp_static_parameter;
-    struct pp_inline_parameter pp_inline_parameter;
+    void *pp_static_parameter;
+    void *pp_inline_parameter;
 
     struct {
         dri_bo *bo;
-    } curbe;
-
-    struct {
-        dri_bo *ss_bo;
-        dri_bo *s_bo;
-    } surfaces[MAX_PP_SURFACES];
+    } surface_state_binding_table;
 
     struct {
         dri_bo *bo;
-    } binding_table;
+    } curbe;
 
     struct {
         dri_bo *bo;
@@ -352,11 +471,19 @@ struct i965_post_processing_context
         struct pp_scaling_context pp_scaling_context;
         struct pp_avs_context pp_avs_context;
         struct pp_dndi_context pp_dndi_context;
+        struct pp_dn_context pp_dn_context;
     } private_context;
 
     int (*pp_x_steps)(void *private_context);
     int (*pp_y_steps)(void *private_context);
     int (*pp_set_block_parameter)(struct i965_post_processing_context *pp_context, int x, int y);
+    struct intel_batchbuffer *batch;
+    unsigned int block_horizontal_mask_left:16;
+    unsigned int block_horizontal_mask_right:16;
+    unsigned int block_vertical_mask_bottom:8;
+ 
+    /* video processing based on the HSW VEBOX */
+    struct intel_vebox_context *pp_vebox_context;
 };
 
 VASurfaceID
@@ -369,6 +496,13 @@ i965_post_processing(
     int                *has_done_scaling 
 );
 
+VAStatus
+i965_image_processing(VADriverContextP ctx,
+                      const struct i965_surface *src_surface,
+                      const VARectangle *src_rect,
+                      struct i965_surface *dst_surface,
+                      const VARectangle *dst_rect);
+
 Bool
 i965_post_processing_terminate(VADriverContextP ctx);
 Bool
diff --git a/src/i965_render.c b/src/i965_render.c
index f6b6dde..4adfba6 100644
--- a/src/i965_render.c
+++ b/src/i965_render.c
@@ -36,8 +36,6 @@
 #include <string.h>
 #include <assert.h>
 
-#include <va/va_dricommon.h>
-
 #include "intel_batchbuffer.h"
 #include "intel_driver.h"
 #include "i965_defines.h"
@@ -133,6 +131,14 @@ static const uint32_t ps_subpic_kernel_static_gen7[][4] = {
 #include "shaders/render/exa_wm_write.g7b"
 };
 
+/* Programs for Haswell */
+static const uint32_t ps_kernel_static_gen7_haswell[][4] = {
+#include "shaders/render/exa_wm_src_affine.g7b"
+#include "shaders/render/exa_wm_src_sample_planar.g7b.haswell"
+#include "shaders/render/exa_wm_yuv_rgb.g7b"
+#include "shaders/render/exa_wm_write.g7b"
+};
+
 #define SURFACE_STATE_PADDED_SIZE_I965  ALIGN(sizeof(struct i965_surface_state), 32)
 #define SURFACE_STATE_PADDED_SIZE_GEN7  ALIGN(sizeof(struct gen7_surface_state), 32)
 #define SURFACE_STATE_PADDED_SIZE       MAX(SURFACE_STATE_PADDED_SIZE_I965, SURFACE_STATE_PADDED_SIZE_GEN7)
@@ -257,6 +263,31 @@ static struct i965_kernel render_kernels_gen7[] = {
     }
 };
 
+static struct i965_kernel render_kernels_gen7_haswell[] = {
+    {
+        "SF",
+        SF_KERNEL,
+        sf_kernel_static_gen7,
+        sizeof(sf_kernel_static_gen7),
+        NULL
+    },
+    {
+        "PS",
+        PS_KERNEL,
+        ps_kernel_static_gen7_haswell,
+        sizeof(ps_kernel_static_gen7_haswell),
+        NULL
+    },
+
+    {
+        "PS_SUBPIC",
+        PS_SUBPIC_KERNEL,
+        ps_subpic_kernel_static_gen7,
+        sizeof(ps_subpic_kernel_static_gen7),
+        NULL
+    }
+};
+
 #define URB_VS_ENTRIES	      8
 #define URB_VS_ENTRY_SIZE     1
 
@@ -697,6 +728,16 @@ gen7_render_set_surface_tiling(struct gen7_surface_state *ss, uint32_t tiling)
    }
 }
 
+/* Set "Shader Channel Select": the ss7 r/g/b/a selects get HSW_SCS_RED/GREEN/BLUE/ALPHA respectively */
+void
+gen7_render_set_surface_scs(struct gen7_surface_state *ss)
+{
+    ss->ss7.shader_chanel_select_r = HSW_SCS_RED;
+    ss->ss7.shader_chanel_select_g = HSW_SCS_GREEN;
+    ss->ss7.shader_chanel_select_b = HSW_SCS_BLUE;
+    ss->ss7.shader_chanel_select_a = HSW_SCS_ALPHA;
+}
+
 static void
 gen7_render_set_surface_state(
     struct gen7_surface_state *ss,
@@ -767,6 +808,8 @@ i965_render_src_surface_state(
                                       region, offset,
                                       w, h,
                                       pitch, format, flags);
+        if (IS_HASWELL(i965->intel.device_id))
+            gen7_render_set_surface_scs(ss);
         dri_bo_emit_reloc(ss_bo,
                           I915_GEM_DOMAIN_SAMPLER, 0,
                           offset,
@@ -848,16 +891,11 @@ i965_subpic_render_src_surfaces_state(VADriverContextP ctx,
 {
     struct i965_driver_data *i965 = i965_driver_data(ctx);  
     struct object_surface *obj_surface = SURFACE(surface);
-    int w, h;
-    dri_bo *region;
     dri_bo *subpic_region;
     struct object_subpic *obj_subpic = SUBPIC(obj_surface->subpic);
     struct object_image *obj_image = IMAGE(obj_subpic->image);
     assert(obj_surface);
     assert(obj_surface->bo);
-    w = obj_surface->width;
-    h = obj_surface->height;
-    region = obj_surface->bo;
     subpic_region = obj_image->bo;
     /*subpicture surface*/
     i965_render_src_surface_state(ctx, 1, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format, 0);     
@@ -890,6 +928,8 @@ i965_render_dest_surface_state(VADriverContextP ctx, int index)
                                       dest_region->bo, 0,
                                       dest_region->width, dest_region->height,
                                       dest_region->pitch, format, 0);
+        if (IS_HASWELL(i965->intel.device_id))
+            gen7_render_set_surface_scs(ss);
         dri_bo_emit_reloc(ss_bo,
                           I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                           0,
@@ -911,18 +951,56 @@ i965_render_dest_surface_state(VADriverContextP ctx, int index)
     dri_bo_unmap(ss_bo);
 }
 
+static void
+i965_fill_vertex_buffer( /* upload 3 (u,v,x,y) vertices; (u,v) is chosen per rotation attribute */
+    VADriverContextP ctx,
+    float tex_coords[4], /* [(u1,v1);(u2,v2)] */
+    float vid_coords[4]  /* [(x1,y1);(x2,y2)] */
+)
+{
+    struct i965_driver_data * const i965 = i965_driver_data(ctx);
+    float vb[12]; /* 3 vertices x 4 floats: u, v, x, y */
+
+    enum { X1, Y1, X2, Y2 }; /* positions within tex_coords[] / vid_coords[] */
+
+    static const unsigned int g_rotation_indices[][6] = { /* per rotation: (u,v) index pairs for vertices 0..2 */
+        [VA_ROTATION_NONE] = { X2, Y2, X1, Y2, X1, Y1 },
+        [VA_ROTATION_90]   = { X2, Y1, X2, Y2, X1, Y2 },
+        [VA_ROTATION_180]  = { X1, Y1, X2, Y1, X2, Y2 },
+        [VA_ROTATION_270]  = { X1, Y2, X1, Y1, X2, Y1 },
+    };
+
+    const unsigned int * const rotation_indices =
+        g_rotation_indices[i965->rotation_attrib->value]; /* NOTE(review): value is used unchecked as a row index -- confirm it is validated elsewhere */
+
+    vb[0]  = tex_coords[rotation_indices[0]]; /* bottom-right corner */
+    vb[1]  = tex_coords[rotation_indices[1]];
+    vb[2]  = vid_coords[X2]; /* output position is fixed; only (u,v) is permuted by rotation */
+    vb[3]  = vid_coords[Y2];
+
+    vb[4]  = tex_coords[rotation_indices[2]]; /* bottom-left corner */
+    vb[5]  = tex_coords[rotation_indices[3]];
+    vb[6]  = vid_coords[X1];
+    vb[7]  = vid_coords[Y2];
+
+    vb[8]  = tex_coords[rotation_indices[4]]; /* top-left corner */
+    vb[9]  = tex_coords[rotation_indices[5]];
+    vb[10] = vid_coords[X1];
+    vb[11] = vid_coords[Y1];
+
+    dri_bo_subdata(i965->render_state.vb.vertex_buffer, 0, sizeof(vb), vb); /* copy all 12 floats to offset 0 of the vertex buffer */
+}
+
 static void 
 i965_subpic_render_upload_vertex(VADriverContextP ctx,
                                  VASurfaceID surface,
                                  const VARectangle *output_rect)
 {    
     struct i965_driver_data  *i965         = i965_driver_data(ctx);
-    struct i965_render_state *render_state = &i965->render_state;
     struct object_surface    *obj_surface  = SURFACE(surface);
     struct object_subpic     *obj_subpic   = SUBPIC(obj_surface->subpic);
+    float tex_coords[4], vid_coords[4];
     VARectangle dst_rect;
-    float *vb, tx1, tx2, ty1, ty2, x1, x2, y1, y2;
-    int i = 0;
 
     if (obj_subpic->flags & VA_SUBPICTURE_DESTINATION_IS_SCREEN_COORD)
         dst_rect = obj_subpic->dst_rect;
@@ -935,35 +1013,17 @@ i965_subpic_render_upload_vertex(VADriverContextP ctx,
         dst_rect.height = sy * obj_subpic->dst_rect.height;
     }
 
-    dri_bo_map(render_state->vb.vertex_buffer, 1);
-    assert(render_state->vb.vertex_buffer->virtual);
-    vb = render_state->vb.vertex_buffer->virtual;
-
-    tx1 = (float)obj_subpic->src_rect.x / obj_subpic->width;
-    ty1 = (float)obj_subpic->src_rect.y / obj_subpic->height;
-    tx2 = (float)(obj_subpic->src_rect.x + obj_subpic->src_rect.width) / obj_subpic->width;
-    ty2 = (float)(obj_subpic->src_rect.y + obj_subpic->src_rect.height) / obj_subpic->height;
+    tex_coords[0] = (float)obj_subpic->src_rect.x / obj_subpic->width;
+    tex_coords[1] = (float)obj_subpic->src_rect.y / obj_subpic->height;
+    tex_coords[2] = (float)(obj_subpic->src_rect.x + obj_subpic->src_rect.width) / obj_subpic->width;
+    tex_coords[3] = (float)(obj_subpic->src_rect.y + obj_subpic->src_rect.height) / obj_subpic->height;
 
-    x1 = (float)dst_rect.x;
-    y1 = (float)dst_rect.y;
-    x2 = (float)(dst_rect.x + dst_rect.width);
-    y2 = (float)(dst_rect.y + dst_rect.height);
+    vid_coords[0] = dst_rect.x;
+    vid_coords[1] = dst_rect.y;
+    vid_coords[2] = (float)(dst_rect.x + dst_rect.width);
+    vid_coords[3] = (float)(dst_rect.y + dst_rect.height);
 
-    vb[i++] = tx2;
-    vb[i++] = ty2;
-    vb[i++] = x2;
-    vb[i++] = y2;
-
-    vb[i++] = tx1;
-    vb[i++] = ty2;
-    vb[i++] = x1;
-    vb[i++] = y2;
-
-    vb[i++] = tx1;
-    vb[i++] = ty1;
-    vb[i++] = x1;
-    vb[i++] = y1;
-    dri_bo_unmap(render_state->vb.vertex_buffer);
+    i965_fill_vertex_buffer(ctx, tex_coords, vid_coords);
 }
 
 static void 
@@ -978,46 +1038,26 @@ i965_render_upload_vertex(
     struct i965_render_state *render_state = &i965->render_state;
     struct intel_region *dest_region = render_state->draw_region;
     struct object_surface *obj_surface;
-    float *vb;
-
-    float u1, v1, u2, v2;
-    int i, width, height;
-    int box_x1 = dest_region->x + dst_rect->x;
-    int box_y1 = dest_region->y + dst_rect->y;
-    int box_x2 = box_x1 + dst_rect->width;
-    int box_y2 = box_y1 + dst_rect->height;
+    float tex_coords[4], vid_coords[4];
+    int width, height;
 
     obj_surface = SURFACE(surface);
     assert(surface);
-    width = obj_surface->orig_width;
-    height = obj_surface->orig_height;
-
-    u1 = (float)src_rect->x / width;
-    v1 = (float)src_rect->y / height;
-    u2 = (float)(src_rect->x + src_rect->width) / width;
-    v2 = (float)(src_rect->y + src_rect->height) / height;
 
-    dri_bo_map(render_state->vb.vertex_buffer, 1);
-    assert(render_state->vb.vertex_buffer->virtual);
-    vb = render_state->vb.vertex_buffer->virtual;
+    width  = obj_surface->orig_width;
+    height = obj_surface->orig_height;
 
-    i = 0;
-    vb[i++] = u2;
-    vb[i++] = v2;
-    vb[i++] = (float)box_x2;
-    vb[i++] = (float)box_y2;
-    
-    vb[i++] = u1;
-    vb[i++] = v2;
-    vb[i++] = (float)box_x1;
-    vb[i++] = (float)box_y2;
+    tex_coords[0] = (float)src_rect->x / width;
+    tex_coords[1] = (float)src_rect->y / height;
+    tex_coords[2] = (float)(src_rect->x + src_rect->width) / width;
+    tex_coords[3] = (float)(src_rect->y + src_rect->height) / height;
 
-    vb[i++] = u1;
-    vb[i++] = v1;
-    vb[i++] = (float)box_x1;
-    vb[i++] = (float)box_y1;
+    vid_coords[0] = dest_region->x + dst_rect->x;
+    vid_coords[1] = dest_region->y + dst_rect->y;
+    vid_coords[2] = vid_coords[0] + dst_rect->width;
+    vid_coords[3] = vid_coords[1] + dst_rect->height;
 
-    dri_bo_unmap(render_state->vb.vertex_buffer);
+    i965_fill_vertex_buffer(ctx, tex_coords, vid_coords);
 }
 
 static void
@@ -2427,6 +2467,10 @@ gen7_emit_urb(VADriverContextP ctx)
 {
     struct i965_driver_data *i965 = i965_driver_data(ctx);
     struct intel_batchbuffer *batch = i965->batch;
+    unsigned int num_urb_entries = 32;
+
+    if (IS_HASWELL(i965->intel.device_id))
+        num_urb_entries = 64;
 
     BEGIN_BATCH(batch, 2);
     OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS | (2 - 2));
@@ -2436,7 +2480,7 @@ gen7_emit_urb(VADriverContextP ctx)
     BEGIN_BATCH(batch, 2);
     OUT_BATCH(batch, GEN7_3DSTATE_URB_VS | (2 - 2));
     OUT_BATCH(batch, 
-              (32 << GEN7_URB_ENTRY_NUMBER_SHIFT) | /* at least 32 */
+              (num_urb_entries << GEN7_URB_ENTRY_NUMBER_SHIFT) |
               (2 - 1) << GEN7_URB_ENTRY_SIZE_SHIFT |
               (1 << GEN7_URB_STARTING_ADDRESS_SHIFT));
    ADVANCE_BATCH(batch);
@@ -2731,6 +2775,13 @@ gen7_emit_wm_state(VADriverContextP ctx, int kernel)
     struct i965_driver_data *i965 = i965_driver_data(ctx);
     struct intel_batchbuffer *batch = i965->batch;
     struct i965_render_state *render_state = &i965->render_state;
+    unsigned int max_threads_shift = GEN7_PS_MAX_THREADS_SHIFT_IVB;
+    unsigned int num_samples = 0;
+
+    if (IS_HASWELL(i965->intel.device_id)) {
+        max_threads_shift = GEN7_PS_MAX_THREADS_SHIFT_HSW;
+        num_samples = 1 << GEN7_PS_SAMPLE_MASK_SHIFT_HSW;
+    }
 
     BEGIN_BATCH(batch, 3);
     OUT_BATCH(batch, GEN6_3DSTATE_WM | (3 - 2));
@@ -2764,7 +2815,7 @@ gen7_emit_wm_state(VADriverContextP ctx, int kernel)
               (5 << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
     OUT_BATCH(batch, 0); /* scratch space base offset */
     OUT_BATCH(batch, 
-              ((86 - 1) << GEN7_PS_MAX_THREADS_SHIFT) |
+              ((86 - 1) << max_threads_shift) | num_samples |
               GEN7_PS_PUSH_CONSTANT_ENABLE |
               GEN7_PS_ATTRIBUTE_ENABLE |
               GEN7_PS_16_DISPATCH_ENABLE);
@@ -3012,7 +3063,9 @@ i965_render_init(VADriverContextP ctx)
                                  sizeof(render_kernels_gen6[0])));
 
     if (IS_GEN7(i965->intel.device_id))
-        memcpy(render_state->render_kernels, render_kernels_gen7, sizeof(render_state->render_kernels));
+        memcpy(render_state->render_kernels,
+               (IS_HASWELL(i965->intel.device_id) ? render_kernels_gen7_haswell : render_kernels_gen7),
+               sizeof(render_state->render_kernels));
     else if (IS_GEN6(i965->intel.device_id))
         memcpy(render_state->render_kernels, render_kernels_gen6, sizeof(render_state->render_kernels));
     else if (IS_IRONLAKE(i965->intel.device_id))
diff --git a/src/i965_render.h b/src/i965_render.h
index 96a1512..c2fc2bf 100644
--- a/src/i965_render.h
+++ b/src/i965_render.h
@@ -98,4 +98,9 @@ intel_render_put_subpicture(
     const VARectangle *dst_rect
 );
 
+struct gen7_surface_state;
+
+void
+gen7_render_set_surface_scs(struct gen7_surface_state *ss);
+
 #endif /* _I965_RENDER_H_ */
diff --git a/src/i965_structs.h b/src/i965_structs.h
index 12a8d14..77f2570 100644
--- a/src/i965_structs.h
+++ b/src/i965_structs.h
@@ -799,7 +799,8 @@ struct i965_sampler_8x8_coefficient
     } dw3;
 
     struct {
-        int pad0:16;
+        int table_1x_filter_c0:8;
+        int table_1x_filter_c1:8;
         int table_1x_filter_c2:8;
         int table_1x_filter_c3:8;
     } dw4;
@@ -807,11 +808,13 @@ struct i965_sampler_8x8_coefficient
     struct {
         int table_1x_filter_c4:8;
         int table_1x_filter_c5:8;
-        int pad0:16;
+        int table_1x_filter_c6:8;
+        int table_1x_filter_c7:8;
     } dw5;
 
     struct {
-        int pad0:16;
+        int table_1y_filter_c0:8;
+        int table_1y_filter_c1:8;
         int table_1y_filter_c2:8;
         int table_1y_filter_c3:8;
     } dw6;
@@ -819,7 +822,8 @@ struct i965_sampler_8x8_coefficient
     struct {
         int table_1y_filter_c4:8;
         int table_1y_filter_c5:8;
-        int pad0:16;
+        int table_1y_filter_c6:8;
+        int table_1y_filter_c7:8;
     } dw7;
 };
 
@@ -1195,7 +1199,11 @@ struct gen7_surface_state
 
     struct {
         unsigned int resource_min_lod:12;
-        unsigned int pad0:16;
+        unsigned int pad0:4;
+        unsigned int shader_chanel_select_a:3;
+        unsigned int shader_chanel_select_b:3;
+        unsigned int shader_chanel_select_g:3;
+        unsigned int shader_chanel_select_r:3;
         unsigned int alpha_clear_color:1;
         unsigned int blue_clear_color:1;
         unsigned int green_clear_color:1;
@@ -1304,4 +1312,134 @@ struct gen7_surface_state2
     } ss7;
 };
 
+struct gen7_sampler_8x8
+{
+    struct {
+        unsigned int global_noise_estimation:8;
+        unsigned int pad0:8;
+        unsigned int chroma_key_index:2;
+        unsigned int chroma_key_enable:1;
+        unsigned int pad1:10;
+        unsigned int ief_bypass:1;
+        unsigned int pad2:1;
+        unsigned int disable_8x8_filter:1;
+    } dw0;
+
+    struct {
+        unsigned int pad0:5;
+        unsigned int sampler_8x8_state_pointer:27;
+    } dw1;
+    
+    struct {
+        unsigned int weak_edge_threshold:6;
+        unsigned int pad0:2;
+        unsigned int strong_edge_threshold:6;
+        unsigned int pad1:2;
+        unsigned int r5x_coefficient:5;
+        unsigned int r5cx_coefficient:5;
+        unsigned int r5c_coefficient:5;
+        unsigned int pad2:1;
+    } dw2;
+
+    struct {
+        unsigned int r3x_coefficient:5;
+        unsigned int pad0:1;
+        unsigned int r3c_coefficient:5;
+        unsigned int pad1:3;
+        unsigned int gain_factor:6;
+        unsigned int non_edge_weight:3;
+        unsigned int pad2:1;
+        unsigned int regular_weight:3;
+        unsigned int pad3:1;
+        unsigned int strong_edge_weight:3;
+        unsigned int ief4_smooth_enable:1;
+    } dw3;
+};
+
+struct gen7_sampler_dndi
+{
+    struct {
+        unsigned int denoise_asd_threshold:8;
+        unsigned int dnmh_delt:4;
+        unsigned int vdi_walker_y_stride:2;
+        unsigned int vdi_walker_frame_sharing_enable:1;
+        unsigned int pad0:1;
+        unsigned int denoise_maximum_history:8;
+        unsigned int denoise_stad_threshold:8;
+    } dw0;
+
+    struct {
+        unsigned int denoise_threshold_for_sum_of_complexity_measure:8;
+        unsigned int denoise_moving_pixel_threshold:5;
+        unsigned int stmm_c2:3;
+        unsigned int low_temporal_difference_threshold:6;
+        unsigned int pad0:2;
+        unsigned int temporal_difference_threshold:6;
+        unsigned int pad1:2;
+    } dw1;
+
+    struct {
+        unsigned int block_noise_estimate_noise_threshold:8;
+        unsigned int bne_edge_th:4;
+        unsigned int pad0:2;
+        unsigned int smooth_mv_th:2;
+        unsigned int sad_tight_th:4;
+        unsigned int cat_slope_minus1:4;
+        unsigned int good_neighbor_th:6;
+        unsigned int pad1:2;
+   } dw2;
+
+    struct {
+        unsigned int maximum_stmm:8;
+        unsigned int multipler_for_vecm:6;
+        unsigned int pad0:2;
+        unsigned int blending_constant_across_time_for_small_values_of_stmm:8;
+        unsigned int blending_constant_across_time_for_large_values_of_stmm:7;
+        unsigned int stmm_blending_constant_select:1;
+    } dw3;
+
+    struct {
+        unsigned int sdi_delta:8;
+        unsigned int sdi_threshold:8;
+        unsigned int stmm_output_shift:4;
+        unsigned int stmm_shift_up:2;
+        unsigned int stmm_shift_down:2;
+        unsigned int minimum_stmm:8;
+    } dw4;
+
+    struct {
+        unsigned int fmd_temporal_difference_threshold:8;
+        unsigned int sdi_fallback_mode_2_constant:8;
+        unsigned int sdi_fallback_mode_1_t2_constant:8;
+        unsigned int sdi_fallback_mode_1_t1_constant:8;
+    } dw5;
+
+    struct {
+        unsigned int dn_enable:1;
+        unsigned int di_enable:1;
+        unsigned int di_partial:1;
+        unsigned int dndi_top_first:1;
+        unsigned int dndi_stream_id:1;
+        unsigned int dndi_first_frame:1;
+        unsigned int progressive_dn:1;
+        unsigned int mcdi_enable:1;
+        unsigned int fmd_tear_threshold:6;
+        unsigned int cat_th1:2;
+        unsigned int fmd2_vertical_difference_threshold:8;
+        unsigned int fmd1_vertical_difference_threshold:8;
+    } dw6;
+
+    struct {
+        unsigned int sad_tha:4;
+        unsigned int sad_thb:4;
+        unsigned int fmd_for_1st_field_of_current_frame:2;
+        unsigned int mc_pixel_consistency_th:6;
+        unsigned int fmd_for_2nd_field_of_previous_frame:2;
+        unsigned int vdi_walker_enable:1;
+        unsigned int neighborpixel_th:4;
+        unsigned int column_width_minus1:9;
+    } dw7;
+};
+
+
 #endif /* _I965_STRUCTS_H_ */
diff --git a/src/intel_batchbuffer.c b/src/intel_batchbuffer.c
index 0b52281..94d968c 100644
--- a/src/intel_batchbuffer.c
+++ b/src/intel_batchbuffer.c
@@ -31,15 +31,18 @@
 
 #include "intel_batchbuffer.h"
 
+#define MAX_BATCH_SIZE		0x400000
+
 static void 
-intel_batchbuffer_reset(struct intel_batchbuffer *batch)
+intel_batchbuffer_reset(struct intel_batchbuffer *batch, int buffer_size)
 {
     struct intel_driver_data *intel = batch->intel; 
-    int batch_size = BATCH_SIZE;
+    int batch_size = buffer_size;
 
     assert(batch->flag == I915_EXEC_RENDER ||
            batch->flag == I915_EXEC_BLT ||
-           batch->flag == I915_EXEC_BSD);
+           batch->flag == I915_EXEC_BSD ||
+           batch->flag == I915_EXEC_VEBOX);
 
     dri_bo_unreference(batch->buffer);
     batch->buffer = dri_bo_alloc(intel->bufmgr, 
@@ -63,17 +66,27 @@ intel_batchbuffer_space(struct intel_batchbuffer *batch)
 
 
 struct intel_batchbuffer * 
-intel_batchbuffer_new(struct intel_driver_data *intel, int flag)
+intel_batchbuffer_new(struct intel_driver_data *intel, int flag, int buffer_size)
 {
     struct intel_batchbuffer *batch = calloc(1, sizeof(*batch));
     assert(flag == I915_EXEC_RENDER ||
            flag == I915_EXEC_BSD ||
-           flag == I915_EXEC_BLT);
+           flag == I915_EXEC_BLT ||
+           flag == I915_EXEC_VEBOX);
+
+   if (!buffer_size || buffer_size < BATCH_SIZE) {
+	buffer_size = BATCH_SIZE;
+   }
+
+   /* the buffer size can't exceed 4M */
+   if (buffer_size > MAX_BATCH_SIZE) {
+	buffer_size = MAX_BATCH_SIZE;
+   }
 
     batch->intel = intel;
     batch->flag = flag;
     batch->run = drm_intel_bo_mrb_exec;
-    intel_batchbuffer_reset(batch);
+    intel_batchbuffer_reset(batch, buffer_size);
 
     return batch;
 }
@@ -108,7 +121,7 @@ intel_batchbuffer_flush(struct intel_batchbuffer *batch)
     dri_bo_unmap(batch->buffer);
     used = batch->ptr - batch->map;
     batch->run(batch->buffer, used, 0, 0, 0, batch->flag);
-    intel_batchbuffer_reset(batch);
+    intel_batchbuffer_reset(batch, batch->size);
 }
 
 void 
@@ -188,6 +201,13 @@ intel_batchbuffer_emit_mi_flush(struct intel_batchbuffer *batch)
                 OUT_BLT_BATCH(batch, 0);
                 OUT_BLT_BATCH(batch, 0);
                 ADVANCE_BLT_BATCH(batch);
+            }else if (batch->flag == I915_EXEC_VEBOX) {
+                BEGIN_VEB_BATCH(batch, 4);
+                OUT_VEB_BATCH(batch, MI_FLUSH_DW);
+                OUT_VEB_BATCH(batch, 0);
+                OUT_VEB_BATCH(batch, 0);
+                OUT_VEB_BATCH(batch, 0);
+                ADVANCE_VEB_BATCH(batch);
             } else {
                 assert(batch->flag == I915_EXEC_BSD);
                 BEGIN_BCS_BATCH(batch, 4);
@@ -202,8 +222,8 @@ intel_batchbuffer_emit_mi_flush(struct intel_batchbuffer *batch)
         if (batch->flag == I915_EXEC_RENDER) {
             BEGIN_BATCH(batch, 1);
             OUT_BATCH(batch, MI_FLUSH | MI_FLUSH_STATE_INSTRUCTION_CACHE_INVALIDATE);
-            ADVANCE_BATCH(batch);
-        } else {
+            ADVANCE_BATCH(batch);		
+         } else {
             assert(batch->flag == I915_EXEC_BSD);
             BEGIN_BCS_BATCH(batch, 1);
             OUT_BCS_BATCH(batch, MI_FLUSH | MI_FLUSH_STATE_INSTRUCTION_CACHE_INVALIDATE);
@@ -230,7 +250,8 @@ intel_batchbuffer_check_batchbuffer_flag(struct intel_batchbuffer *batch, int fl
 {
     if (flag != I915_EXEC_RENDER &&
         flag != I915_EXEC_BLT &&
-        flag != I915_EXEC_BSD)
+        flag != I915_EXEC_BSD &&
+        flag != I915_EXEC_VEBOX)
         return;
 
     if (batch->flag == flag)
@@ -276,8 +297,39 @@ intel_batchbuffer_start_atomic_bcs(struct intel_batchbuffer *batch, unsigned int
 }
 
 void
+intel_batchbuffer_start_atomic_veb(struct intel_batchbuffer *batch, unsigned int size)
+{
+    intel_batchbuffer_start_atomic_helper(batch, I915_EXEC_VEBOX, size); /* same helper as the other start_atomic variants, with the VEBOX flag */
+}
+
+
+void
 intel_batchbuffer_end_atomic(struct intel_batchbuffer *batch)
 {
     assert(batch->atomic);
     batch->atomic = 0;
 }
+
+int
+intel_batchbuffer_used_size(struct intel_batchbuffer *batch)
+{
+    return batch->ptr - batch->map; /* distance from the start of the mapping to the write pointer */
+}
+
+void
+intel_batchbuffer_align(struct intel_batchbuffer *batch, unsigned int alignedment)
+{
+    int used = batch->ptr - batch->map; /* amount already written, counted in the same units as pad_size below */
+    int pad_size;
+
+    assert((alignedment & 3) == 0); /* alignment must be a multiple of 4, i.e. whole dwords */
+    pad_size = ALIGN(used, alignedment) - used; /* gap up to the aligned offset; presumably ALIGN rounds up -- confirm against its definition */
+    assert((pad_size & 3) == 0);
+    assert(intel_batchbuffer_space(batch) >= pad_size); /* padding must fit in the remaining space */
+
+    while (pad_size >= 4) { /* fill the gap with zero dwords, 4 bytes at a time */
+        intel_batchbuffer_emit_dword(batch, 0);
+        pad_size -= 4;
+    }
+}
+
diff --git a/src/intel_batchbuffer.h b/src/intel_batchbuffer.h
index 092da5a..70ceddb 100644
--- a/src/intel_batchbuffer.h
+++ b/src/intel_batchbuffer.h
@@ -26,11 +26,12 @@ struct intel_batchbuffer
                int DR4, unsigned int ring_flag);
 };
 
-struct intel_batchbuffer *intel_batchbuffer_new(struct intel_driver_data *intel, int flag);
+struct intel_batchbuffer *intel_batchbuffer_new(struct intel_driver_data *intel, int flag, int buffer_size);
 void intel_batchbuffer_free(struct intel_batchbuffer *batch);
 void intel_batchbuffer_start_atomic(struct intel_batchbuffer *batch, unsigned int size);
 void intel_batchbuffer_start_atomic_bcs(struct intel_batchbuffer *batch, unsigned int size);
 void intel_batchbuffer_start_atomic_blt(struct intel_batchbuffer *batch, unsigned int size);
+void intel_batchbuffer_start_atomic_veb(struct intel_batchbuffer *batch, unsigned int size);
 void intel_batchbuffer_end_atomic(struct intel_batchbuffer *batch);
 void intel_batchbuffer_emit_dword(struct intel_batchbuffer *batch, unsigned int x);
 void intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch, dri_bo *bo, 
@@ -44,6 +45,8 @@ void intel_batchbuffer_begin_batch(struct intel_batchbuffer *batch, int total);
 void intel_batchbuffer_advance_batch(struct intel_batchbuffer *batch);
 void intel_batchbuffer_check_batchbuffer_flag(struct intel_batchbuffer *batch, int flag);
 int intel_batchbuffer_check_free_space(struct intel_batchbuffer *batch, int size);
+int intel_batchbuffer_used_size(struct intel_batchbuffer *batch);
+void intel_batchbuffer_align(struct intel_batchbuffer *batch, unsigned int alignedment);
 
 #define __BEGIN_BATCH(batch, n, f) do {                         \
         assert(f == batch->flag);                               \
@@ -70,11 +73,12 @@ int intel_batchbuffer_check_free_space(struct intel_batchbuffer *batch, int size
 #define BEGIN_BATCH(batch, n)           __BEGIN_BATCH(batch, n, I915_EXEC_RENDER)
 #define BEGIN_BLT_BATCH(batch, n)       __BEGIN_BATCH(batch, n, I915_EXEC_BLT)
 #define BEGIN_BCS_BATCH(batch, n)       __BEGIN_BATCH(batch, n, I915_EXEC_BSD)
-
+#define BEGIN_VEB_BATCH(batch, n)       __BEGIN_BATCH(batch, n, I915_EXEC_VEBOX)
 
 #define OUT_BATCH(batch, d)             __OUT_BATCH(batch, d)
 #define OUT_BLT_BATCH(batch, d)         __OUT_BATCH(batch, d)
 #define OUT_BCS_BATCH(batch, d)         __OUT_BATCH(batch, d)
+#define OUT_VEB_BATCH(batch, d)         __OUT_BATCH(batch, d)
 
 #define OUT_RELOC(batch, bo, read_domains, write_domain, delta) \
     __OUT_RELOC(batch, bo, read_domains, write_domain, delta)
@@ -86,5 +90,6 @@ int intel_batchbuffer_check_free_space(struct intel_batchbuffer *batch, int size
 #define ADVANCE_BATCH(batch)            __ADVANCE_BATCH(batch)
 #define ADVANCE_BLT_BATCH(batch)        __ADVANCE_BATCH(batch)
 #define ADVANCE_BCS_BATCH(batch)        __ADVANCE_BATCH(batch)
+#define ADVANCE_VEB_BATCH(batch)        __ADVANCE_BATCH(batch)
 
 #endif /* _INTEL_BATCHBUFFER_H_ */
diff --git a/src/intel_driver.c b/src/intel_driver.c
index 4e6df81..c150dc5 100644
--- a/src/intel_driver.c
+++ b/src/intel_driver.c
@@ -27,10 +27,7 @@
  *
  */
 
-#include <assert.h>
-
-#include <va/va_dricommon.h>
-
+#include "sysdeps.h"
 #include "intel_batchbuffer.h"
 #include "intel_memman.h"
 #include "intel_driver.h"
@@ -46,19 +43,42 @@ intel_driver_get_param(struct intel_driver_data *intel, int param, int *value)
    return drmCommandWriteRead(intel->fd, DRM_I915_GETPARAM, &gp, sizeof(gp)) == 0;
 }
 
+/* Read the GPU's PCI revision ID from sysfs; on any failure store 2 in *value. */
+static void intel_driver_get_revid(struct intel_driver_data *intel, int *value)
+{
+#define PCI_REVID	8 /* byte offset of the Revision ID in PCI config space */
+    unsigned char config_data[16]; /* unsigned: IDs >= 0x80 must not go negative */
+    FILE *fp = fopen("/sys/devices/pci0000:00/0000:00:02.0/config", "r");
+
+    (void)intel; /* unused: the sysfs path is hard-coded to device 00:02.0 */
+    *value = 2; /* assume it is at least B-stepping */
+
+    if (!fp)
+        return;
+
+    /* A short read that stops before the revision byte would leave it
+     * uninitialized, so only trust it when enough bytes arrived. */
+    if (fread(config_data, 1, sizeof(config_data), fp) > PCI_REVID)
+        *value = config_data[PCI_REVID];
+
+    fclose(fp);
+}
+
 Bool 
 intel_driver_init(VADriverContextP ctx)
 {
     struct intel_driver_data *intel = intel_driver_data(ctx);
-    struct dri_state *dri_state = (struct dri_state *)ctx->dri_state;
+    struct drm_state * const drm_state = (struct drm_state *)ctx->drm_state;
     int has_exec2, has_bsd, has_blt;
 
-    assert(dri_state);
-    assert(dri_state->driConnectedFlag == VA_DRI2 || 
-           dri_state->driConnectedFlag == VA_DRI1);
+    assert(drm_state);
+    assert(VA_CHECK_DRM_AUTH_TYPE(ctx, VA_DRM_AUTH_DRI1) ||
+           VA_CHECK_DRM_AUTH_TYPE(ctx, VA_DRM_AUTH_DRI2) ||
+           VA_CHECK_DRM_AUTH_TYPE(ctx, VA_DRM_AUTH_CUSTOM));
 
-    intel->fd = dri_state->fd;
-    intel->dri2Enabled = (dri_state->driConnectedFlag == VA_DRI2);
+    intel->fd = drm_state->fd;
+    intel->dri2Enabled = (VA_CHECK_DRM_AUTH_TYPE(ctx, VA_DRM_AUTH_DRI2) ||
+                          VA_CHECK_DRM_AUTH_TYPE(ctx, VA_DRM_AUTH_CUSTOM));
 
     if (!intel->dri2Enabled) {
         return False;
@@ -74,7 +94,8 @@ intel_driver_init(VADriverContextP ctx)
         intel->has_bsd = has_bsd;
     if (intel_driver_get_param(intel, I915_PARAM_HAS_BLT, &has_blt))
         intel->has_blt = has_blt;
-
+   
+    intel_driver_get_revid(intel, &intel->revision);
     intel_memman_init(intel);
     return True;
 }
diff --git a/src/intel_driver.h b/src/intel_driver.h
index 339ff3f..8d83469 100644
--- a/src/intel_driver.h
+++ b/src/intel_driver.h
@@ -10,6 +10,7 @@
 #include <intel_bufmgr.h>
 
 #include <va/va_backend.h>
+#include "va_backend_compat.h"
 
 #include "intel_compiler.h"
 
@@ -37,7 +38,9 @@
 #define XY_COLOR_BLT_DST_TILED                  (1 << 11)
 
 /* BR13 */
+#define BR13_8                                  (0x0 << 24)
 #define BR13_565                                (0x1 << 24)
+#define BR13_1555                               (0x2 << 24)
 #define BR13_8888                               (0x3 << 24)
 
 #define CMD_PIPE_CONTROL                        (CMD_3D | (3 << 27) | (2 << 24) | (0 << 16))
@@ -104,6 +107,7 @@ struct intel_driver_data
 {
     int fd;
     int device_id;
+    int revision;
 
     int dri2Enabled;
 
@@ -168,6 +172,46 @@ struct intel_region
 #define PCI_CHIP_IVYBRIDGE_S_GT1        0x015a  /* Server */
 #define PCI_CHIP_IVYBRIDGE_S_GT2        0x016a
 
+#define PCI_CHIP_HASWELL_GT1            0x0402 /* Desktop */
+#define PCI_CHIP_HASWELL_GT2            0x0412
+#define PCI_CHIP_HASWELL_GT2_PLUS       0x0422
+#define PCI_CHIP_HASWELL_M_GT1          0x0406 /* Mobile */
+#define PCI_CHIP_HASWELL_M_GT2          0x0416
+#define PCI_CHIP_HASWELL_M_GT2_PLUS     0x0426
+#define PCI_CHIP_HASWELL_S_GT1          0x040a /* Server */
+#define PCI_CHIP_HASWELL_S_GT2          0x041a
+#define PCI_CHIP_HASWELL_S_GT2_PLUS     0x042a
+
+#define	PCI_CHIP_HASWELL_SDV_GT1		0x0c02 /* Desktop */
+#define	PCI_CHIP_HASWELL_SDV_GT2		0x0c12
+#define	PCI_CHIP_HASWELL_SDV_GT2_PLUS		0x0c22
+#define	PCI_CHIP_HASWELL_SDV_M_GT1		0x0c06 /* Mobile */
+#define	PCI_CHIP_HASWELL_SDV_M_GT2		0x0c16
+#define	PCI_CHIP_HASWELL_SDV_M_GT2_PLUS		0x0c26
+#define	PCI_CHIP_HASWELL_SDV_S_GT1		0x0c0a /* Server */
+#define	PCI_CHIP_HASWELL_SDV_S_GT2		0x0c1a
+#define	PCI_CHIP_HASWELL_SDV_S_GT2_PLUS		0x0c2a
+
+#define	PCI_CHIP_HASWELL_ULT_GT1		0x0A02 /* Desktop */
+#define	PCI_CHIP_HASWELL_ULT_GT2		0x0A12
+#define	PCI_CHIP_HASWELL_ULT_GT2_PLUS		0x0A22
+#define	PCI_CHIP_HASWELL_ULT_M_GT1		0x0A06 /* Mobile */
+#define	PCI_CHIP_HASWELL_ULT_M_GT2		0x0A16
+#define	PCI_CHIP_HASWELL_ULT_M_GT2_PLUS		0x0A26
+#define	PCI_CHIP_HASWELL_ULT_S_GT1		0x0A0A /* Server */
+#define	PCI_CHIP_HASWELL_ULT_S_GT2		0x0A1A
+#define	PCI_CHIP_HASWELL_ULT_S_GT2_PLUS		0x0A2A
+
+#define	PCI_CHIP_HASWELL_CRW_GT1		0x0D02 /* Desktop; low byte matches the 0x04xx ids */
+#define	PCI_CHIP_HASWELL_CRW_GT2		0x0D12
+#define	PCI_CHIP_HASWELL_CRW_GT2_PLUS		0x0D22
+#define	PCI_CHIP_HASWELL_CRW_M_GT1		0x0D06 /* Mobile */
+#define	PCI_CHIP_HASWELL_CRW_M_GT2		0x0D16
+#define	PCI_CHIP_HASWELL_CRW_M_GT2_PLUS		0x0D26
+#define	PCI_CHIP_HASWELL_CRW_S_GT1		0x0D0A /* Server */
+#define	PCI_CHIP_HASWELL_CRW_S_GT2		0x0D1A
+#define	PCI_CHIP_HASWELL_CRW_S_GT2_PLUS		0x0D2A
+
 #define IS_G45(devid)           (devid == PCI_CHIP_IGD_E_G ||   \
                                  devid == PCI_CHIP_Q45_G ||     \
                                  devid == PCI_CHIP_G45_G ||     \
@@ -182,6 +226,51 @@ struct intel_region
 #define IS_IRONLAKE_M(devid)    (devid == PCI_CHIP_IRONLAKE_M_G)
 #define IS_IRONLAKE(devid)      (IS_IRONLAKE_D(devid) || IS_IRONLAKE_M(devid))
 
+#define IS_HASWELL_ULT(devid)   (devid == PCI_CHIP_HASWELL_ULT_GT1	|| \
+				 devid == PCI_CHIP_HASWELL_ULT_GT2	|| \
+				 devid == PCI_CHIP_HASWELL_ULT_GT2_PLUS	|| \
+				 devid == PCI_CHIP_HASWELL_ULT_M_GT1	|| \
+				 devid == PCI_CHIP_HASWELL_ULT_M_GT2	|| \
+				 devid == PCI_CHIP_HASWELL_ULT_M_GT2_PLUS	|| \
+				 devid == PCI_CHIP_HASWELL_ULT_S_GT1	|| \
+				 devid == PCI_CHIP_HASWELL_ULT_S_GT2	|| \
+				 devid == PCI_CHIP_HASWELL_ULT_S_GT2_PLUS)
+
+#define IS_HSW_GT1(devid)   	(devid == PCI_CHIP_HASWELL_GT1		|| \
+                                 devid == PCI_CHIP_HASWELL_M_GT1	|| \
+                                 devid == PCI_CHIP_HASWELL_S_GT1	|| \
+                                 devid == PCI_CHIP_HASWELL_SDV_GT1	|| \
+                                 devid == PCI_CHIP_HASWELL_SDV_M_GT1	|| \
+                                 devid == PCI_CHIP_HASWELL_SDV_S_GT1	|| \
+                                 devid == PCI_CHIP_HASWELL_CRW_GT1	|| \
+                                 devid == PCI_CHIP_HASWELL_CRW_M_GT1	|| \
+                                 devid == PCI_CHIP_HASWELL_CRW_S_GT1)
+
+#define IS_HSW_GT2(devid)   	(devid == PCI_CHIP_HASWELL_GT2		|| \
+                                 devid == PCI_CHIP_HASWELL_M_GT2	|| \
+                                 devid == PCI_CHIP_HASWELL_S_GT2	|| \
+                                 devid == PCI_CHIP_HASWELL_SDV_GT2	|| \
+                                 devid == PCI_CHIP_HASWELL_SDV_M_GT2	|| \
+                                 devid == PCI_CHIP_HASWELL_SDV_S_GT2	|| \
+                                 devid == PCI_CHIP_HASWELL_CRW_GT2	|| \
+                                 devid == PCI_CHIP_HASWELL_CRW_M_GT2	|| \
+                                 devid == PCI_CHIP_HASWELL_CRW_S_GT2)
+
+#define IS_HSW_GT2_PLUS(devid)	(devid == PCI_CHIP_HASWELL_GT2_PLUS		|| \
+                                 devid == PCI_CHIP_HASWELL_M_GT2_PLUS		|| \
+                                 devid == PCI_CHIP_HASWELL_S_GT2_PLUS		|| \
+                                 devid == PCI_CHIP_HASWELL_SDV_GT2_PLUS		|| \
+                                 devid == PCI_CHIP_HASWELL_SDV_M_GT2_PLUS	|| \
+                                 devid == PCI_CHIP_HASWELL_SDV_S_GT2_PLUS	|| \
+                                 devid == PCI_CHIP_HASWELL_CRW_GT2_PLUS		|| \
+                                 devid == PCI_CHIP_HASWELL_CRW_M_GT2_PLUS	|| \
+                                 devid == PCI_CHIP_HASWELL_CRW_S_GT2_PLUS)
+
+#define IS_HASWELL(devid)       (IS_HSW_GT1(devid) || \
+				 IS_HSW_GT2(devid) || \
+				 IS_HSW_GT2_PLUS(devid) || \
+				 IS_HASWELL_ULT(devid))
+
 #define IS_GEN6(devid)          (devid == PCI_CHIP_SANDYBRIDGE_GT1 || \
                                  devid == PCI_CHIP_SANDYBRIDGE_GT2 || \
                                  devid == PCI_CHIP_SANDYBRIDGE_GT2_PLUS ||\
@@ -195,6 +284,11 @@ struct intel_region
                                  devid == PCI_CHIP_IVYBRIDGE_M_GT1 ||   \
                                  devid == PCI_CHIP_IVYBRIDGE_M_GT2 ||   \
                                  devid == PCI_CHIP_IVYBRIDGE_S_GT1 ||   \
-                                 devid == PCI_CHIP_IVYBRIDGE_S_GT2)
+                                 devid == PCI_CHIP_IVYBRIDGE_S_GT2 ||   \
+                                 IS_HASWELL(devid))
+
+#ifndef I915_EXEC_VEBOX
+#define I915_EXEC_VEBOX         4 /* fallback, used only when no earlier header defined it */
+#endif
 
 #endif /* _INTEL_DRIVER_H_ */
diff --git a/src/shaders/h264/mc/avc_mc.g4b.gen5 b/src/shaders/h264/mc/avc_mc.g4b.gen5
index cdee6ac..7048e1f 100644
--- a/src/shaders/h264/mc/avc_mc.g4b.gen5
+++ b/src/shaders/h264/mc/avc_mc.g4b.gen5
@@ -657,7 +657,7 @@
    { 0x00800001, 0x21300232, 0x00a904cc, 0x00000000 },
    { 0x01600031, 0x27800021, 0x508d07c0, 0x00000200 },
    { 0x00000001, 0x34000020, 0x000007e0, 0x00000000 },
-   { 0x00000000, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
    { 0x00600001, 0x27c00021, 0x008d0000, 0x00000000 },
    { 0x00200009, 0x27fa3e29, 0x00450064, 0x00040004 },
    { 0x00000401, 0x27c80061, 0x00000000, 0x000f000f },
@@ -687,9 +687,9 @@
    { 0x00000001, 0x27800021, 0x00000780, 0x00000000 },
    { 0x00000001, 0x27a00021, 0x000007a0, 0x00000000 },
    { 0x01600031, 0x20001c20, 0x708d0000, 0x82000010 },
-   { 0x00000000, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x00000000, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x00000000, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
    { 0x00600001, 0x20400021, 0x008d0000, 0x00000000 },
    { 0x00000005, 0x20203e2d, 0x00000061, 0x001f001f },
    { 0x00200009, 0x20643e2d, 0x00450064, 0x00040004 },
@@ -1817,7 +1817,7 @@
    { 0x00000001, 0x21400021, 0x00000140, 0x00000000 },
    { 0x00000001, 0x21600021, 0x00000160, 0x00000000 },
    { 0x01600031, 0x20001c20, 0x708d0000, 0x82000010 },
-   { 0x00000000, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
    { 0x00600001, 0x20400021, 0x008d0000, 0x00000000 },
    { 0x00000005, 0x20203e2d, 0x00000061, 0x001f001f },
    { 0x00200009, 0x20643e2d, 0x00450064, 0x00040004 },
@@ -2393,7 +2393,7 @@
    { 0x00000001, 0x21400021, 0x00000140, 0x00000000 },
    { 0x00000001, 0x21600021, 0x00000160, 0x00000000 },
    { 0x01600031, 0x20001c20, 0x708d0000, 0x82000010 },
-   { 0x00000000, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
    { 0x01000005, 0x20602d29, 0x02000042, 0x00070007 },
    { 0x01000005, 0x20422d29, 0x00000042, 0xfff8fff8 },
    { 0x00000001, 0x202001e9, 0x00000000, 0x100c100c },
@@ -2563,9 +2563,9 @@
    { 0x00000040, 0x22040c00, 0x00000204, 0x00400040 },
    { 0x00110220, 0x34001c00, 0x00001400, 0xffffffda },
    { 0x01600031, 0x20001c20, 0x708d0000, 0x82000010 },
-   { 0x00000000, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x00000000, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x00000000, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
    { 0x01000005, 0x20602d29, 0x02000042, 0x00070007 },
    { 0x01000005, 0x20422d29, 0x00000042, 0xfff8fff8 },
    { 0x00600001, 0x27e00021, 0x008d0000, 0x00000000 },
@@ -2935,9 +2935,9 @@
    { 0x01600031, 0x20001c20, 0x708d0000, 0x82000010 },
    { 0x01600031, 0x20001c20, 0x708d0000, 0x82000010 },
    { 0x01600031, 0x20001c20, 0x708d0000, 0x82000010 },
-   { 0x00000000, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x00000000, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x00000000, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
    { 0x00600001, 0x27e00021, 0x008d0000, 0x00000000 },
    { 0x00a02001, 0x20400169, 0x00000000, 0x00000000 },
    { 0x00000001, 0x206601ed, 0x00000000, 0x00030003 },
@@ -3024,8 +3024,8 @@
    { 0x00010220, 0x34001c00, 0x00001400, 0xfffffffc },
    { 0x07600031, 0x20001c20, 0x30000000, 0x02000001 },
    { 0x01600031, 0x20001c24, 0x708d0000, 0x82000000 },
-   { 0x00000000, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x00000000, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
    { 0x00600001, 0x27e00021, 0x008d0000, 0x00000000 },
    { 0x00600009, 0x25c03dad, 0x00050034, 0x00040004 },
    { 0x00000001, 0x220801ec, 0x00000000, 0x02400240 },
@@ -3355,9 +3355,9 @@
    { 0x00008025, 0x20000000, 0x00000000, 0x00000000 },
    { 0x00008025, 0x20000000, 0x00000000, 0x00000000 },
    { 0x00000001, 0x34000020, 0x000007c0, 0x00000000 },
-   { 0x00000000, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x00000000, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x00000000, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
    { 0x00600001, 0x27e00021, 0x008d0000, 0x00000000 },
    { 0x00a02001, 0x20400169, 0x00000000, 0x00000000 },
    { 0x00000001, 0x206601ed, 0x00000000, 0x00030003 },
@@ -3441,7 +3441,7 @@
    { 0x00000220, 0x34001c00, 0x00001400, 0xffffffe8 },
    { 0x07600031, 0x20001c20, 0x30000000, 0x02000001 },
    { 0x01600031, 0x20001c24, 0x708d0000, 0x82000012 },
-   { 0x00000000, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
    { 0x00600001, 0x27e00021, 0x008d0000, 0x00000000 },
    { 0x00600009, 0x25c03dad, 0x00050034, 0x00040004 },
    { 0x00000001, 0x220801ec, 0x00000000, 0x02400240 },
@@ -3657,7 +3657,7 @@
    { 0x00008025, 0x20000000, 0x00000000, 0x00000000 },
    { 0x00008025, 0x20000000, 0x00000000, 0x00000000 },
    { 0x00000001, 0x34000020, 0x000007c0, 0x00000000 },
-   { 0x00000000, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
    { 0x00600001, 0x27e00021, 0x008d0000, 0x00000000 },
    { 0x00a02001, 0x20400169, 0x00000000, 0x00000000 },
    { 0x00000001, 0x206601ed, 0x00000000, 0x00030003 },
@@ -3744,8 +3744,8 @@
    { 0x00010220, 0x34001c00, 0x00001400, 0xfffffffc },
    { 0x07600031, 0x20001c20, 0x30000000, 0x02000001 },
    { 0x01600031, 0x20001c24, 0x708d0000, 0x82000000 },
-   { 0x00000000, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x00000000, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
    { 0x00600001, 0x27e00021, 0x008d0000, 0x00000000 },
    { 0x00600009, 0x25c03dad, 0x00050034, 0x00040004 },
    { 0x00000001, 0x220801ec, 0x00000000, 0x02400240 },
@@ -3984,8 +3984,8 @@
    { 0x00000c01, 0x26100169, 0x00000000, 0x12121212 },
    { 0x00400801, 0x26000171, 0x00000000, 0xffffffff },
    { 0x00600031, 0x20001c20, 0x308d0600, 0x82008002 },
-   { 0x00000000, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x00000000, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
    { 0x00600001, 0x27e00021, 0x008d0000, 0x00000000 },
    { 0x00a02001, 0x20400169, 0x00000000, 0x00000000 },
    { 0x00000001, 0x206601ed, 0x00000000, 0x00030003 },
@@ -4069,7 +4069,7 @@
    { 0x00000220, 0x34001c00, 0x00001400, 0xffffffe8 },
    { 0x07600031, 0x20001c20, 0x30000000, 0x02000001 },
    { 0x01600031, 0x20001c24, 0x708d0000, 0x82000012 },
-   { 0x00000000, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
    { 0x00600001, 0x27e00021, 0x008d0000, 0x00000000 },
    { 0x00600009, 0x25c03dad, 0x00050034, 0x00040004 },
    { 0x00000001, 0x220801ec, 0x00000000, 0x02400240 },
@@ -4268,8 +4268,8 @@
    { 0x00000c01, 0x26100169, 0x00000000, 0x12121212 },
    { 0x00400801, 0x26000171, 0x00000000, 0xffffffff },
    { 0x00600031, 0x20001c20, 0x308d0600, 0x82008002 },
-   { 0x00000000, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x00000000, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
    { 0x00600001, 0x27e00021, 0x008d0000, 0x00000000 },
    { 0x00a02001, 0x20400169, 0x00000000, 0x00000000 },
    { 0x00000001, 0x206601ed, 0x00000000, 0x00010001 },
@@ -4357,7 +4357,7 @@
    { 0x00010220, 0x34001c00, 0x00001400, 0xfffffffc },
    { 0x07600031, 0x20001c20, 0x30000000, 0x02000001 },
    { 0x01600031, 0x20001c24, 0x708d0000, 0x82000000 },
-   { 0x00000000, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
    { 0x00600001, 0x27e00021, 0x008d0000, 0x00000000 },
    { 0x00400441, 0x45c03dad, 0x00000034, 0x00100010 },
    { 0x00400841, 0x45c23dad, 0x00000036, 0x00200020 },
@@ -4796,8 +4796,8 @@
    { 0x00008025, 0x20000000, 0x00000000, 0x00000000 },
    { 0x00008025, 0x20000000, 0x00000000, 0x00000000 },
    { 0x00000001, 0x34000020, 0x000007c0, 0x00000000 },
-   { 0x00000000, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x00000000, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
    { 0x00600001, 0x27e00021, 0x008d0000, 0x00000000 },
    { 0x00a02001, 0x20400169, 0x00000000, 0x00000000 },
    { 0x00000001, 0x206601ed, 0x00000000, 0x00010001 },
@@ -4881,7 +4881,7 @@
    { 0x00000220, 0x34001c00, 0x00001400, 0xffffffe8 },
    { 0x07600031, 0x20001c20, 0x30000000, 0x02000001 },
    { 0x01600031, 0x20001c24, 0x708d0000, 0x82000012 },
-   { 0x00000000, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
    { 0x00600001, 0x27e00021, 0x008d0000, 0x00000000 },
    { 0x00400441, 0x45c03dad, 0x00000034, 0x00100010 },
    { 0x00400841, 0x45c23dad, 0x00000036, 0x00200020 },
diff --git a/src/shaders/post_processing/Common/PA_Load_8x8.asm b/src/shaders/post_processing/Common/PA_Load_8x8.asm
deleted file mode 100644
index 3569bd1..0000000
--- a/src/shaders/post_processing/Common/PA_Load_8x8.asm
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * All Video Processing kernels 
- * Copyright © <2010>, Intel Corporation.
- *
- * This program is licensed under the terms and conditions of the
- * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at
- * http://www.opensource.org/licenses/eclipse-1.0.php.
- *
- */
-
-// Module name: PA_Load_8x8.asm
-//----------------------------------------------------------------
-
-#define  PA_LOAD_8x8
-#include "PA_Load.inc"
-
-//  Load 16x8 packed data block
-//  Packed data block should be loaded as 32x8 pixel block
-    add  (2) rMSGSRC.0<1>:d     wORIX<2;2,1>:w    wSRC_H_ORI_OFFSET<2;2,1>:w       // Source Block origin
-    shl  (1) rMSGSRC.0<1>:d     acc0:w            1:w                              // H. block origin need to be doubled
-    mov  (1) rMSGSRC.2<1>:ud    nDPR_BLOCK_SIZE_YUV:ud                             // Block width and height (32x8)
-    mov  (8) mMSGHDRY<1>:ud     rMSGSRC<8;8,1>:ud
-    send (8) udSRC_YUV(0)<1>    mMSGHDRY    udDUMMY_NULL    nDATAPORT_READ    nDPMR_MSGDSC+nDPR_MSG_SIZE_YUV+nBI_CURRENT_SRC_YUV:ud
-
-//  Unpack to "planar" YUV422 format in word-aligned bytes
-    add  (4) pCF_Y_OFFSET<1>:uw    ubSRC_CF_OFFSET<4;4,1>:ub    nSRC_YUV_REG*nGRFWIB:w    // Initial Y,U,V offset in YUV422 block
-    $for(0; <nY_NUM_OF_ROWS; 1) {
-        mov (16)  uwDEST_Y(0, %1*16)<1>     r[pCF_Y_OFFSET, %1*nGRFWIB]REGION(16,2)
-        mov (8)   uwDEST_U(0, %1*8)<1>      r[pCF_U_OFFSET, %1*nGRFWIB]REGION(8,4)
-        mov (8)   uwDEST_V(0, %1*8)<1>      r[pCF_V_OFFSET, %1*nGRFWIB]REGION(8,4)
-    }
-
-// End of PA_Load_8x8
diff --git a/src/shaders/post_processing/Core_Kernels/PL2_AVS_IEF_Unpack_16x8.asm b/src/shaders/post_processing/Core_Kernels/PL2_AVS_IEF_Unpack_16x8.asm
deleted file mode 100644
index 6e67557..0000000
--- a/src/shaders/post_processing/Core_Kernels/PL2_AVS_IEF_Unpack_16x8.asm
+++ /dev/null
@@ -1,271 +0,0 @@
-/*
- * All Video Processing kernels 
- * Copyright © <2010>, Intel Corporation.
- *
- * This program is licensed under the terms and conditions of the
- * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at
- * http://www.opensource.org/licenses/eclipse-1.0.php.
- *
- */
-
-//---------- PL2_AVS_IEF_Unpack_16x8.asm ----------
-        
-#ifdef AVS_OUTPUT_16_BIT	//Output is packed in AVYU format
-// Move first 8x8 words of Y to dest GRF (as packed)
-    mov (4) uwDEST_Y(0,1)<4>       uwAVS_RESPONSE(0,0)<4;4,1>                                      
-    mov (4) uwDEST_Y(1,1)<4>       uwAVS_RESPONSE(0,8)<4;4,1>                                      
-    mov (4) uwDEST_Y(4,1)<4>       uwAVS_RESPONSE(0,4)<4;4,1>                                    
-    mov (4) uwDEST_Y(5,1)<4>       uwAVS_RESPONSE(0,12)<4;4,1>                                    
-    mov (4) uwDEST_Y(8,1)<4>       uwAVS_RESPONSE(1,0)<4;4,1>                                      
-    mov (4) uwDEST_Y(9,1)<4>       uwAVS_RESPONSE(1,8)<4;4,1>                                      
-    mov (4) uwDEST_Y(12,1)<4>      uwAVS_RESPONSE(1,4)<4;4,1>                                    
-    mov (4) uwDEST_Y(13,1)<4>      uwAVS_RESPONSE(1,12)<4;4,1>                                    
-    mov (4) uwDEST_Y(16,1)<4>      uwAVS_RESPONSE(2,0)<4;4,1>                                     
-    mov (4) uwDEST_Y(17,1)<4>      uwAVS_RESPONSE(2,8)<4;4,1>                                     
-    mov (4) uwDEST_Y(20,1)<4>      uwAVS_RESPONSE(2,4)<4;4,1>                                   
-    mov (4) uwDEST_Y(21,1)<4>      uwAVS_RESPONSE(2,12)<4;4,1>                                   
-    mov (4) uwDEST_Y(24,1)<4>      uwAVS_RESPONSE(3,0)<4;4,1>                                     
-    mov (4) uwDEST_Y(25,1)<4>      uwAVS_RESPONSE(3,8)<4;4,1>                                     
-    mov (4) uwDEST_Y(28,1)<4>      uwAVS_RESPONSE(3,4)<4;4,1>                                   
-    mov (4) uwDEST_Y(29,1)<4>      uwAVS_RESPONSE(3,12)<4;4,1>                                   
-
-// Move first 8x8 words of U to dest GRF (as packed)
-    mov (4) uwDEST_Y(0,0)<4>       uwAVS_RESPONSE(4,0)<4;4,1>                                      
-    mov (4) uwDEST_Y(1,0)<4>       uwAVS_RESPONSE(4,8)<4;4,1>                                      
-    mov (4) uwDEST_Y(4,0)<4>       uwAVS_RESPONSE(4,4)<4;4,1>                                    
-    mov (4) uwDEST_Y(5,0)<4>       uwAVS_RESPONSE(4,12)<4;4,1>                                    
-    mov (4) uwDEST_Y(8,0)<4>       uwAVS_RESPONSE(5,0)<4;4,1>                                      
-    mov (4) uwDEST_Y(9,0)<4>       uwAVS_RESPONSE(5,8)<4;4,1>                                      
-    mov (4) uwDEST_Y(12,0)<4>      uwAVS_RESPONSE(5,4)<4;4,1>                                    
-    mov (4) uwDEST_Y(13,0)<4>      uwAVS_RESPONSE(5,12)<4;4,1>                                    
-    mov (4) uwDEST_Y(16,0)<4>      uwAVS_RESPONSE(8,0)<4;4,1>                                     
-    mov (4) uwDEST_Y(17,0)<4>      uwAVS_RESPONSE(8,8)<4;4,1>                                     
-    mov (4) uwDEST_Y(20,0)<4>      uwAVS_RESPONSE(8,4)<4;4,1>                                   
-    mov (4) uwDEST_Y(21,0)<4>      uwAVS_RESPONSE(8,12)<4;4,1>                                   
-    mov (4) uwDEST_Y(24,0)<4>      uwAVS_RESPONSE(9,0)<4;4,1>                                     
-    mov (4) uwDEST_Y(25,0)<4>      uwAVS_RESPONSE(9,8)<4;4,1>                                     
-    mov (4) uwDEST_Y(28,0)<4>      uwAVS_RESPONSE(9,4)<4;4,1>                                   
-    mov (4) uwDEST_Y(29,0)<4>      uwAVS_RESPONSE(9,12)<4;4,1>                                   
-
-// Move first 8x8 words of V to dest GRF (as packed)
-    mov (4) uwDEST_Y(0,2)<4>       uwAVS_RESPONSE(6,0)<4;4,1>                                      
-    mov (4) uwDEST_Y(1,2)<4>       uwAVS_RESPONSE(6,8)<4;4,1>                                      
-    mov (4) uwDEST_Y(4,2)<4>       uwAVS_RESPONSE(6,4)<4;4,1>                                    
-    mov (4) uwDEST_Y(5,2)<4>       uwAVS_RESPONSE(6,12)<4;4,1>                                    
-    mov (4) uwDEST_Y(8,2)<4>       uwAVS_RESPONSE(7,0)<4;4,1>                                      
-    mov (4) uwDEST_Y(9,2)<4>       uwAVS_RESPONSE(7,8)<4;4,1>                                      
-    mov (4) uwDEST_Y(12,2)<4>      uwAVS_RESPONSE(7,4)<4;4,1>                                    
-    mov (4) uwDEST_Y(13,2)<4>      uwAVS_RESPONSE(7,12)<4;4,1>                                    
-    mov (4) uwDEST_Y(16,2)<4>      uwAVS_RESPONSE(10,0)<4;4,1>                                     
-    mov (4) uwDEST_Y(17,2)<4>      uwAVS_RESPONSE(10,8)<4;4,1>                                     
-    mov (4) uwDEST_Y(20,2)<4>      uwAVS_RESPONSE(10,4)<4;4,1>                                   
-    mov (4) uwDEST_Y(21,2)<4>      uwAVS_RESPONSE(10,12)<4;4,1>                                   
-    mov (4) uwDEST_Y(24,2)<4>      uwAVS_RESPONSE(11,0)<4;4,1>                                     
-    mov (4) uwDEST_Y(25,2)<4>      uwAVS_RESPONSE(11,8)<4;4,1>                                     
-    mov (4) uwDEST_Y(28,2)<4>      uwAVS_RESPONSE(11,4)<4;4,1>                                   
-    mov (4) uwDEST_Y(29,2)<4>      uwAVS_RESPONSE(11,12)<4;4,1>                                   
-
-// Move first 8x8 words of A to dest GRF (as packed)
-    mov (4) uwDEST_Y(0,3)<4>       0:uw                                    
-    mov (4) uwDEST_Y(1,3)<4>       0:uw                                    
-    mov (4) uwDEST_Y(4,3)<4>       0:uw                                  
-    mov (4) uwDEST_Y(5,3)<4>       0:uw                                   
-    mov (4) uwDEST_Y(8,3)<4>       0:uw                                    
-    mov (4) uwDEST_Y(9,3)<4>       0:uw                                    
-    mov (4) uwDEST_Y(12,3)<4>      0:uw                                  
-    mov (4) uwDEST_Y(13,3)<4>      0:uw                                   
-    mov (4) uwDEST_Y(16,3)<4>      0:uw                                    
-    mov (4) uwDEST_Y(17,3)<4>      0:uw                                    
-    mov (4) uwDEST_Y(20,3)<4>      0:uw                                  
-    mov (4) uwDEST_Y(21,3)<4>      0:uw                                   
-    mov (4) uwDEST_Y(24,3)<4>      0:uw                                    
-    mov (4) uwDEST_Y(25,3)<4>      0:uw                                    
-    mov (4) uwDEST_Y(28,3)<4>      0:uw                                  
-    mov (4) uwDEST_Y(29,3)<4>      0:uw                                   
-
-// Move second 8x8 words of Y to dest GRF
-    mov (4) uwDEST_Y(2,1)<4>       uwAVS_RESPONSE_2(0,0)<4;4,1>                                      
-    mov (4) uwDEST_Y(3,1)<4>       uwAVS_RESPONSE_2(0,8)<4;4,1>                                      
-    mov (4) uwDEST_Y(6,1)<4>       uwAVS_RESPONSE_2(0,4)<4;4,1>                                    
-    mov (4) uwDEST_Y(7,1)<4>       uwAVS_RESPONSE_2(0,12)<4;4,1>                                    
-    mov (4) uwDEST_Y(10,1)<4>      uwAVS_RESPONSE_2(1,0)<4;4,1>                                      
-    mov (4) uwDEST_Y(11,1)<4>      uwAVS_RESPONSE_2(1,8)<4;4,1>                                      
-    mov (4) uwDEST_Y(14,1)<4>      uwAVS_RESPONSE_2(1,4)<4;4,1>                                    
-    mov (4) uwDEST_Y(15,1)<4>      uwAVS_RESPONSE_2(1,12)<4;4,1>                                    
-    mov (4) uwDEST_Y(18,1)<4>      uwAVS_RESPONSE_2(2,0)<4;4,1>                                     
-    mov (4) uwDEST_Y(19,1)<4>      uwAVS_RESPONSE_2(2,8)<4;4,1>                                     
-    mov (4) uwDEST_Y(22,1)<4>      uwAVS_RESPONSE_2(2,4)<4;4,1>                                   
-    mov (4) uwDEST_Y(23,1)<4>      uwAVS_RESPONSE_2(2,12)<4;4,1>                                   
-    mov (4) uwDEST_Y(26,1)<4>      uwAVS_RESPONSE_2(3,0)<4;4,1>                                     
-    mov (4) uwDEST_Y(27,1)<4>      uwAVS_RESPONSE_2(3,8)<4;4,1>                                     
-    mov (4) uwDEST_Y(30,1)<4>      uwAVS_RESPONSE_2(3,4)<4;4,1>                                   
-    mov (4) uwDEST_Y(31,1)<4>      uwAVS_RESPONSE_2(3,12)<4;4,1>                                   
-
-// Move second 8x8 words of U to dest GRF
-    mov (4) uwDEST_Y(2,0)<4>       uwAVS_RESPONSE_2(4,0)<4;4,1>                                      
-    mov (4) uwDEST_Y(3,0)<4>       uwAVS_RESPONSE_2(4,8)<4;4,1>                                      
-    mov (4) uwDEST_Y(6,0)<4>       uwAVS_RESPONSE_2(4,4)<4;4,1>                                    
-    mov (4) uwDEST_Y(7,0)<4>       uwAVS_RESPONSE_2(4,12)<4;4,1>                                    
-    mov (4) uwDEST_Y(10,0)<4>      uwAVS_RESPONSE_2(5,0)<4;4,1>                                      
-    mov (4) uwDEST_Y(11,0)<4>      uwAVS_RESPONSE_2(5,8)<4;4,1>                                      
-    mov (4) uwDEST_Y(14,0)<4>      uwAVS_RESPONSE_2(5,4)<4;4,1>                                    
-    mov (4) uwDEST_Y(15,0)<4>      uwAVS_RESPONSE_2(5,12)<4;4,1>                                    
-    mov (4) uwDEST_Y(18,0)<4>      uwAVS_RESPONSE_2(8,0)<4;4,1>                                     
-    mov (4) uwDEST_Y(19,0)<4>      uwAVS_RESPONSE_2(8,8)<4;4,1>                                     
-    mov (4) uwDEST_Y(22,0)<4>      uwAVS_RESPONSE_2(8,4)<4;4,1>                                   
-    mov (4) uwDEST_Y(23,0)<4>      uwAVS_RESPONSE_2(8,12)<4;4,1>                                   
-    mov (4) uwDEST_Y(26,0)<4>      uwAVS_RESPONSE_2(9,0)<4;4,1>                                     
-    mov (4) uwDEST_Y(27,0)<4>      uwAVS_RESPONSE_2(9,8)<4;4,1>                                     
-    mov (4) uwDEST_Y(30,0)<4>      uwAVS_RESPONSE_2(9,4)<4;4,1>                                   
-    mov (4) uwDEST_Y(31,0)<4>      uwAVS_RESPONSE_2(9,12)<4;4,1>                                   
-
-// Move second 8x8 words of V to dest GRF
-    mov (4) uwDEST_Y(2,2)<4>       uwAVS_RESPONSE_2(6,0)<4;4,1>                                      
-    mov (4) uwDEST_Y(3,2)<4>       uwAVS_RESPONSE_2(6,8)<4;4,1>                                      
-    mov (4) uwDEST_Y(6,2)<4>       uwAVS_RESPONSE_2(6,4)<4;4,1>                                    
-    mov (4) uwDEST_Y(7,2)<4>       uwAVS_RESPONSE_2(6,12)<4;4,1>                                    
-    mov (4) uwDEST_Y(10,2)<4>      uwAVS_RESPONSE_2(7,0)<4;4,1>                                      
-    mov (4) uwDEST_Y(11,2)<4>      uwAVS_RESPONSE_2(7,8)<4;4,1>                                      
-    mov (4) uwDEST_Y(14,2)<4>      uwAVS_RESPONSE_2(7,4)<4;4,1>                                    
-    mov (4) uwDEST_Y(15,2)<4>      uwAVS_RESPONSE_2(7,12)<4;4,1>                                    
-    mov (4) uwDEST_Y(18,2)<4>      uwAVS_RESPONSE_2(10,0)<4;4,1>                                     
-    mov (4) uwDEST_Y(19,2)<4>      uwAVS_RESPONSE_2(10,8)<4;4,1>                                     
-    mov (4) uwDEST_Y(22,2)<4>      uwAVS_RESPONSE_2(10,4)<4;4,1>                                   
-    mov (4) uwDEST_Y(23,2)<4>      uwAVS_RESPONSE_2(10,12)<4;4,1>                                   
-    mov (4) uwDEST_Y(26,2)<4>      uwAVS_RESPONSE_2(11,0)<4;4,1>                                     
-    mov (4) uwDEST_Y(27,2)<4>      uwAVS_RESPONSE_2(11,8)<4;4,1>                                     
-    mov (4) uwDEST_Y(30,2)<4>      uwAVS_RESPONSE_2(11,4)<4;4,1>                                   
-    mov (4) uwDEST_Y(31,2)<4>      uwAVS_RESPONSE_2(11,12)<4;4,1>                                   
-
-// Move second 8x8 words of A to dest GRF
-    mov (4) uwDEST_Y(2,3)<4>       0:uw                                    
-    mov (4) uwDEST_Y(3,3)<4>       0:uw                                    
-    mov (4) uwDEST_Y(6,3)<4>       0:uw                                  
-    mov (4) uwDEST_Y(7,3)<4>       0:uw                                   
-    mov (4) uwDEST_Y(10,3)<4>      0:uw                                    
-    mov (4) uwDEST_Y(11,3)<4>      0:uw                                    
-    mov (4) uwDEST_Y(14,3)<4>      0:uw                                  
-    mov (4) uwDEST_Y(15,3)<4>      0:uw                                   
-    mov (4) uwDEST_Y(18,3)<4>      0:uw                                    
-    mov (4) uwDEST_Y(19,3)<4>      0:uw                                    
-    mov (4) uwDEST_Y(22,3)<4>      0:uw                                  
-    mov (4) uwDEST_Y(23,3)<4>      0:uw                                   
-    mov (4) uwDEST_Y(26,3)<4>      0:uw                                    
-    mov (4) uwDEST_Y(27,3)<4>      0:uw                                    
-    mov (4) uwDEST_Y(30,3)<4>      0:uw                                  
-    mov (4) uwDEST_Y(31,3)<4>      0:uw                                   
-
-/*	This section will be used if 16-bit output is needed in planar format -vK
-     // Move 1st 8x8 words of Y to dest GRF at lower 8 words of each GRF.
-    $for(0; <8/2; 1) {
-        mov (8) uwDEST_Y(%1*2)<1>        uwAVS_RESPONSE(%1,0)<8;4,1>     
-        mov (8) uwDEST_Y(%1*2+1)<1>      uwAVS_RESPONSE(%1,8)<8;4,1>   
-    } 
-    
-    // Move 1st 8x8 words of U to dest GRF  (Copy high byte in a word)
-    mov (8) uwDEST_U(0)<1>           uwAVS_RESPONSE(4,0)<8;4,1>      
-    mov (8) uwDEST_U(1)<1>           uwAVS_RESPONSE(4,8)<8;4,1>    
-    mov (8) uwDEST_U(2)<1>           uwAVS_RESPONSE(5,0)<8;4,1>      
-    mov (8) uwDEST_U(3)<1>           uwAVS_RESPONSE(5,8)<8;4,1>    
-    mov (8) uwDEST_U(4)<1>           uwAVS_RESPONSE(8,0)<8;4,1>      
-    mov (8) uwDEST_U(5)<1>           uwAVS_RESPONSE(8,8)<8;4,1>    
-    mov (8) uwDEST_U(6)<1>           uwAVS_RESPONSE(9,0)<8;4,1>      
-    mov (8) uwDEST_U(7)<1>           uwAVS_RESPONSE(9,8)<8;4,1>    
-
-    // Move 1st 8x8 words of V to dest GRF  
-    mov (8) uwDEST_V(0)<1>           uwAVS_RESPONSE(6,0)<8;4,1>      
-    mov (8) uwDEST_V(1)<1>           uwAVS_RESPONSE(6,8)<8;4,1>    
-    mov (8) uwDEST_V(2)<1>           uwAVS_RESPONSE(7,0)<8;4,1>      
-    mov (8) uwDEST_V(3)<1>           uwAVS_RESPONSE(7,8)<8;4,1>    
-    mov (8) uwDEST_V(4)<1>           uwAVS_RESPONSE(10,0)<8;4,1>     
-    mov (8) uwDEST_V(5)<1>           uwAVS_RESPONSE(10,8)<8;4,1>   
-    mov (8) uwDEST_V(6)<1>           uwAVS_RESPONSE(11,0)<8;4,1>     
-    mov (8) uwDEST_V(7)<1>           uwAVS_RESPONSE(11,8)<8;4,1>   
-
-    // Move 2nd 8x8 words of Y to dest GRF at higher 8 words of each GRF.
-    $for(0; <8/2; 1) {
-        mov (8) uwDEST_Y(%1*2,8)<1>      uwAVS_RESPONSE_2(%1,0)<8;4,1> 
-        mov (8) uwDEST_Y(%1*2+1,8)<1>    uwAVS_RESPONSE_2(%1,8)<8;4,1> 
-    } 
-
-    // Move 2nd 8x8 words of U to dest GRF  (Copy high byte in a word)
-    mov (8) uwDEST_U(0,8)<1>         uwAVS_RESPONSE_2(4,0)<8;4,1>      
-    mov (8) uwDEST_U(1,8)<1>         uwAVS_RESPONSE_2(4,8)<8;4,1>    
-    mov (8) uwDEST_U(2,8)<1>         uwAVS_RESPONSE_2(5,0)<8;4,1>      
-    mov (8) uwDEST_U(3,8)<1>         uwAVS_RESPONSE_2(5,8)<8;4,1>    
-    mov (8) uwDEST_U(4,8)<1>         uwAVS_RESPONSE_2(8,0)<8;4,1>      
-    mov (8) uwDEST_U(5,8)<1>         uwAVS_RESPONSE_2(8,8)<8;4,1>    
-    mov (8) uwDEST_U(6,8)<1>         uwAVS_RESPONSE_2(9,0)<8;4,1>      
-    mov (8) uwDEST_U(7,8)<1>         uwAVS_RESPONSE_2(9,8)<8;4,1>    
-
-    // Move 2nd 8x8 words of V to dest GRF  
-    mov (8) uwDEST_V(0,8)<1>         uwAVS_RESPONSE_2(6,0)<8;4,1>      
-    mov (8) uwDEST_V(1,8)<1>         uwAVS_RESPONSE_2(6,8)<8;4,1>    
-    mov (8) uwDEST_V(2,8)<1>         uwAVS_RESPONSE_2(7,0)<8;4,1>      
-    mov (8) uwDEST_V(3,8)<1>         uwAVS_RESPONSE_2(7,8)<8;4,1>    
-    mov (8) uwDEST_V(4,8)<1>         uwAVS_RESPONSE_2(10,0)<8;4,1>     
-    mov (8) uwDEST_V(5,8)<1>         uwAVS_RESPONSE_2(10,8)<8;4,1>   
-    mov (8) uwDEST_V(6,8)<1>         uwAVS_RESPONSE_2(11,0)<8;4,1>     
-    mov (8) uwDEST_V(7,8)<1>         uwAVS_RESPONSE_2(11,8)<8;4,1>   
-*/
-#else
-    // Move 1st 8x8 words of Y to dest GRF at lower 8 words of each GRF.
-    $for(0; <8/2; 1) {
-        mov (8) uwDEST_Y(%1*2)<1>        ubAVS_RESPONSE(%1,1)<16;4,2>      // Copy high byte in a word
-        mov (8) uwDEST_Y(%1*2+1)<1>      ubAVS_RESPONSE(%1,8+1)<16;4,2>    // Copy high byte in a word
-    } 
-
-    // Move 1st 8x8 words of U to dest GRF  (Copy high byte in a word)
-    mov (8) uwDEST_V(0)<1>           ubAVS_RESPONSE(4,1)<16;4,2>      
-    mov (8) uwDEST_V(1)<1>           ubAVS_RESPONSE(4,8+1)<16;4,2>    
-    mov (8) uwDEST_V(2)<1>           ubAVS_RESPONSE(5,1)<16;4,2>      
-    mov (8) uwDEST_V(3)<1>           ubAVS_RESPONSE(5,8+1)<16;4,2>    
-    mov (8) uwDEST_V(4)<1>           ubAVS_RESPONSE(8,1)<16;4,2>      
-    mov (8) uwDEST_V(5)<1>           ubAVS_RESPONSE(8,8+1)<16;4,2>    
-    mov (8) uwDEST_V(6)<1>           ubAVS_RESPONSE(9,1)<16;4,2>      
-    mov (8) uwDEST_V(7)<1>           ubAVS_RESPONSE(9,8+1)<16;4,2>    
-
-    // Move 1st 8x8 words of V to dest GRF  
-    mov (8) uwDEST_U(0)<1>           ubAVS_RESPONSE(6,1)<16;4,2>      
-    mov (8) uwDEST_U(1)<1>           ubAVS_RESPONSE(6,8+1)<16;4,2>    
-    mov (8) uwDEST_U(2)<1>           ubAVS_RESPONSE(7,1)<16;4,2>      
-    mov (8) uwDEST_U(3)<1>           ubAVS_RESPONSE(7,8+1)<16;4,2>    
-    mov (8) uwDEST_U(4)<1>           ubAVS_RESPONSE(10,1)<16;4,2>     
-    mov (8) uwDEST_U(5)<1>           ubAVS_RESPONSE(10,8+1)<16;4,2>   
-    mov (8) uwDEST_U(6)<1>           ubAVS_RESPONSE(11,1)<16;4,2>     
-    mov (8) uwDEST_U(7)<1>           ubAVS_RESPONSE(11,8+1)<16;4,2>   
-
-    // Move 2nd 8x8 words of Y to dest GRF at higher 8 words of each GRF.
-    $for(0; <8/2; 1) {
-        mov (8) uwDEST_Y(%1*2,8)<1>      ubAVS_RESPONSE_2(%1,1)<16;4,2>    // Copy high byte in a word
-        mov (8) uwDEST_Y(%1*2+1,8)<1>    ubAVS_RESPONSE_2(%1,8+1)<16;4,2>  // Copy high byte in a word
-    } 
-
-    // Move 2nd 8x8 words of U to dest GRF  (Copy high byte in a word)
-    mov (8) uwDEST_V(0,8)<1>         ubAVS_RESPONSE_2(4,1)<16;4,2>      
-    mov (8) uwDEST_V(1,8)<1>         ubAVS_RESPONSE_2(4,8+1)<16;4,2>    
-    mov (8) uwDEST_V(2,8)<1>         ubAVS_RESPONSE_2(5,1)<16;4,2>      
-    mov (8) uwDEST_V(3,8)<1>         ubAVS_RESPONSE_2(5,8+1)<16;4,2>    
-    mov (8) uwDEST_V(4,8)<1>         ubAVS_RESPONSE_2(8,1)<16;4,2>      
-    mov (8) uwDEST_V(5,8)<1>         ubAVS_RESPONSE_2(8,8+1)<16;4,2>    
-    mov (8) uwDEST_V(6,8)<1>         ubAVS_RESPONSE_2(9,1)<16;4,2>      
-    mov (8) uwDEST_V(7,8)<1>         ubAVS_RESPONSE_2(9,8+1)<16;4,2>    
-
-    // Move 2nd 8x8 words of V to dest GRF  
-    mov (8) uwDEST_U(0,8)<1>         ubAVS_RESPONSE_2(6,1)<16;4,2>      
-    mov (8) uwDEST_U(1,8)<1>         ubAVS_RESPONSE_2(6,8+1)<16;4,2>    
-    mov (8) uwDEST_U(2,8)<1>         ubAVS_RESPONSE_2(7,1)<16;4,2>      
-    mov (8) uwDEST_U(3,8)<1>         ubAVS_RESPONSE_2(7,8+1)<16;4,2>    
-    mov (8) uwDEST_U(4,8)<1>         ubAVS_RESPONSE_2(10,1)<16;4,2>     
-    mov (8) uwDEST_U(5,8)<1>         ubAVS_RESPONSE_2(10,8+1)<16;4,2>   
-    mov (8) uwDEST_U(6,8)<1>         ubAVS_RESPONSE_2(11,1)<16;4,2>     
-    mov (8) uwDEST_U(7,8)<1>         ubAVS_RESPONSE_2(11,8+1)<16;4,2>   
-#endif
-
-       // Re-define new # of lines
-       #undef nUV_NUM_OF_ROWS
-       #undef nY_NUM_OF_ROWS
-       
-       #define nY_NUM_OF_ROWS      8
-       #define nUV_NUM_OF_ROWS     8
-
diff --git a/src/shaders/post_processing/Core_Kernels/PL_DNDI_ALG_UVCopy_NV12.asm b/src/shaders/post_processing/Core_Kernels/PL_DNDI_ALG_UVCopy_NV12.asm
deleted file mode 100644
index 69330ba..0000000
--- a/src/shaders/post_processing/Core_Kernels/PL_DNDI_ALG_UVCopy_NV12.asm
+++ /dev/null
@@ -1,103 +0,0 @@
-/*
- * All Video Processing kernels 
- * Copyright © <2010>, Intel Corporation.
- *
- * This program is licensed under the terms and conditions of the
- * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at
- * http://www.opensource.org/licenses/eclipse-1.0.php.
- *
- */
-
-#define DI_ENABLE
-
-    #include "DNDI.inc"
-    
-    #undef  nY_NUM_OF_ROWS
-    #define nY_NUM_OF_ROWS      8       // Number of Y rows per block (4 rows for each frame) 
-    #undef  nUV_NUM_OF_ROWS
-    #define nUV_NUM_OF_ROWS     8       // Number of U/V rows per block
-
-    #undef  nSMPL_RESP_LEN
-    #define nSMPL_RESP_LEN          nSMPL_RESP_LEN_DNDI               // set the number of GRF 
-    #undef  nDPW_BLOCK_SIZE_HIST
-    #define nDPW_BLOCK_SIZE_HIST    nBLOCK_WIDTH_4+nBLOCK_HEIGHT_1    // HIST Block Size for Write is 4x1 per the WIDTH_4/HEIGHT_1 constants (was documented as 4x2 -- TODO confirm)
-    #undef  nDPW_BLOCK_SIZE_DN
-    #define nDPW_BLOCK_SIZE_DN      nBLOCK_WIDTH_16+nBLOCK_HEIGHT_4   // DN Block Size for Write is 16x4
-    #undef  nDPR_BLOCK_SIZE_UV
-    #define nDPR_BLOCK_SIZE_UV			nBLOCK_WIDTH_16+nBLOCK_HEIGHT_2   // DN Block Size for UV Write/Read is 16x2
-   
-////////////////////////////////////// Run the DN Algorithm ///////////////////////////////////////
-    #include "DNDI_COMMAND.asm"
-
-////////////////////////////////////// Rearrange for Internal Planar //////////////////////////////
-    // move the previous frame Y component to internal planar format
-    $for (0; <nY_NUM_OF_ROWS/2; 1) {
-        mov (16) uwDEST_Y(%1,0)<1>    ubRESP(nDI_PREV_FRAME_LUMA_OFFSET,%1*16)
-    }
-    // move the previous frame U,V components to internal planar format
-    $for (0; <nUV_NUM_OF_ROWS/2; 1) {
-        mov (8) uwDEST_U(0,%1*8)<1>   ubRESP(nDI_PREV_FRAME_CHROMA_OFFSET,%1*16+1)<16;8,2>  //U pixels
-        mov (8) uwDEST_V(0,%1*8)<1>   ubRESP(nDI_PREV_FRAME_CHROMA_OFFSET,%1*16)<16;8,2>    //V pixels
-    }
-    // move the current frame Y component to internal planar format
-    $for (0; <nY_NUM_OF_ROWS/2; 1) {
-        mov (16) uwDEST_Y(%1+4,0)<1>  ubRESP(nDI_CURR_FRAME_LUMA_OFFSET,%1*16)
-    }
-    // move the current frame U,V components to internal planar format
-    $for (0; <nUV_NUM_OF_ROWS/2; 1) {
-        mov (8) uwDEST_U(2,%1*8)<1>   ubRESP(nDI_CURR_FRAME_CHROMA_OFFSET,%1*16+1)<16;8,2>  //U pixels
-        mov (8) uwDEST_V(2,%1*8)<1>   ubRESP(nDI_CURR_FRAME_CHROMA_OFFSET,%1*16)<16;8,2>    //V pixels
-    }
-
-////////////////////////////////////// Save the STMM Data for Next Run /////////////////////////
-    // Write STMM to memory
-    shr (1)     rMSGSRC.0<1>:ud        wORIX<0;1,0>:w            1:w     // X origin / 2
-    mov (1)     rMSGSRC.1<1>:ud        wORIY<0;1,0>:w                    // Y origin
-    mov (1)     rMSGSRC.2<1>:ud        nDPW_BLOCK_SIZE_STMM:ud           // block width and height (8x4)
-    mov (8)     mudMSGHDR_STMM(0)<1>   rMSGSRC.0<8;8,1>:ud               // message header   
-    mov (8)     mudMSGHDR_STMM(1)<1>   udRESP(nDI_STMM_OFFSET,0)         // Move STMM to MRF 
-    send (8)    dNULLREG               mMSGHDR_STMM              udDUMMY_NULL    nDATAPORT_WRITE     nDPMW_MSGDSC+nDPMW_MSG_LEN_STMM+nBI_STMM_HISTORY_OUTPUT:ud      
-
-////////////////////////////////////// Save the History Data for Next Run /////////////////////////
-    #include "DI_Hist_Save.asm"
-
-////////////////////////////////////// Save the DN Curr Frame for Next Run ////////////////////////
-    add (4)     pCF_Y_OFFSET<1>:uw          ubSRC_CF_OFFSET<4;4,1>:ub  npDN_YUV:w
-    // check top/bottom field first
-    cmp.e.f0.0 (1)  null<1>:w               ubTFLD_FIRST<0;1,0>:ub     1:w
-    (f0.0) jmpi (1) TOP_FIELD_FIRST
-
-BOTTOM_FIELD_FIRST:
-    $for (0,0; <nY_NUM_OF_ROWS/2; 2,1) {
-        mov (4)     mudMSGHDR_DN(1,%1*4)<1>     udRESP(nDI_CURR_2ND_FIELD_LUMA_OFFSET,%2*4)<4;4,1> // 2nd field luma from current frame (line 0,2)
-        mov (4)     mudMSGHDR_DN(1,%1*4+4)<1>   udRESP(nDI_CURR_FRAME_LUMA_OFFSET+%2,4)<4;4,1> // 1st field luma from current frame (line 1,3)
-    }
-    jmpi (1) SAVE_DN_CURR
-    
-TOP_FIELD_FIRST:
-    $for (0,0; <nY_NUM_OF_ROWS/2; 2,1) {
-        mov (4)     mudMSGHDR_DN(1,%1*4)<1>     udRESP(nDI_CURR_FRAME_LUMA_OFFSET+%2,0)<4;4,1> // 1st field luma from current frame (line 0,2)
-        mov (4)     mudMSGHDR_DN(1,%1*4+4)<1>   udRESP(nDI_CURR_2ND_FIELD_LUMA_OFFSET,%2*4)<4;4,1> // 2nd field luma from current frame (line 1,3)
-    }
-SAVE_DN_CURR:
-    mov (2)     rMSGSRC.0<1>:ud        wORIX<2;2,1>:w               // X origin and Y origin
-    mov (1)     rMSGSRC.2<1>:ud        nDPW_BLOCK_SIZE_DN:ud        // block width and height (16x4)
-    mov (8)     mudMSGHDR_DN(0)<1>     rMSGSRC.0<8;8,1>:ud
-    send (8)    dNULLREG    mMSGHDR_DN   udDUMMY_NULL    nDATAPORT_WRITE    nDPMW_MSGDSC+nDPMW_MSG_LEN_PL_DN_DI+nBI_DESTINATION_Y:ud
-
-
-/////////////////////////////NV12 UV Copy 422/////////////////////////////////////////////////////
-		//Read UV through DATAPORT    
-    add  (2) rMSGSRC.0<1>:d     wORIX<2;2,1>:w    wSRC_H_ORI_OFFSET<2;2,1>:w       // Source Y Block origin
-    asr (1)  rMSGSRC.1<1>:d     rMSGSRC.1<0;1,0>:d       1:w   // U/V block origin should be half of Y's
-    mov (1)  rMSGSRC.2<1>:ud    nDPR_BLOCK_SIZE_UV:ud          // U/V block width and height (16x2)
-    mov  (8) mudMSGHDR_DN<1>     rMSGSRC<8;8,1>:ud
-    send (8) udBOT_U_IO(0)<1>     mMSGHDR_DN    udDUMMY_NULL    nDATAPORT_READ    nDPMR_MSGDSC+nRESLEN_1+nBI_CURRENT_SRC_UV:ud
-
- 		//Write UV through DATAPORT
-		mov (2)     rMSGSRC.0<1>:ud        wORIX<2;2,1>:w               // X origin and Y origin
-		asr (1)     rMSGSRC.1<1>:d         rMSGSRC.1<0;1,0>:d    1:w  // U/V block origin should be half of Y's
-    mov (1)     rMSGSRC.2<1>:ud        nDPR_BLOCK_SIZE_UV:ud        // block width and height (16x2)
-    mov (8)     mudMSGHDR_DN(0)<1>     rMSGSRC.0<8;8,1>:ud
-    mov (8)			mudMSGHDR_DN(1)<1>		 udBOT_U_IO(0)<8;8,1>
-    send (8)    dNULLREG    mMSGHDR_DN   udDUMMY_NULL    nDATAPORT_WRITE    nDPMW_MSGDSC+nMSGLEN_1+nBI_DESTINATION_UV:ud 
\ No newline at end of file
diff --git a/src/shaders/post_processing/Makefile.am b/src/shaders/post_processing/Makefile.am
index 41b68bf..0dfd633 100644
--- a/src/shaders/post_processing/Makefile.am
+++ b/src/shaders/post_processing/Makefile.am
@@ -1,191 +1,4 @@
-
-INTEL_G4I = 
-
-INTEL_G4A	= null.g4a
-INTEL_G4B	= null.g4b
-INTEL_G4B_GEN5	= null.g4b.gen5
-INTEL_G6A	= null.g6a
-INTEL_G6B	= null.g6b
-
-INTEL_PP_G4B_GEN5 = \
-	nv12_avs_nv12.g4b.gen5			\
-	nv12_dndi_nv12.g4b.gen5			\
-	nv12_load_save_nv12.g4b.gen5		\
-	nv12_scaling_nv12.g4b.gen5		\
-	$(NULL)
-
-INTEL_PP_G6B = \
-	nv12_avs_nv12.g6b			\
-	nv12_dndi_nv12.g6b			\
-	nv12_load_save_nv12.g6b			\
-	nv12_scaling_nv12.g6b			\
-	$(NULL)
-
-INTEL_PP_ASM = \
-	nv12_avs_nv12.asm			\
-	nv12_dndi_nv12.asm			\
-	nv12_load_save_nv12.asm			\
-	nv12_scaling_nv12.asm			\
-	$(NULL)
-
-INTEL_PP_ASM += \
-	Common/AYUV_Load_16x8.asm			\
-	Common/IMC3_Load_8x4.asm			\
-	Common/IMC3_Load_8x5.asm			\
-	Common/IMC3_Load_9x5.asm			\
-	Common/Init_All_Regs.asm			\
-	Common/Multiple_Loop.asm			\
-	Common/Multiple_Loop_Head.asm			\
-	Common/NV11_Load_4x8.asm			\
-	Common/NV11_Load_5x8.asm			\
-	Common/NV12_Load_8x4.asm			\
-	Common/NV12_Load_8x5.asm			\
-	Common/NV12_Load_9x5.asm			\
-	Common/P208_Load_8x8.asm			\
-	Common/P208_Load_9x8.asm			\
-	Common/PA_Load_8x8.asm				\
-	Common/PA_Load_9x8.asm				\
-	Common/PL16x8_PL8x4.asm				\
-	Common/PL16x8_PL8x8.asm				\
-	Common/PL4x8_Save_NV11.asm			\
-	Common/PL5x8_PL16x8.asm				\
-	Common/PL5x8_PL8x8.asm				\
-	Common/PL8x4_Save_IMC3.asm			\
-	Common/PL8x4_Save_NV12.asm			\
-	Common/PL8x5_PL8x8.asm				\
-	Common/PL8x8_PL8x4.asm				\
-	Common/PL8x8_Save_P208.asm			\
-	Common/PL8x8_Save_PA.asm			\
-	Common/PL9x5_PL16x8.asm				\
-	Common/PL9x8_PL16x8.asm				\
-	Common/RGB16x8_Save_RGB.asm			\
-	Common/RGB16x8_Save_RGB16.asm			\
-	Common/RGB16x8_Save_Y416.asm			\
-	Common/RGB_Pack.asm				\
-	Common/SetupVPKernel.asm			\
-	Common/readSampler16x1.asm			\
-	Core_Kernels/AVS_SetupFirstBlock.asm		\
-	Core_Kernels/AVS_SetupSecondBlock.asm		\
-	Core_Kernels/DI_Hist_Save.asm			\
-	Core_Kernels/DI_SAVE_PA.asm			\
-	Core_Kernels/DNDI_COMMAND.asm			\
-	Core_Kernels/DNDI_Hist_Save.asm			\
-	Core_Kernels/PA_AVS_IEF_16x8.asm		\
-	Core_Kernels/PA_AVS_IEF_8x4.asm			\
-	Core_Kernels/PA_AVS_IEF_8x8.asm			\
-	Core_Kernels/PA_AVS_IEF_Sample.asm		\
-	Core_Kernels/PA_AVS_IEF_Unpack_16x8.asm		\
-	Core_Kernels/PA_AVS_IEF_Unpack_8x4.asm		\
-	Core_Kernels/PA_AVS_IEF_Unpack_8x8.asm		\
-	Core_Kernels/PA_DNDI_ALG.asm			\
-	Core_Kernels/PA_DN_ALG.asm			\
-	Core_Kernels/PA_Scaling.asm			\
-	Core_Kernels/PL2_AVS_IEF_16x8.asm		\
-	Core_Kernels/PL2_AVS_IEF_8x4.asm		\
-	Core_Kernels/PL2_AVS_IEF_8x8.asm		\
-	Core_Kernels/PL2_AVS_IEF_Unpack_16x8.asm	\
-	Core_Kernels/PL2_AVS_IEF_Unpack_8x4.asm		\
-	Core_Kernels/PL2_AVS_IEF_Unpack_8x8.asm		\
-	Core_Kernels/PL2_Scaling.asm			\
-	Core_Kernels/PL3_AVS_IEF_16x8.asm		\
-	Core_Kernels/PL3_AVS_IEF_8x4.asm		\
-	Core_Kernels/PL3_AVS_IEF_8x8.asm		\
-	Core_Kernels/PL3_AVS_IEF_Unpack_16x8.asm	\
-	Core_Kernels/PL3_AVS_IEF_Unpack_8x4.asm		\
-	Core_Kernels/PL3_AVS_IEF_Unpack_8x8.asm		\
-	Core_Kernels/PL3_Scaling.asm			\
-	Core_Kernels/PL_DNDI_ALG.asm			\
-	Core_Kernels/PL_DNDI_ALG_UVCopy_NV11.asm	\
-	Core_Kernels/PL_DNDI_ALG_UVCopy_NV12.asm	\
-	Core_Kernels/PL_DNDI_ALG_UVCopy_P208.asm	\
-	Core_Kernels/PL_DNDI_ALG_UVCopy_PL3.asm		\
-	Core_Kernels/PL_DN_ALG.asm			\
-	Core_Kernels/RGB_AVS_IEF_16x8.asm		\
-	Core_Kernels/RGB_AVS_IEF_Unpack_16x8.asm	\
-	Core_Kernels/RGB_AVS_IEF_Unscramble_16x8.asm	\
-	Core_Kernels/RGB_Scaling.asm			\
-	$(NULL)
-
-INTEL_PP_INC = \
-	Common/AYUV_Load_16x8.inc		\
-	Common/Expansion.inc			\
-	Common/PA_Load.inc			\
-	Common/PL2_Load.inc			\
-	Common/PL3_Load.inc			\
-	Common/PL4x8_Save_NV11.inc		\
-	Common/PL8x4_Save_IMC3.inc		\
-	Common/PL8x4_Save_NV12.inc		\
-	Common/PL8x8_PL8x4.inc			\
-	Common/PL8x8_Save_P208.inc		\
-	Common/PL8x8_Save_PA.inc		\
-	Common/RGB16x8_Save_RGB.inc		\
-	Common/RGB16x8_Save_RGB16.inc		\
-	Common/RGB16x8_Save_Y416.inc		\
-	Common/common.inc			\
-	Common/undefall.inc			\
-	Core_Kernels/AVS_IEF.inc		\
-	Core_Kernels/DI.inc			\
-	Core_Kernels/DNDI.inc			\
-	Core_Kernels/Scaling.inc
-	$(NULL)
-
-INTEL_PP_GEN5_ASM = $(INTEL_PP_G4B_GEN5:%.g4b.gen5=%.g5s)
-INTEL_PP_GEN6_ASM = $(INTEL_PP_G6B:%.g6b=%.g6s)
-
-TARGETS  =
-if HAVE_GEN4ASM
-TARGETS += $(INTEL_PP_G4B_GEN5)
-TARGETS += $(INTEL_PP_G6B)
-endif
-
-all-local: $(TARGETS)
-
-SUFFIXES = .g4a .g4b .g6a .g6b .g5s .g6s .asm
-
-if HAVE_GEN4ASM
-.g4a.g4b:
-	$(AM_V_GEN)m4 $*.g4a > $*.g4m			&& \
-	$(AM_V_GEN)$(GEN4ASM) -o $@ $*.g4m		&& \
-	$(AM_V_GEN)$(GEN4ASM) -g 5 -o $@.gen5 $*.g4m	&& \
-	rm $*.g4m
-
-.g6a.g6b:
-	$(AM_V_GEN)m4 $< > $*.g6m			&& \
-	$(AM_V_GEN)$(GEN4ASM) -g 6 -o $@ $*.g6m		&& \
-	rm $*.g6m
-
-$(INTEL_G4B): $(INTEL_G4I)
-
-$(INTEL_PP_GEN5_ASM): $(INTEL_PP_ASM)
-.asm.g5s:
-	$(AM_V_GEN)cpp -D DEV_ILK -I Common/ -I Core_Kernels $< > _pp0.$@; \
-	../gpp.py _pp0.$@ $@; \
-	rm _pp0.$@
-.g5s.g4b.gen5:
-	$(AM_V_GEN)$(GEN4ASM) -a -o $@ -g 5 $<
-
-$(INTEL_PP_GEN6_ASM): $(INTEL_PP_ASM)
-.asm.g6s:
-	$(AM_V_GEN)cpp -D GT -I Common/ -I Core_Kernels $< > _pp0.$@; \
-	../gpp.py _pp0.$@ $@; \
-	rm _pp0.$@
-.g6s.g6b:
-	$(AM_V_GEN)$(GEN4ASM) -a -o $@ -g 6 $<
-endif
-
-CLEANFILES = $(INTEL_PP_GEN5_ASM) $(INTEL_PP_GEN6_ASM)
-
-EXTRA_DIST = \
-	$(INTEL_G4A)		\
-	$(INTEL_G4B)		\
-	$(INTEL_G4B_GEN5)	\
-	$(INTEL_G4I)		\
-	$(INTEL_G6B)		\
-	$(INTEL_PP_ASM)		\
-	$(INTEL_PP_G4B_GEN5)	\
-	$(INTEL_PP_G6B)		\
-	$(INTEL_PP_INC)		\
-	$(NULL)
+SUBDIRS = gen5_6 gen7 
 
 # Extra clean files so that maintainer-clean removes *everything*
 MAINTAINERCLEANFILES = Makefile.in
diff --git a/src/shaders/post_processing/Common/AYUV_Load_16x8.asm b/src/shaders/post_processing/gen5_6/Common/AYUV_Load_16x8.asm
similarity index 100%
rename from src/shaders/post_processing/Common/AYUV_Load_16x8.asm
rename to src/shaders/post_processing/gen5_6/Common/AYUV_Load_16x8.asm
diff --git a/src/shaders/post_processing/Common/AYUV_Load_16x8.inc b/src/shaders/post_processing/gen5_6/Common/AYUV_Load_16x8.inc
similarity index 100%
rename from src/shaders/post_processing/Common/AYUV_Load_16x8.inc
rename to src/shaders/post_processing/gen5_6/Common/AYUV_Load_16x8.inc
diff --git a/src/shaders/post_processing/Common/Expansion.inc b/src/shaders/post_processing/gen5_6/Common/Expansion.inc
similarity index 100%
rename from src/shaders/post_processing/Common/Expansion.inc
rename to src/shaders/post_processing/gen5_6/Common/Expansion.inc
diff --git a/src/shaders/post_processing/Common/IMC3_Load_8x4.asm b/src/shaders/post_processing/gen5_6/Common/IMC3_Load_8x4.asm
similarity index 100%
rename from src/shaders/post_processing/Common/IMC3_Load_8x4.asm
rename to src/shaders/post_processing/gen5_6/Common/IMC3_Load_8x4.asm
diff --git a/src/shaders/post_processing/Common/IMC3_Load_8x5.asm b/src/shaders/post_processing/gen5_6/Common/IMC3_Load_8x5.asm
similarity index 100%
rename from src/shaders/post_processing/Common/IMC3_Load_8x5.asm
rename to src/shaders/post_processing/gen5_6/Common/IMC3_Load_8x5.asm
diff --git a/src/shaders/post_processing/Common/IMC3_Load_9x5.asm b/src/shaders/post_processing/gen5_6/Common/IMC3_Load_9x5.asm
similarity index 100%
rename from src/shaders/post_processing/Common/IMC3_Load_9x5.asm
rename to src/shaders/post_processing/gen5_6/Common/IMC3_Load_9x5.asm
diff --git a/src/shaders/post_processing/Common/Init_All_Regs.asm b/src/shaders/post_processing/gen5_6/Common/Init_All_Regs.asm
similarity index 100%
rename from src/shaders/post_processing/Common/Init_All_Regs.asm
rename to src/shaders/post_processing/gen5_6/Common/Init_All_Regs.asm
diff --git a/src/shaders/post_processing/Common/Multiple_Loop.asm b/src/shaders/post_processing/gen5_6/Common/Multiple_Loop.asm
similarity index 100%
rename from src/shaders/post_processing/Common/Multiple_Loop.asm
rename to src/shaders/post_processing/gen5_6/Common/Multiple_Loop.asm
diff --git a/src/shaders/post_processing/Common/Multiple_Loop_Head.asm b/src/shaders/post_processing/gen5_6/Common/Multiple_Loop_Head.asm
similarity index 100%
rename from src/shaders/post_processing/Common/Multiple_Loop_Head.asm
rename to src/shaders/post_processing/gen5_6/Common/Multiple_Loop_Head.asm
diff --git a/src/shaders/post_processing/Common/NV11_Load_4x8.asm b/src/shaders/post_processing/gen5_6/Common/NV11_Load_4x8.asm
similarity index 100%
rename from src/shaders/post_processing/Common/NV11_Load_4x8.asm
rename to src/shaders/post_processing/gen5_6/Common/NV11_Load_4x8.asm
diff --git a/src/shaders/post_processing/Common/NV11_Load_5x8.asm b/src/shaders/post_processing/gen5_6/Common/NV11_Load_5x8.asm
similarity index 100%
rename from src/shaders/post_processing/Common/NV11_Load_5x8.asm
rename to src/shaders/post_processing/gen5_6/Common/NV11_Load_5x8.asm
diff --git a/src/shaders/post_processing/Common/NV12_Load_8x4.asm b/src/shaders/post_processing/gen5_6/Common/NV12_Load_8x4.asm
similarity index 100%
rename from src/shaders/post_processing/Common/NV12_Load_8x4.asm
rename to src/shaders/post_processing/gen5_6/Common/NV12_Load_8x4.asm
diff --git a/src/shaders/post_processing/Common/NV12_Load_8x5.asm b/src/shaders/post_processing/gen5_6/Common/NV12_Load_8x5.asm
similarity index 100%
rename from src/shaders/post_processing/Common/NV12_Load_8x5.asm
rename to src/shaders/post_processing/gen5_6/Common/NV12_Load_8x5.asm
diff --git a/src/shaders/post_processing/Common/NV12_Load_9x5.asm b/src/shaders/post_processing/gen5_6/Common/NV12_Load_9x5.asm
similarity index 100%
rename from src/shaders/post_processing/Common/NV12_Load_9x5.asm
rename to src/shaders/post_processing/gen5_6/Common/NV12_Load_9x5.asm
diff --git a/src/shaders/post_processing/Common/P208_Load_8x8.asm b/src/shaders/post_processing/gen5_6/Common/P208_Load_8x8.asm
similarity index 100%
rename from src/shaders/post_processing/Common/P208_Load_8x8.asm
rename to src/shaders/post_processing/gen5_6/Common/P208_Load_8x8.asm
diff --git a/src/shaders/post_processing/Common/P208_Load_9x8.asm b/src/shaders/post_processing/gen5_6/Common/P208_Load_9x8.asm
similarity index 100%
rename from src/shaders/post_processing/Common/P208_Load_9x8.asm
rename to src/shaders/post_processing/gen5_6/Common/P208_Load_9x8.asm
diff --git a/src/shaders/post_processing/Common/PA_Load.inc b/src/shaders/post_processing/gen5_6/Common/PA_Load.inc
similarity index 100%
rename from src/shaders/post_processing/Common/PA_Load.inc
rename to src/shaders/post_processing/gen5_6/Common/PA_Load.inc
diff --git a/src/shaders/post_processing/gen5_6/Common/PA_Load_8x8.asm b/src/shaders/post_processing/gen5_6/Common/PA_Load_8x8.asm
new file mode 100755
index 0000000..789034f
--- /dev/null
+++ b/src/shaders/post_processing/gen5_6/Common/PA_Load_8x8.asm
@@ -0,0 +1,33 @@
+/*
+ * All Video Processing kernels 
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * This program is licensed under the terms and conditions of the
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at
+ * http://www.opensource.org/licenses/eclipse-1.0.php.
+ *
+ */
+
+// Module name: PA_Load_8x8.asm
+//----------------------------------------------------------------
+
+#define  PA_LOAD_8x8
+#include "PA_Load.inc"
+
+//  Load a 16x8 packed data block.
+//  The packed data block has to be read as a 32x8 block, which is why the horizontal origin is doubled below
+    add  (2) rMSGSRC.0<1>:d     wORIX<2;2,1>:w    wSRC_H_ORI_OFFSET<2;2,1>:w       // Source block origin = wORIX pair + wSRC_H_ORI_OFFSET pair
+    shl  (1) rMSGSRC.0<1>:d     rMSGSRC.0<0;1,0>:w            1:w                  // Horizontal block origin needs to be doubled (first element only)
+    mov  (1) rMSGSRC.2<1>:ud    nDPR_BLOCK_SIZE_YUV:ud                             // Block width and height (32x8)
+    mov  (8) mMSGHDRY<1>:ud     rMSGSRC<8;8,1>:ud                                  // Copy the prepared rMSGSRC values into the message header
+    send (8) udSRC_YUV(0)<1>    mMSGHDRY    udDUMMY_NULL    nDATAPORT_READ    nDPMR_MSGDSC+nDPR_MSG_SIZE_YUV+nBI_CURRENT_SRC_YUV:ud    // Data port read; response is written starting at udSRC_YUV(0)
+
+//  Unpack to "planar" YUV422 format in word-aligned bytes: every row yields 16 Y, 8 U and 8 V elements
+    add  (4) pCF_Y_OFFSET<1>:uw    ubSRC_CF_OFFSET<4;4,1>:ub    nSRC_YUV_REG*nGRFWIB:w    // Initial Y,U,V offset in YUV422 block
+    $for(0; <nY_NUM_OF_ROWS; 1) {
+        mov (16)  uwDEST_Y(0, %1*16)<1>     r[pCF_Y_OFFSET, %1*nGRFWIB]REGION(16,2)
+        mov (8)   uwDEST_U(0, %1*8)<1>      r[pCF_U_OFFSET, %1*nGRFWIB]REGION(8,4)
+        mov (8)   uwDEST_V(0, %1*8)<1>      r[pCF_V_OFFSET, %1*nGRFWIB]REGION(8,4)
+    }
+
+// End of PA_Load_8x8
diff --git a/src/shaders/post_processing/Common/PA_Load_9x8.asm b/src/shaders/post_processing/gen5_6/Common/PA_Load_9x8.asm
similarity index 100%
rename from src/shaders/post_processing/Common/PA_Load_9x8.asm
rename to src/shaders/post_processing/gen5_6/Common/PA_Load_9x8.asm
diff --git a/src/shaders/post_processing/Common/PL16x8_PL8x4.asm b/src/shaders/post_processing/gen5_6/Common/PL16x8_PL8x4.asm
similarity index 100%
rename from src/shaders/post_processing/Common/PL16x8_PL8x4.asm
rename to src/shaders/post_processing/gen5_6/Common/PL16x8_PL8x4.asm
diff --git a/src/shaders/post_processing/Common/PL16x8_PL8x8.asm b/src/shaders/post_processing/gen5_6/Common/PL16x8_PL8x8.asm
similarity index 100%
rename from src/shaders/post_processing/Common/PL16x8_PL8x8.asm
rename to src/shaders/post_processing/gen5_6/Common/PL16x8_PL8x8.asm
diff --git a/src/shaders/post_processing/Common/PL2_Load.inc b/src/shaders/post_processing/gen5_6/Common/PL2_Load.inc
similarity index 100%
rename from src/shaders/post_processing/Common/PL2_Load.inc
rename to src/shaders/post_processing/gen5_6/Common/PL2_Load.inc
diff --git a/src/shaders/post_processing/Common/PL3_Load.inc b/src/shaders/post_processing/gen5_6/Common/PL3_Load.inc
similarity index 100%
rename from src/shaders/post_processing/Common/PL3_Load.inc
rename to src/shaders/post_processing/gen5_6/Common/PL3_Load.inc
diff --git a/src/shaders/post_processing/Common/PL4x8_Save_NV11.asm b/src/shaders/post_processing/gen5_6/Common/PL4x8_Save_NV11.asm
similarity index 100%
rename from src/shaders/post_processing/Common/PL4x8_Save_NV11.asm
rename to src/shaders/post_processing/gen5_6/Common/PL4x8_Save_NV11.asm
diff --git a/src/shaders/post_processing/Common/PL4x8_Save_NV11.inc b/src/shaders/post_processing/gen5_6/Common/PL4x8_Save_NV11.inc
similarity index 100%
rename from src/shaders/post_processing/Common/PL4x8_Save_NV11.inc
rename to src/shaders/post_processing/gen5_6/Common/PL4x8_Save_NV11.inc
diff --git a/src/shaders/post_processing/Common/PL5x8_PL16x8.asm b/src/shaders/post_processing/gen5_6/Common/PL5x8_PL16x8.asm
similarity index 100%
rename from src/shaders/post_processing/Common/PL5x8_PL16x8.asm
rename to src/shaders/post_processing/gen5_6/Common/PL5x8_PL16x8.asm
diff --git a/src/shaders/post_processing/Common/PL5x8_PL8x8.asm b/src/shaders/post_processing/gen5_6/Common/PL5x8_PL8x8.asm
similarity index 100%
rename from src/shaders/post_processing/Common/PL5x8_PL8x8.asm
rename to src/shaders/post_processing/gen5_6/Common/PL5x8_PL8x8.asm
diff --git a/src/shaders/post_processing/Common/PL8x4_Save_IMC3.asm b/src/shaders/post_processing/gen5_6/Common/PL8x4_Save_IMC3.asm
similarity index 100%
rename from src/shaders/post_processing/Common/PL8x4_Save_IMC3.asm
rename to src/shaders/post_processing/gen5_6/Common/PL8x4_Save_IMC3.asm
diff --git a/src/shaders/post_processing/Common/PL8x4_Save_IMC3.inc b/src/shaders/post_processing/gen5_6/Common/PL8x4_Save_IMC3.inc
similarity index 100%
rename from src/shaders/post_processing/Common/PL8x4_Save_IMC3.inc
rename to src/shaders/post_processing/gen5_6/Common/PL8x4_Save_IMC3.inc
diff --git a/src/shaders/post_processing/Common/PL8x4_Save_NV12.asm b/src/shaders/post_processing/gen5_6/Common/PL8x4_Save_NV12.asm
similarity index 100%
rename from src/shaders/post_processing/Common/PL8x4_Save_NV12.asm
rename to src/shaders/post_processing/gen5_6/Common/PL8x4_Save_NV12.asm
diff --git a/src/shaders/post_processing/Common/PL8x4_Save_NV12.inc b/src/shaders/post_processing/gen5_6/Common/PL8x4_Save_NV12.inc
similarity index 100%
rename from src/shaders/post_processing/Common/PL8x4_Save_NV12.inc
rename to src/shaders/post_processing/gen5_6/Common/PL8x4_Save_NV12.inc
diff --git a/src/shaders/post_processing/Common/PL8x5_PL8x8.asm b/src/shaders/post_processing/gen5_6/Common/PL8x5_PL8x8.asm
similarity index 100%
rename from src/shaders/post_processing/Common/PL8x5_PL8x8.asm
rename to src/shaders/post_processing/gen5_6/Common/PL8x5_PL8x8.asm
diff --git a/src/shaders/post_processing/Common/PL8x8_PL8x4.asm b/src/shaders/post_processing/gen5_6/Common/PL8x8_PL8x4.asm
similarity index 100%
rename from src/shaders/post_processing/Common/PL8x8_PL8x4.asm
rename to src/shaders/post_processing/gen5_6/Common/PL8x8_PL8x4.asm
diff --git a/src/shaders/post_processing/Common/PL8x8_PL8x4.inc b/src/shaders/post_processing/gen5_6/Common/PL8x8_PL8x4.inc
similarity index 100%
rename from src/shaders/post_processing/Common/PL8x8_PL8x4.inc
rename to src/shaders/post_processing/gen5_6/Common/PL8x8_PL8x4.inc
diff --git a/src/shaders/post_processing/Common/PL8x8_Save_P208.asm b/src/shaders/post_processing/gen5_6/Common/PL8x8_Save_P208.asm
similarity index 100%
rename from src/shaders/post_processing/Common/PL8x8_Save_P208.asm
rename to src/shaders/post_processing/gen5_6/Common/PL8x8_Save_P208.asm
diff --git a/src/shaders/post_processing/Common/PL8x8_Save_P208.inc b/src/shaders/post_processing/gen5_6/Common/PL8x8_Save_P208.inc
similarity index 100%
rename from src/shaders/post_processing/Common/PL8x8_Save_P208.inc
rename to src/shaders/post_processing/gen5_6/Common/PL8x8_Save_P208.inc
diff --git a/src/shaders/post_processing/Common/PL8x8_Save_PA.asm b/src/shaders/post_processing/gen5_6/Common/PL8x8_Save_PA.asm
similarity index 100%
rename from src/shaders/post_processing/Common/PL8x8_Save_PA.asm
rename to src/shaders/post_processing/gen5_6/Common/PL8x8_Save_PA.asm
diff --git a/src/shaders/post_processing/Common/PL8x8_Save_PA.inc b/src/shaders/post_processing/gen5_6/Common/PL8x8_Save_PA.inc
similarity index 100%
rename from src/shaders/post_processing/Common/PL8x8_Save_PA.inc
rename to src/shaders/post_processing/gen5_6/Common/PL8x8_Save_PA.inc
diff --git a/src/shaders/post_processing/Common/PL9x5_PL16x8.asm b/src/shaders/post_processing/gen5_6/Common/PL9x5_PL16x8.asm
similarity index 100%
rename from src/shaders/post_processing/Common/PL9x5_PL16x8.asm
rename to src/shaders/post_processing/gen5_6/Common/PL9x5_PL16x8.asm
diff --git a/src/shaders/post_processing/Common/PL9x8_PL16x8.asm b/src/shaders/post_processing/gen5_6/Common/PL9x8_PL16x8.asm
similarity index 100%
rename from src/shaders/post_processing/Common/PL9x8_PL16x8.asm
rename to src/shaders/post_processing/gen5_6/Common/PL9x8_PL16x8.asm
diff --git a/src/shaders/post_processing/Common/RGB16x8_Save_RGB.asm b/src/shaders/post_processing/gen5_6/Common/RGB16x8_Save_RGB.asm
similarity index 100%
rename from src/shaders/post_processing/Common/RGB16x8_Save_RGB.asm
rename to src/shaders/post_processing/gen5_6/Common/RGB16x8_Save_RGB.asm
diff --git a/src/shaders/post_processing/Common/RGB16x8_Save_RGB.inc b/src/shaders/post_processing/gen5_6/Common/RGB16x8_Save_RGB.inc
similarity index 100%
rename from src/shaders/post_processing/Common/RGB16x8_Save_RGB.inc
rename to src/shaders/post_processing/gen5_6/Common/RGB16x8_Save_RGB.inc
diff --git a/src/shaders/post_processing/Common/RGB16x8_Save_RGB16.asm b/src/shaders/post_processing/gen5_6/Common/RGB16x8_Save_RGB16.asm
similarity index 100%
rename from src/shaders/post_processing/Common/RGB16x8_Save_RGB16.asm
rename to src/shaders/post_processing/gen5_6/Common/RGB16x8_Save_RGB16.asm
diff --git a/src/shaders/post_processing/Common/RGB16x8_Save_RGB16.inc b/src/shaders/post_processing/gen5_6/Common/RGB16x8_Save_RGB16.inc
similarity index 100%
rename from src/shaders/post_processing/Common/RGB16x8_Save_RGB16.inc
rename to src/shaders/post_processing/gen5_6/Common/RGB16x8_Save_RGB16.inc
diff --git a/src/shaders/post_processing/Common/RGB16x8_Save_Y416.asm b/src/shaders/post_processing/gen5_6/Common/RGB16x8_Save_Y416.asm
similarity index 100%
rename from src/shaders/post_processing/Common/RGB16x8_Save_Y416.asm
rename to src/shaders/post_processing/gen5_6/Common/RGB16x8_Save_Y416.asm
diff --git a/src/shaders/post_processing/Common/RGB16x8_Save_Y416.inc b/src/shaders/post_processing/gen5_6/Common/RGB16x8_Save_Y416.inc
similarity index 100%
rename from src/shaders/post_processing/Common/RGB16x8_Save_Y416.inc
rename to src/shaders/post_processing/gen5_6/Common/RGB16x8_Save_Y416.inc
diff --git a/src/shaders/post_processing/Common/RGB_Pack.asm b/src/shaders/post_processing/gen5_6/Common/RGB_Pack.asm
similarity index 100%
rename from src/shaders/post_processing/Common/RGB_Pack.asm
rename to src/shaders/post_processing/gen5_6/Common/RGB_Pack.asm
diff --git a/src/shaders/post_processing/Common/SetupVPKernel.asm b/src/shaders/post_processing/gen5_6/Common/SetupVPKernel.asm
similarity index 100%
rename from src/shaders/post_processing/Common/SetupVPKernel.asm
rename to src/shaders/post_processing/gen5_6/Common/SetupVPKernel.asm
diff --git a/src/shaders/post_processing/Common/common.inc b/src/shaders/post_processing/gen5_6/Common/common.inc
similarity index 100%
rename from src/shaders/post_processing/Common/common.inc
rename to src/shaders/post_processing/gen5_6/Common/common.inc
diff --git a/src/shaders/post_processing/Common/readSampler16x1.asm b/src/shaders/post_processing/gen5_6/Common/readSampler16x1.asm
similarity index 100%
rename from src/shaders/post_processing/Common/readSampler16x1.asm
rename to src/shaders/post_processing/gen5_6/Common/readSampler16x1.asm
diff --git a/src/shaders/post_processing/Common/undefall.inc b/src/shaders/post_processing/gen5_6/Common/undefall.inc
similarity index 100%
rename from src/shaders/post_processing/Common/undefall.inc
rename to src/shaders/post_processing/gen5_6/Common/undefall.inc
diff --git a/src/shaders/post_processing/Core_Kernels/AVS_IEF.inc b/src/shaders/post_processing/gen5_6/Core_Kernels/AVS_IEF.inc
similarity index 100%
rename from src/shaders/post_processing/Core_Kernels/AVS_IEF.inc
rename to src/shaders/post_processing/gen5_6/Core_Kernels/AVS_IEF.inc
diff --git a/src/shaders/post_processing/Core_Kernels/AVS_SetupFirstBlock.asm b/src/shaders/post_processing/gen5_6/Core_Kernels/AVS_SetupFirstBlock.asm
similarity index 100%
rename from src/shaders/post_processing/Core_Kernels/AVS_SetupFirstBlock.asm
rename to src/shaders/post_processing/gen5_6/Core_Kernels/AVS_SetupFirstBlock.asm
diff --git a/src/shaders/post_processing/Core_Kernels/AVS_SetupSecondBlock.asm b/src/shaders/post_processing/gen5_6/Core_Kernels/AVS_SetupSecondBlock.asm
similarity index 100%
rename from src/shaders/post_processing/Core_Kernels/AVS_SetupSecondBlock.asm
rename to src/shaders/post_processing/gen5_6/Core_Kernels/AVS_SetupSecondBlock.asm
diff --git a/src/shaders/post_processing/Core_Kernels/DI.inc b/src/shaders/post_processing/gen5_6/Core_Kernels/DI.inc
similarity index 100%
rename from src/shaders/post_processing/Core_Kernels/DI.inc
rename to src/shaders/post_processing/gen5_6/Core_Kernels/DI.inc
diff --git a/src/shaders/post_processing/Core_Kernels/DI_Hist_Save.asm b/src/shaders/post_processing/gen5_6/Core_Kernels/DI_Hist_Save.asm
similarity index 100%
rename from src/shaders/post_processing/Core_Kernels/DI_Hist_Save.asm
rename to src/shaders/post_processing/gen5_6/Core_Kernels/DI_Hist_Save.asm
diff --git a/src/shaders/post_processing/Core_Kernels/DI_SAVE_PA.asm b/src/shaders/post_processing/gen5_6/Core_Kernels/DI_SAVE_PA.asm
similarity index 100%
rename from src/shaders/post_processing/Core_Kernels/DI_SAVE_PA.asm
rename to src/shaders/post_processing/gen5_6/Core_Kernels/DI_SAVE_PA.asm
diff --git a/src/shaders/post_processing/Core_Kernels/DNDI.inc b/src/shaders/post_processing/gen5_6/Core_Kernels/DNDI.inc
similarity index 100%
rename from src/shaders/post_processing/Core_Kernels/DNDI.inc
rename to src/shaders/post_processing/gen5_6/Core_Kernels/DNDI.inc
diff --git a/src/shaders/post_processing/Core_Kernels/DNDI_COMMAND.asm b/src/shaders/post_processing/gen5_6/Core_Kernels/DNDI_COMMAND.asm
similarity index 100%
rename from src/shaders/post_processing/Core_Kernels/DNDI_COMMAND.asm
rename to src/shaders/post_processing/gen5_6/Core_Kernels/DNDI_COMMAND.asm
diff --git a/src/shaders/post_processing/Core_Kernels/DNDI_Hist_Save.asm b/src/shaders/post_processing/gen5_6/Core_Kernels/DNDI_Hist_Save.asm
similarity index 100%
rename from src/shaders/post_processing/Core_Kernels/DNDI_Hist_Save.asm
rename to src/shaders/post_processing/gen5_6/Core_Kernels/DNDI_Hist_Save.asm
diff --git a/src/shaders/post_processing/Core_Kernels/PA_AVS_IEF_16x8.asm b/src/shaders/post_processing/gen5_6/Core_Kernels/PA_AVS_IEF_16x8.asm
similarity index 100%
rename from src/shaders/post_processing/Core_Kernels/PA_AVS_IEF_16x8.asm
rename to src/shaders/post_processing/gen5_6/Core_Kernels/PA_AVS_IEF_16x8.asm
diff --git a/src/shaders/post_processing/Core_Kernels/PA_AVS_IEF_8x4.asm b/src/shaders/post_processing/gen5_6/Core_Kernels/PA_AVS_IEF_8x4.asm
similarity index 100%
rename from src/shaders/post_processing/Core_Kernels/PA_AVS_IEF_8x4.asm
rename to src/shaders/post_processing/gen5_6/Core_Kernels/PA_AVS_IEF_8x4.asm
diff --git a/src/shaders/post_processing/Core_Kernels/PA_AVS_IEF_8x8.asm b/src/shaders/post_processing/gen5_6/Core_Kernels/PA_AVS_IEF_8x8.asm
similarity index 100%
rename from src/shaders/post_processing/Core_Kernels/PA_AVS_IEF_8x8.asm
rename to src/shaders/post_processing/gen5_6/Core_Kernels/PA_AVS_IEF_8x8.asm
diff --git a/src/shaders/post_processing/Core_Kernels/PA_AVS_IEF_Sample.asm b/src/shaders/post_processing/gen5_6/Core_Kernels/PA_AVS_IEF_Sample.asm
similarity index 100%
rename from src/shaders/post_processing/Core_Kernels/PA_AVS_IEF_Sample.asm
rename to src/shaders/post_processing/gen5_6/Core_Kernels/PA_AVS_IEF_Sample.asm
diff --git a/src/shaders/post_processing/Core_Kernels/PA_AVS_IEF_Unpack_16x8.asm b/src/shaders/post_processing/gen5_6/Core_Kernels/PA_AVS_IEF_Unpack_16x8.asm
similarity index 100%
rename from src/shaders/post_processing/Core_Kernels/PA_AVS_IEF_Unpack_16x8.asm
rename to src/shaders/post_processing/gen5_6/Core_Kernels/PA_AVS_IEF_Unpack_16x8.asm
diff --git a/src/shaders/post_processing/Core_Kernels/PA_AVS_IEF_Unpack_8x4.asm b/src/shaders/post_processing/gen5_6/Core_Kernels/PA_AVS_IEF_Unpack_8x4.asm
similarity index 100%
rename from src/shaders/post_processing/Core_Kernels/PA_AVS_IEF_Unpack_8x4.asm
rename to src/shaders/post_processing/gen5_6/Core_Kernels/PA_AVS_IEF_Unpack_8x4.asm
diff --git a/src/shaders/post_processing/Core_Kernels/PA_AVS_IEF_Unpack_8x8.asm b/src/shaders/post_processing/gen5_6/Core_Kernels/PA_AVS_IEF_Unpack_8x8.asm
similarity index 100%
rename from src/shaders/post_processing/Core_Kernels/PA_AVS_IEF_Unpack_8x8.asm
rename to src/shaders/post_processing/gen5_6/Core_Kernels/PA_AVS_IEF_Unpack_8x8.asm
diff --git a/src/shaders/post_processing/Core_Kernels/PA_DNDI_ALG.asm b/src/shaders/post_processing/gen5_6/Core_Kernels/PA_DNDI_ALG.asm
similarity index 100%
rename from src/shaders/post_processing/Core_Kernels/PA_DNDI_ALG.asm
rename to src/shaders/post_processing/gen5_6/Core_Kernels/PA_DNDI_ALG.asm
diff --git a/src/shaders/post_processing/Core_Kernels/PA_DN_ALG.asm b/src/shaders/post_processing/gen5_6/Core_Kernels/PA_DN_ALG.asm
similarity index 100%
rename from src/shaders/post_processing/Core_Kernels/PA_DN_ALG.asm
rename to src/shaders/post_processing/gen5_6/Core_Kernels/PA_DN_ALG.asm
diff --git a/src/shaders/post_processing/Core_Kernels/PA_Scaling.asm b/src/shaders/post_processing/gen5_6/Core_Kernels/PA_Scaling.asm
similarity index 100%
rename from src/shaders/post_processing/Core_Kernels/PA_Scaling.asm
rename to src/shaders/post_processing/gen5_6/Core_Kernels/PA_Scaling.asm
diff --git a/src/shaders/post_processing/Core_Kernels/PL2_AVS_IEF_16x8.asm b/src/shaders/post_processing/gen5_6/Core_Kernels/PL2_AVS_IEF_16x8.asm
similarity index 100%
rename from src/shaders/post_processing/Core_Kernels/PL2_AVS_IEF_16x8.asm
rename to src/shaders/post_processing/gen5_6/Core_Kernels/PL2_AVS_IEF_16x8.asm
diff --git a/src/shaders/post_processing/Core_Kernels/PL2_AVS_IEF_8x4.asm b/src/shaders/post_processing/gen5_6/Core_Kernels/PL2_AVS_IEF_8x4.asm
similarity index 100%
rename from src/shaders/post_processing/Core_Kernels/PL2_AVS_IEF_8x4.asm
rename to src/shaders/post_processing/gen5_6/Core_Kernels/PL2_AVS_IEF_8x4.asm
diff --git a/src/shaders/post_processing/Core_Kernels/PL2_AVS_IEF_8x8.asm b/src/shaders/post_processing/gen5_6/Core_Kernels/PL2_AVS_IEF_8x8.asm
similarity index 100%
rename from src/shaders/post_processing/Core_Kernels/PL2_AVS_IEF_8x8.asm
rename to src/shaders/post_processing/gen5_6/Core_Kernels/PL2_AVS_IEF_8x8.asm
diff --git a/src/shaders/post_processing/gen5_6/Core_Kernels/PL2_AVS_IEF_Unpack_16x8.asm b/src/shaders/post_processing/gen5_6/Core_Kernels/PL2_AVS_IEF_Unpack_16x8.asm
new file mode 100644
index 0000000..6c994c1
--- /dev/null
+++ b/src/shaders/post_processing/gen5_6/Core_Kernels/PL2_AVS_IEF_Unpack_16x8.asm
@@ -0,0 +1,271 @@
+/*
+ * All Video Processing kernels 
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * This program is licensed under the terms and conditions of the
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at
+ * http://www.opensource.org/licenses/eclipse-1.0.php.
+ *
+ */
+
+//---------- PL2_AVS_IEF_Unpack_16x8.asm ----------
+        
+#ifdef AVS_OUTPUT_16_BIT	//Output is packed in AVYU format
+// Move first 8x8 words of Y to dest GRF (as packed)
+    mov (4) uwDEST_Y(0,1)<4>       uwAVS_RESPONSE(0,0)<4;4,1>                                      
+    mov (4) uwDEST_Y(1,1)<4>       uwAVS_RESPONSE(0,8)<4;4,1>                                      
+    mov (4) uwDEST_Y(4,1)<4>       uwAVS_RESPONSE(0,4)<4;4,1>                                    
+    mov (4) uwDEST_Y(5,1)<4>       uwAVS_RESPONSE(0,12)<4;4,1>                                    
+    mov (4) uwDEST_Y(8,1)<4>       uwAVS_RESPONSE(1,0)<4;4,1>                                      
+    mov (4) uwDEST_Y(9,1)<4>       uwAVS_RESPONSE(1,8)<4;4,1>                                      
+    mov (4) uwDEST_Y(12,1)<4>      uwAVS_RESPONSE(1,4)<4;4,1>                                    
+    mov (4) uwDEST_Y(13,1)<4>      uwAVS_RESPONSE(1,12)<4;4,1>                                    
+    mov (4) uwDEST_Y(16,1)<4>      uwAVS_RESPONSE(2,0)<4;4,1>                                     
+    mov (4) uwDEST_Y(17,1)<4>      uwAVS_RESPONSE(2,8)<4;4,1>                                     
+    mov (4) uwDEST_Y(20,1)<4>      uwAVS_RESPONSE(2,4)<4;4,1>                                   
+    mov (4) uwDEST_Y(21,1)<4>      uwAVS_RESPONSE(2,12)<4;4,1>                                   
+    mov (4) uwDEST_Y(24,1)<4>      uwAVS_RESPONSE(3,0)<4;4,1>                                     
+    mov (4) uwDEST_Y(25,1)<4>      uwAVS_RESPONSE(3,8)<4;4,1>                                     
+    mov (4) uwDEST_Y(28,1)<4>      uwAVS_RESPONSE(3,4)<4;4,1>                                   
+    mov (4) uwDEST_Y(29,1)<4>      uwAVS_RESPONSE(3,12)<4;4,1>                                   
+
+// Move first 8x8 words of U to dest GRF (as packed)
+    mov (4) uwDEST_Y(0,0)<4>       uwAVS_RESPONSE(4,0)<4;4,1>                                      
+    mov (4) uwDEST_Y(1,0)<4>       uwAVS_RESPONSE(4,8)<4;4,1>                                      
+    mov (4) uwDEST_Y(4,0)<4>       uwAVS_RESPONSE(4,4)<4;4,1>                                    
+    mov (4) uwDEST_Y(5,0)<4>       uwAVS_RESPONSE(4,12)<4;4,1>                                    
+    mov (4) uwDEST_Y(8,0)<4>       uwAVS_RESPONSE(5,0)<4;4,1>                                      
+    mov (4) uwDEST_Y(9,0)<4>       uwAVS_RESPONSE(5,8)<4;4,1>                                      
+    mov (4) uwDEST_Y(12,0)<4>      uwAVS_RESPONSE(5,4)<4;4,1>                                    
+    mov (4) uwDEST_Y(13,0)<4>      uwAVS_RESPONSE(5,12)<4;4,1>                                    
+    mov (4) uwDEST_Y(16,0)<4>      uwAVS_RESPONSE(8,0)<4;4,1>                                     
+    mov (4) uwDEST_Y(17,0)<4>      uwAVS_RESPONSE(8,8)<4;4,1>                                     
+    mov (4) uwDEST_Y(20,0)<4>      uwAVS_RESPONSE(8,4)<4;4,1>                                   
+    mov (4) uwDEST_Y(21,0)<4>      uwAVS_RESPONSE(8,12)<4;4,1>                                   
+    mov (4) uwDEST_Y(24,0)<4>      uwAVS_RESPONSE(9,0)<4;4,1>                                     
+    mov (4) uwDEST_Y(25,0)<4>      uwAVS_RESPONSE(9,8)<4;4,1>                                     
+    mov (4) uwDEST_Y(28,0)<4>      uwAVS_RESPONSE(9,4)<4;4,1>                                   
+    mov (4) uwDEST_Y(29,0)<4>      uwAVS_RESPONSE(9,12)<4;4,1>                                   
+
+// Move first 8x8 words of V to dest GRF (as packed)
+    mov (4) uwDEST_Y(0,2)<4>       uwAVS_RESPONSE(6,0)<4;4,1>                                      
+    mov (4) uwDEST_Y(1,2)<4>       uwAVS_RESPONSE(6,8)<4;4,1>                                      
+    mov (4) uwDEST_Y(4,2)<4>       uwAVS_RESPONSE(6,4)<4;4,1>                                    
+    mov (4) uwDEST_Y(5,2)<4>       uwAVS_RESPONSE(6,12)<4;4,1>                                    
+    mov (4) uwDEST_Y(8,2)<4>       uwAVS_RESPONSE(7,0)<4;4,1>                                      
+    mov (4) uwDEST_Y(9,2)<4>       uwAVS_RESPONSE(7,8)<4;4,1>                                      
+    mov (4) uwDEST_Y(12,2)<4>      uwAVS_RESPONSE(7,4)<4;4,1>                                    
+    mov (4) uwDEST_Y(13,2)<4>      uwAVS_RESPONSE(7,12)<4;4,1>                                    
+    mov (4) uwDEST_Y(16,2)<4>      uwAVS_RESPONSE(10,0)<4;4,1>                                     
+    mov (4) uwDEST_Y(17,2)<4>      uwAVS_RESPONSE(10,8)<4;4,1>                                     
+    mov (4) uwDEST_Y(20,2)<4>      uwAVS_RESPONSE(10,4)<4;4,1>                                   
+    mov (4) uwDEST_Y(21,2)<4>      uwAVS_RESPONSE(10,12)<4;4,1>                                   
+    mov (4) uwDEST_Y(24,2)<4>      uwAVS_RESPONSE(11,0)<4;4,1>                                     
+    mov (4) uwDEST_Y(25,2)<4>      uwAVS_RESPONSE(11,8)<4;4,1>                                     
+    mov (4) uwDEST_Y(28,2)<4>      uwAVS_RESPONSE(11,4)<4;4,1>                                   
+    mov (4) uwDEST_Y(29,2)<4>      uwAVS_RESPONSE(11,12)<4;4,1>                                   
+
+// Move first 8x8 words of A to dest GRF (as packed)
+    mov (4) uwDEST_Y(0,3)<4>       0:uw                                    
+    mov (4) uwDEST_Y(1,3)<4>       0:uw                                    
+    mov (4) uwDEST_Y(4,3)<4>       0:uw                                  
+    mov (4) uwDEST_Y(5,3)<4>       0:uw                                   
+    mov (4) uwDEST_Y(8,3)<4>       0:uw                                    
+    mov (4) uwDEST_Y(9,3)<4>       0:uw                                    
+    mov (4) uwDEST_Y(12,3)<4>      0:uw                                  
+    mov (4) uwDEST_Y(13,3)<4>      0:uw                                   
+    mov (4) uwDEST_Y(16,3)<4>      0:uw                                    
+    mov (4) uwDEST_Y(17,3)<4>      0:uw                                    
+    mov (4) uwDEST_Y(20,3)<4>      0:uw                                  
+    mov (4) uwDEST_Y(21,3)<4>      0:uw                                   
+    mov (4) uwDEST_Y(24,3)<4>      0:uw                                    
+    mov (4) uwDEST_Y(25,3)<4>      0:uw                                    
+    mov (4) uwDEST_Y(28,3)<4>      0:uw                                  
+    mov (4) uwDEST_Y(29,3)<4>      0:uw                                   
+
+// Move second 8x8 words of Y to dest GRF
+    mov (4) uwDEST_Y(2,1)<4>       uwAVS_RESPONSE_2(0,0)<4;4,1>                                      
+    mov (4) uwDEST_Y(3,1)<4>       uwAVS_RESPONSE_2(0,8)<4;4,1>                                      
+    mov (4) uwDEST_Y(6,1)<4>       uwAVS_RESPONSE_2(0,4)<4;4,1>                                    
+    mov (4) uwDEST_Y(7,1)<4>       uwAVS_RESPONSE_2(0,12)<4;4,1>                                    
+    mov (4) uwDEST_Y(10,1)<4>      uwAVS_RESPONSE_2(1,0)<4;4,1>                                      
+    mov (4) uwDEST_Y(11,1)<4>      uwAVS_RESPONSE_2(1,8)<4;4,1>                                      
+    mov (4) uwDEST_Y(14,1)<4>      uwAVS_RESPONSE_2(1,4)<4;4,1>                                    
+    mov (4) uwDEST_Y(15,1)<4>      uwAVS_RESPONSE_2(1,12)<4;4,1>                                    
+    mov (4) uwDEST_Y(18,1)<4>      uwAVS_RESPONSE_2(2,0)<4;4,1>                                     
+    mov (4) uwDEST_Y(19,1)<4>      uwAVS_RESPONSE_2(2,8)<4;4,1>                                     
+    mov (4) uwDEST_Y(22,1)<4>      uwAVS_RESPONSE_2(2,4)<4;4,1>                                   
+    mov (4) uwDEST_Y(23,1)<4>      uwAVS_RESPONSE_2(2,12)<4;4,1>                                   
+    mov (4) uwDEST_Y(26,1)<4>      uwAVS_RESPONSE_2(3,0)<4;4,1>                                     
+    mov (4) uwDEST_Y(27,1)<4>      uwAVS_RESPONSE_2(3,8)<4;4,1>                                     
+    mov (4) uwDEST_Y(30,1)<4>      uwAVS_RESPONSE_2(3,4)<4;4,1>                                   
+    mov (4) uwDEST_Y(31,1)<4>      uwAVS_RESPONSE_2(3,12)<4;4,1>                                   
+
+// Move second 8x8 words of U to dest GRF
+    mov (4) uwDEST_Y(2,0)<4>       uwAVS_RESPONSE_2(4,0)<4;4,1>                                      
+    mov (4) uwDEST_Y(3,0)<4>       uwAVS_RESPONSE_2(4,8)<4;4,1>                                      
+    mov (4) uwDEST_Y(6,0)<4>       uwAVS_RESPONSE_2(4,4)<4;4,1>                                    
+    mov (4) uwDEST_Y(7,0)<4>       uwAVS_RESPONSE_2(4,12)<4;4,1>                                    
+    mov (4) uwDEST_Y(10,0)<4>      uwAVS_RESPONSE_2(5,0)<4;4,1>                                      
+    mov (4) uwDEST_Y(11,0)<4>      uwAVS_RESPONSE_2(5,8)<4;4,1>                                      
+    mov (4) uwDEST_Y(14,0)<4>      uwAVS_RESPONSE_2(5,4)<4;4,1>                                    
+    mov (4) uwDEST_Y(15,0)<4>      uwAVS_RESPONSE_2(5,12)<4;4,1>                                    
+    mov (4) uwDEST_Y(18,0)<4>      uwAVS_RESPONSE_2(8,0)<4;4,1>                                     
+    mov (4) uwDEST_Y(19,0)<4>      uwAVS_RESPONSE_2(8,8)<4;4,1>                                     
+    mov (4) uwDEST_Y(22,0)<4>      uwAVS_RESPONSE_2(8,4)<4;4,1>                                   
+    mov (4) uwDEST_Y(23,0)<4>      uwAVS_RESPONSE_2(8,12)<4;4,1>                                   
+    mov (4) uwDEST_Y(26,0)<4>      uwAVS_RESPONSE_2(9,0)<4;4,1>                                     
+    mov (4) uwDEST_Y(27,0)<4>      uwAVS_RESPONSE_2(9,8)<4;4,1>                                     
+    mov (4) uwDEST_Y(30,0)<4>      uwAVS_RESPONSE_2(9,4)<4;4,1>                                   
+    mov (4) uwDEST_Y(31,0)<4>      uwAVS_RESPONSE_2(9,12)<4;4,1>                                   
+
+// Move second 8x8 words of V to dest GRF
+    mov (4) uwDEST_Y(2,2)<4>       uwAVS_RESPONSE_2(6,0)<4;4,1>                                      
+    mov (4) uwDEST_Y(3,2)<4>       uwAVS_RESPONSE_2(6,8)<4;4,1>                                      
+    mov (4) uwDEST_Y(6,2)<4>       uwAVS_RESPONSE_2(6,4)<4;4,1>                                    
+    mov (4) uwDEST_Y(7,2)<4>       uwAVS_RESPONSE_2(6,12)<4;4,1>                                    
+    mov (4) uwDEST_Y(10,2)<4>      uwAVS_RESPONSE_2(7,0)<4;4,1>                                      
+    mov (4) uwDEST_Y(11,2)<4>      uwAVS_RESPONSE_2(7,8)<4;4,1>                                      
+    mov (4) uwDEST_Y(14,2)<4>      uwAVS_RESPONSE_2(7,4)<4;4,1>                                    
+    mov (4) uwDEST_Y(15,2)<4>      uwAVS_RESPONSE_2(7,12)<4;4,1>                                    
+    mov (4) uwDEST_Y(18,2)<4>      uwAVS_RESPONSE_2(10,0)<4;4,1>                                     
+    mov (4) uwDEST_Y(19,2)<4>      uwAVS_RESPONSE_2(10,8)<4;4,1>                                     
+    mov (4) uwDEST_Y(22,2)<4>      uwAVS_RESPONSE_2(10,4)<4;4,1>                                   
+    mov (4) uwDEST_Y(23,2)<4>      uwAVS_RESPONSE_2(10,12)<4;4,1>                                   
+    mov (4) uwDEST_Y(26,2)<4>      uwAVS_RESPONSE_2(11,0)<4;4,1>                                     
+    mov (4) uwDEST_Y(27,2)<4>      uwAVS_RESPONSE_2(11,8)<4;4,1>                                     
+    mov (4) uwDEST_Y(30,2)<4>      uwAVS_RESPONSE_2(11,4)<4;4,1>                                   
+    mov (4) uwDEST_Y(31,2)<4>      uwAVS_RESPONSE_2(11,12)<4;4,1>                                   
+
+// Move second 8x8 words of A to dest GRF
+    mov (4) uwDEST_Y(2,3)<4>       0:uw                                    
+    mov (4) uwDEST_Y(3,3)<4>       0:uw                                    
+    mov (4) uwDEST_Y(6,3)<4>       0:uw                                  
+    mov (4) uwDEST_Y(7,3)<4>       0:uw                                   
+    mov (4) uwDEST_Y(10,3)<4>      0:uw                                    
+    mov (4) uwDEST_Y(11,3)<4>      0:uw                                    
+    mov (4) uwDEST_Y(14,3)<4>      0:uw                                  
+    mov (4) uwDEST_Y(15,3)<4>      0:uw                                   
+    mov (4) uwDEST_Y(18,3)<4>      0:uw                                    
+    mov (4) uwDEST_Y(19,3)<4>      0:uw                                    
+    mov (4) uwDEST_Y(22,3)<4>      0:uw                                  
+    mov (4) uwDEST_Y(23,3)<4>      0:uw                                   
+    mov (4) uwDEST_Y(26,3)<4>      0:uw                                    
+    mov (4) uwDEST_Y(27,3)<4>      0:uw                                    
+    mov (4) uwDEST_Y(30,3)<4>      0:uw                                  
+    mov (4) uwDEST_Y(31,3)<4>      0:uw                                   
+
+/*	This section will be used if 16-bit output is needed in planar format -vK
+     // Move 1st 8x8 words of Y to dest GRF at lower 8 words of each GRF.
+    $for(0; <8/2; 1) {
+        mov (8) uwDEST_Y(%1*2)<1>        uwAVS_RESPONSE(%1,0)<8;4,1>     
+        mov (8) uwDEST_Y(%1*2+1)<1>      uwAVS_RESPONSE(%1,8)<8;4,1>   
+    } 
+    
+    // Move 1st 8x8 words of U to dest GRF  (Copy high byte in a word)
+    mov (8) uwDEST_U(0)<1>           uwAVS_RESPONSE(4,0)<8;4,1>      
+    mov (8) uwDEST_U(1)<1>           uwAVS_RESPONSE(4,8)<8;4,1>    
+    mov (8) uwDEST_U(2)<1>           uwAVS_RESPONSE(5,0)<8;4,1>      
+    mov (8) uwDEST_U(3)<1>           uwAVS_RESPONSE(5,8)<8;4,1>    
+    mov (8) uwDEST_U(4)<1>           uwAVS_RESPONSE(8,0)<8;4,1>      
+    mov (8) uwDEST_U(5)<1>           uwAVS_RESPONSE(8,8)<8;4,1>    
+    mov (8) uwDEST_U(6)<1>           uwAVS_RESPONSE(9,0)<8;4,1>      
+    mov (8) uwDEST_U(7)<1>           uwAVS_RESPONSE(9,8)<8;4,1>    
+
+    // Move 1st 8x8 words of V to dest GRF  
+    mov (8) uwDEST_V(0)<1>           uwAVS_RESPONSE(6,0)<8;4,1>      
+    mov (8) uwDEST_V(1)<1>           uwAVS_RESPONSE(6,8)<8;4,1>    
+    mov (8) uwDEST_V(2)<1>           uwAVS_RESPONSE(7,0)<8;4,1>      
+    mov (8) uwDEST_V(3)<1>           uwAVS_RESPONSE(7,8)<8;4,1>    
+    mov (8) uwDEST_V(4)<1>           uwAVS_RESPONSE(10,0)<8;4,1>     
+    mov (8) uwDEST_V(5)<1>           uwAVS_RESPONSE(10,8)<8;4,1>   
+    mov (8) uwDEST_V(6)<1>           uwAVS_RESPONSE(11,0)<8;4,1>     
+    mov (8) uwDEST_V(7)<1>           uwAVS_RESPONSE(11,8)<8;4,1>   
+
+    // Move 2nd 8x8 words of Y to dest GRF at higher 8 words of each GRF.
+    $for(0; <8/2; 1) {
+        mov (8) uwDEST_Y(%1*2,8)<1>      uwAVS_RESPONSE_2(%1,0)<8;4,1> 
+        mov (8) uwDEST_Y(%1*2+1,8)<1>    uwAVS_RESPONSE_2(%1,8)<8;4,1> 
+    } 
+
+    // Move 2nd 8x8 words of U to dest GRF  (Copy high byte in a word)
+    mov (8) uwDEST_U(0,8)<1>         uwAVS_RESPONSE_2(4,0)<8;4,1>      
+    mov (8) uwDEST_U(1,8)<1>         uwAVS_RESPONSE_2(4,8)<8;4,1>    
+    mov (8) uwDEST_U(2,8)<1>         uwAVS_RESPONSE_2(5,0)<8;4,1>      
+    mov (8) uwDEST_U(3,8)<1>         uwAVS_RESPONSE_2(5,8)<8;4,1>    
+    mov (8) uwDEST_U(4,8)<1>         uwAVS_RESPONSE_2(8,0)<8;4,1>      
+    mov (8) uwDEST_U(5,8)<1>         uwAVS_RESPONSE_2(8,8)<8;4,1>    
+    mov (8) uwDEST_U(6,8)<1>         uwAVS_RESPONSE_2(9,0)<8;4,1>      
+    mov (8) uwDEST_U(7,8)<1>         uwAVS_RESPONSE_2(9,8)<8;4,1>    
+
+    // Move 2nd 8x8 words of V to dest GRF  
+    mov (8) uwDEST_V(0,8)<1>         uwAVS_RESPONSE_2(6,0)<8;4,1>      
+    mov (8) uwDEST_V(1,8)<1>         uwAVS_RESPONSE_2(6,8)<8;4,1>    
+    mov (8) uwDEST_V(2,8)<1>         uwAVS_RESPONSE_2(7,0)<8;4,1>      
+    mov (8) uwDEST_V(3,8)<1>         uwAVS_RESPONSE_2(7,8)<8;4,1>    
+    mov (8) uwDEST_V(4,8)<1>         uwAVS_RESPONSE_2(10,0)<8;4,1>     
+    mov (8) uwDEST_V(5,8)<1>         uwAVS_RESPONSE_2(10,8)<8;4,1>   
+    mov (8) uwDEST_V(6,8)<1>         uwAVS_RESPONSE_2(11,0)<8;4,1>     
+    mov (8) uwDEST_V(7,8)<1>         uwAVS_RESPONSE_2(11,8)<8;4,1>   
+*/
+#else
+    // Move 1st 8x8 words of Y to dest GRF at lower 8 words of each GRF.
+    $for(0; <8/2; 1) {
+        mov (8) uwDEST_Y(%1*2)<1>        ubAVS_RESPONSE(%1,1)<16;4,2>      // Copy high byte in a word
+        mov (8) uwDEST_Y(%1*2+1)<1>      ubAVS_RESPONSE(%1,8+1)<16;4,2>    // Copy high byte in a word
+    } 
+
+    // Move 1st 8x8 words of U to dest GRF  (Copy high byte in a word)
+    mov (8) uwDEST_U(0)<1>           ubAVS_RESPONSE(4,1)<16;4,2>      
+    mov (8) uwDEST_U(1)<1>           ubAVS_RESPONSE(4,8+1)<16;4,2>    
+    mov (8) uwDEST_U(2)<1>           ubAVS_RESPONSE(5,1)<16;4,2>      
+    mov (8) uwDEST_U(3)<1>           ubAVS_RESPONSE(5,8+1)<16;4,2>    
+    mov (8) uwDEST_U(4)<1>           ubAVS_RESPONSE(8,1)<16;4,2>      
+    mov (8) uwDEST_U(5)<1>           ubAVS_RESPONSE(8,8+1)<16;4,2>    
+    mov (8) uwDEST_U(6)<1>           ubAVS_RESPONSE(9,1)<16;4,2>      
+    mov (8) uwDEST_U(7)<1>           ubAVS_RESPONSE(9,8+1)<16;4,2>    
+
+    // Move 1st 8x8 words of V to dest GRF  
+    mov (8) uwDEST_V(0)<1>           ubAVS_RESPONSE(6,1)<16;4,2>      
+    mov (8) uwDEST_V(1)<1>           ubAVS_RESPONSE(6,8+1)<16;4,2>    
+    mov (8) uwDEST_V(2)<1>           ubAVS_RESPONSE(7,1)<16;4,2>      
+    mov (8) uwDEST_V(3)<1>           ubAVS_RESPONSE(7,8+1)<16;4,2>    
+    mov (8) uwDEST_V(4)<1>           ubAVS_RESPONSE(10,1)<16;4,2>     
+    mov (8) uwDEST_V(5)<1>           ubAVS_RESPONSE(10,8+1)<16;4,2>   
+    mov (8) uwDEST_V(6)<1>           ubAVS_RESPONSE(11,1)<16;4,2>     
+    mov (8) uwDEST_V(7)<1>           ubAVS_RESPONSE(11,8+1)<16;4,2>   
+
+    // Move 2nd 8x8 words of Y to dest GRF at higher 8 words of each GRF.
+    $for(0; <8/2; 1) {
+        mov (8) uwDEST_Y(%1*2,8)<1>      ubAVS_RESPONSE_2(%1,1)<16;4,2>    // Copy high byte in a word
+        mov (8) uwDEST_Y(%1*2+1,8)<1>    ubAVS_RESPONSE_2(%1,8+1)<16;4,2>  // Copy high byte in a word
+    } 
+
+    // Move 2nd 8x8 words of U to dest GRF  (Copy high byte in a word)
+    mov (8) uwDEST_U(0,8)<1>         ubAVS_RESPONSE_2(4,1)<16;4,2>      
+    mov (8) uwDEST_U(1,8)<1>         ubAVS_RESPONSE_2(4,8+1)<16;4,2>    
+    mov (8) uwDEST_U(2,8)<1>         ubAVS_RESPONSE_2(5,1)<16;4,2>      
+    mov (8) uwDEST_U(3,8)<1>         ubAVS_RESPONSE_2(5,8+1)<16;4,2>    
+    mov (8) uwDEST_U(4,8)<1>         ubAVS_RESPONSE_2(8,1)<16;4,2>      
+    mov (8) uwDEST_U(5,8)<1>         ubAVS_RESPONSE_2(8,8+1)<16;4,2>    
+    mov (8) uwDEST_U(6,8)<1>         ubAVS_RESPONSE_2(9,1)<16;4,2>      
+    mov (8) uwDEST_U(7,8)<1>         ubAVS_RESPONSE_2(9,8+1)<16;4,2>    
+
+    // Move 2nd 8x8 words of V to dest GRF  
+    mov (8) uwDEST_V(0,8)<1>         ubAVS_RESPONSE_2(6,1)<16;4,2>      
+    mov (8) uwDEST_V(1,8)<1>         ubAVS_RESPONSE_2(6,8+1)<16;4,2>    
+    mov (8) uwDEST_V(2,8)<1>         ubAVS_RESPONSE_2(7,1)<16;4,2>      
+    mov (8) uwDEST_V(3,8)<1>         ubAVS_RESPONSE_2(7,8+1)<16;4,2>    
+    mov (8) uwDEST_V(4,8)<1>         ubAVS_RESPONSE_2(10,1)<16;4,2>     
+    mov (8) uwDEST_V(5,8)<1>         ubAVS_RESPONSE_2(10,8+1)<16;4,2>   
+    mov (8) uwDEST_V(6,8)<1>         ubAVS_RESPONSE_2(11,1)<16;4,2>     
+    mov (8) uwDEST_V(7,8)<1>         ubAVS_RESPONSE_2(11,8+1)<16;4,2>   
+#endif
+
+       // Re-define new # of lines
+       #undef nUV_NUM_OF_ROWS
+       #undef nY_NUM_OF_ROWS
+       
+       #define nY_NUM_OF_ROWS      8
+       #define nUV_NUM_OF_ROWS     8
+
diff --git a/src/shaders/post_processing/Core_Kernels/PL2_AVS_IEF_Unpack_8x4.asm b/src/shaders/post_processing/gen5_6/Core_Kernels/PL2_AVS_IEF_Unpack_8x4.asm
similarity index 100%
rename from src/shaders/post_processing/Core_Kernels/PL2_AVS_IEF_Unpack_8x4.asm
rename to src/shaders/post_processing/gen5_6/Core_Kernels/PL2_AVS_IEF_Unpack_8x4.asm
diff --git a/src/shaders/post_processing/Core_Kernels/PL2_AVS_IEF_Unpack_8x8.asm b/src/shaders/post_processing/gen5_6/Core_Kernels/PL2_AVS_IEF_Unpack_8x8.asm
similarity index 100%
rename from src/shaders/post_processing/Core_Kernels/PL2_AVS_IEF_Unpack_8x8.asm
rename to src/shaders/post_processing/gen5_6/Core_Kernels/PL2_AVS_IEF_Unpack_8x8.asm
diff --git a/src/shaders/post_processing/Core_Kernels/PL2_Scaling.asm b/src/shaders/post_processing/gen5_6/Core_Kernels/PL2_Scaling.asm
similarity index 100%
rename from src/shaders/post_processing/Core_Kernels/PL2_Scaling.asm
rename to src/shaders/post_processing/gen5_6/Core_Kernels/PL2_Scaling.asm
diff --git a/src/shaders/post_processing/Core_Kernels/PL3_AVS_IEF_16x8.asm b/src/shaders/post_processing/gen5_6/Core_Kernels/PL3_AVS_IEF_16x8.asm
similarity index 100%
rename from src/shaders/post_processing/Core_Kernels/PL3_AVS_IEF_16x8.asm
rename to src/shaders/post_processing/gen5_6/Core_Kernels/PL3_AVS_IEF_16x8.asm
diff --git a/src/shaders/post_processing/Core_Kernels/PL3_AVS_IEF_8x4.asm b/src/shaders/post_processing/gen5_6/Core_Kernels/PL3_AVS_IEF_8x4.asm
similarity index 100%
rename from src/shaders/post_processing/Core_Kernels/PL3_AVS_IEF_8x4.asm
rename to src/shaders/post_processing/gen5_6/Core_Kernels/PL3_AVS_IEF_8x4.asm
diff --git a/src/shaders/post_processing/Core_Kernels/PL3_AVS_IEF_8x8.asm b/src/shaders/post_processing/gen5_6/Core_Kernels/PL3_AVS_IEF_8x8.asm
similarity index 100%
rename from src/shaders/post_processing/Core_Kernels/PL3_AVS_IEF_8x8.asm
rename to src/shaders/post_processing/gen5_6/Core_Kernels/PL3_AVS_IEF_8x8.asm
diff --git a/src/shaders/post_processing/Core_Kernels/PL3_AVS_IEF_Unpack_16x8.asm b/src/shaders/post_processing/gen5_6/Core_Kernels/PL3_AVS_IEF_Unpack_16x8.asm
similarity index 100%
rename from src/shaders/post_processing/Core_Kernels/PL3_AVS_IEF_Unpack_16x8.asm
rename to src/shaders/post_processing/gen5_6/Core_Kernels/PL3_AVS_IEF_Unpack_16x8.asm
diff --git a/src/shaders/post_processing/Core_Kernels/PL3_AVS_IEF_Unpack_8x4.asm b/src/shaders/post_processing/gen5_6/Core_Kernels/PL3_AVS_IEF_Unpack_8x4.asm
similarity index 100%
rename from src/shaders/post_processing/Core_Kernels/PL3_AVS_IEF_Unpack_8x4.asm
rename to src/shaders/post_processing/gen5_6/Core_Kernels/PL3_AVS_IEF_Unpack_8x4.asm
diff --git a/src/shaders/post_processing/Core_Kernels/PL3_AVS_IEF_Unpack_8x8.asm b/src/shaders/post_processing/gen5_6/Core_Kernels/PL3_AVS_IEF_Unpack_8x8.asm
similarity index 100%
rename from src/shaders/post_processing/Core_Kernels/PL3_AVS_IEF_Unpack_8x8.asm
rename to src/shaders/post_processing/gen5_6/Core_Kernels/PL3_AVS_IEF_Unpack_8x8.asm
diff --git a/src/shaders/post_processing/Core_Kernels/PL3_Scaling.asm b/src/shaders/post_processing/gen5_6/Core_Kernels/PL3_Scaling.asm
similarity index 100%
rename from src/shaders/post_processing/Core_Kernels/PL3_Scaling.asm
rename to src/shaders/post_processing/gen5_6/Core_Kernels/PL3_Scaling.asm
diff --git a/src/shaders/post_processing/Core_Kernels/PL_DNDI_ALG.asm b/src/shaders/post_processing/gen5_6/Core_Kernels/PL_DNDI_ALG.asm
similarity index 100%
rename from src/shaders/post_processing/Core_Kernels/PL_DNDI_ALG.asm
rename to src/shaders/post_processing/gen5_6/Core_Kernels/PL_DNDI_ALG.asm
diff --git a/src/shaders/post_processing/Core_Kernels/PL_DNDI_ALG_UVCopy_NV11.asm b/src/shaders/post_processing/gen5_6/Core_Kernels/PL_DNDI_ALG_UVCopy_NV11.asm
similarity index 100%
rename from src/shaders/post_processing/Core_Kernels/PL_DNDI_ALG_UVCopy_NV11.asm
rename to src/shaders/post_processing/gen5_6/Core_Kernels/PL_DNDI_ALG_UVCopy_NV11.asm
diff --git a/src/shaders/post_processing/gen5_6/Core_Kernels/PL_DNDI_ALG_UVCopy_NV12.asm b/src/shaders/post_processing/gen5_6/Core_Kernels/PL_DNDI_ALG_UVCopy_NV12.asm
new file mode 100644
index 0000000..90089ac
--- /dev/null
+++ b/src/shaders/post_processing/gen5_6/Core_Kernels/PL_DNDI_ALG_UVCopy_NV12.asm
@@ -0,0 +1,107 @@
+/*
+ * All Video Processing kernels 
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * This program is licensed under the terms and conditions of the
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at
+ * http://www.opensource.org/licenses/eclipse-1.0.php.
+ *
+ */
+
+#define DI_ENABLE
+
+    #include "DNDI.inc"
+    
+    #undef  nY_NUM_OF_ROWS
+    #define nY_NUM_OF_ROWS      8       // Number of Y rows per block (4 rows for each frame) 
+    #undef  nUV_NUM_OF_ROWS
+    #define nUV_NUM_OF_ROWS     8       // Number of U/V rows per block
+
+    #undef  nSMPL_RESP_LEN
+    #define nSMPL_RESP_LEN          nSMPL_RESP_LEN_DNDI               // set the number of GRF 
+    #undef  nDPW_BLOCK_SIZE_HIST
+    #define nDPW_BLOCK_SIZE_HIST    nBLOCK_WIDTH_4+nBLOCK_HEIGHT_1    // HIST Block Size for Write: 4x1 per the macros used here (earlier note said 4x2 -- TODO confirm)
+    #undef  nDPW_BLOCK_SIZE_DN
+    #define nDPW_BLOCK_SIZE_DN      nBLOCK_WIDTH_16+nBLOCK_HEIGHT_4   // DN Block Size for Write is 16x4
+    #undef  nDPR_BLOCK_SIZE_UV
+    #define nDPR_BLOCK_SIZE_UV			nBLOCK_WIDTH_16+nBLOCK_HEIGHT_2   // DN Block Size for UV Write/Read is 16x2
+   
+////////////////////////////////////// Run the DN Algorithm ///////////////////////////////////////
+    #include "DNDI_COMMAND.asm"
+
+////////////////////////////////////// Rearrange for Internal Planar //////////////////////////////
+    // move the previous frame Y component to internal planar format
+    $for (0; <nY_NUM_OF_ROWS/2; 1) {
+        mov (16) uwDEST_Y(%1,0)<1>    ubRESP(nDI_PREV_FRAME_LUMA_OFFSET,%1*16)
+    }
+    // move the previous frame U,V components to internal planar format
+    $for (0; <nUV_NUM_OF_ROWS/2; 1) {
+        mov (8) uwDEST_U(0,%1*8)<1>   ubRESP(nDI_PREV_FRAME_CHROMA_OFFSET,%1*16+1)<16;8,2>  //U pixels
+        mov (8) uwDEST_V(0,%1*8)<1>   ubRESP(nDI_PREV_FRAME_CHROMA_OFFSET,%1*16)<16;8,2>    //V pixels
+    }
+    // move the current frame Y component to internal planar format
+    $for (0; <nY_NUM_OF_ROWS/2; 1) {
+        mov (16) uwDEST_Y(%1+4,0)<1>  ubRESP(nDI_CURR_FRAME_LUMA_OFFSET,%1*16)
+    }
+    // move the current frame U,V components to internal planar format
+    $for (0; <nUV_NUM_OF_ROWS/2; 1) {
+        mov (8) uwDEST_U(2,%1*8)<1>   ubRESP(nDI_CURR_FRAME_CHROMA_OFFSET,%1*16+1)<16;8,2>  //U pixels
+        mov (8) uwDEST_V(2,%1*8)<1>   ubRESP(nDI_CURR_FRAME_CHROMA_OFFSET,%1*16)<16;8,2>    //V pixels
+    }
+
+////////////////////////////////////// Save the STMM Data for Next Run /////////////////////////
+    // Write STMM to memory
+    shr (1)     rMSGSRC.0<1>:ud        wORIX<0;1,0>:w            1:w     // X origin / 2
+    mov (1)     rMSGSRC.1<1>:ud        wORIY<0;1,0>:w                    // Y origin
+    mov (1)     rMSGSRC.2<1>:ud        nDPW_BLOCK_SIZE_STMM:ud           // block width and height (8x4)
+    mov (8)     mudMSGHDR_STMM(0)<1>   rMSGSRC.0<8;8,1>:ud               // message header   
+    mov (8)     mudMSGHDR_STMM(1)<1>   udRESP(nDI_STMM_OFFSET,0)         // Move STMM to MRF 
+    send (8)    dNULLREG               mMSGHDR_STMM              udDUMMY_NULL    nDATAPORT_WRITE     nDPMW_MSGDSC+nDPMW_MSG_LEN_STMM+nBI_STMM_HISTORY_OUTPUT:ud      
+
+////////////////////////////////////// Save the History Data for Next Run /////////////////////////
+    #include "DI_Hist_Save.asm"
+
+////////////////////////////////////// Save the DN Curr Frame for Next Run ////////////////////////
+    add (4)     pCF_Y_OFFSET<1>:uw          ubSRC_CF_OFFSET<4;4,1>:ub  npDN_YUV:w
+    // check top/bottom field first
+    cmp.e.f0.0 (1)  null<1>:w               ubTFLD_FIRST<0;1,0>:ub     1:w
+    (f0.0) jmpi (1) TOP_FIELD_FIRST
+
+BOTTOM_FIELD_FIRST:
+    $for (0,0; <nY_NUM_OF_ROWS/2; 2,1) {
+        mov (4)     mudMSGHDR_DN(1,%1*4)<1>     udRESP(nDI_CURR_2ND_FIELD_LUMA_OFFSET,%2*4)<4;4,1> // 2nd field luma from current frame (line 0,2)
+        mov (4)     mudMSGHDR_DN(1,%1*4+4)<1>   udRESP(nDI_CURR_FRAME_LUMA_OFFSET+%2,4)<4;4,1> // 1st field luma from current frame (line 1,3)
+    }
+    jmpi (1) SAVE_DN_CURR
+    
+TOP_FIELD_FIRST:
+    $for (0,0; <nY_NUM_OF_ROWS/2; 2,1) {
+        mov (4)     mudMSGHDR_DN(1,%1*4)<1>     udRESP(nDI_CURR_FRAME_LUMA_OFFSET+%2,0)<4;4,1> // 1st field luma from current frame (line 0,2)
+        mov (4)     mudMSGHDR_DN(1,%1*4+4)<1>   udRESP(nDI_CURR_2ND_FIELD_LUMA_OFFSET,%2*4)<4;4,1> // 2nd field luma from current frame (line 1,3)
+    }
+SAVE_DN_CURR:
+    $for (0; <nY_NUM_OF_ROWS/2; 1) {
+        mov (16) mubMSGHDR_DN(1, %1*16)<1>       ubRESP(nDI_CURR_FRAME_LUMA_OFFSET,%1*16)  // NOTE(review): 4 x 16 bytes from MSGHDR_DN(1); appears to overwrite the field-ordered luma both branches above just stored there -- confirm intended
+    }
+        
+    mov (2)     rMSGSRC.0<1>:ud        wORIX<2;2,1>:w               // X origin and Y origin
+    mov (1)     rMSGSRC.2<1>:ud        nDPW_BLOCK_SIZE_DN:ud        // block width and height (16x4)
+    mov (8)     mudMSGHDR_DN(0)<1>     rMSGSRC.0<8;8,1>:ud
+    send (8)    dNULLREG    mMSGHDR_DN   udDUMMY_NULL    nDATAPORT_WRITE    nDPMW_MSGDSC+nDPMW_MSG_LEN_PL_DN_DI+nBI_DESTINATION_Y:ud
+
+
+/////////////////////////////NV12 UV Copy 422/////////////////////////////////////////////////////
+		//Read UV through DATAPORT    
+    add  (2) rMSGSRC.0<1>:d     wORIX<2;2,1>:w    wSRC_H_ORI_OFFSET<2;2,1>:w       // Source Y Block origin
+    asr (1)  rMSGSRC.1<1>:d     rMSGSRC.1<0;1,0>:d       1:w   // U/V block origin should be half of Y's
+    mov (1)  rMSGSRC.2<1>:ud    nDPR_BLOCK_SIZE_UV:ud          // U/V block width and height (16x2)
+    mov  (8) mudMSGHDR_DN<1>     rMSGSRC<8;8,1>:ud
+    send (8) udBOT_U_IO(0)<1>     mMSGHDR_DN    udDUMMY_NULL    nDATAPORT_READ    nDPMR_MSGDSC+nRESLEN_1+nBI_CURRENT_SRC_UV:ud
+
+ 		//Write UV through DATAPORT
+		mov (2)     rMSGSRC.0<1>:ud        wORIX<2;2,1>:w               // X origin and Y origin
+		asr (1)     rMSGSRC.1<1>:d         rMSGSRC.1<0;1,0>:d    1:w  // U/V block origin should be half of Y's
+    mov (1)     rMSGSRC.2<1>:ud        nDPR_BLOCK_SIZE_UV:ud        // block width and height (16x2)
+    mov (8)     mudMSGHDR_DN(0)<1>     rMSGSRC.0<8;8,1>:ud
+    mov (8)			mudMSGHDR_DN(1)<1>		 udBOT_U_IO(0)<8;8,1>
+    send (8)    dNULLREG    mMSGHDR_DN   udDUMMY_NULL    nDATAPORT_WRITE    nDPMW_MSGDSC+nMSGLEN_1+nBI_DESTINATION_UV:ud 
\ No newline at end of file
diff --git a/src/shaders/post_processing/Core_Kernels/PL_DNDI_ALG_UVCopy_P208.asm b/src/shaders/post_processing/gen5_6/Core_Kernels/PL_DNDI_ALG_UVCopy_P208.asm
similarity index 100%
rename from src/shaders/post_processing/Core_Kernels/PL_DNDI_ALG_UVCopy_P208.asm
rename to src/shaders/post_processing/gen5_6/Core_Kernels/PL_DNDI_ALG_UVCopy_P208.asm
diff --git a/src/shaders/post_processing/Core_Kernels/PL_DNDI_ALG_UVCopy_PL3.asm b/src/shaders/post_processing/gen5_6/Core_Kernels/PL_DNDI_ALG_UVCopy_PL3.asm
similarity index 100%
rename from src/shaders/post_processing/Core_Kernels/PL_DNDI_ALG_UVCopy_PL3.asm
rename to src/shaders/post_processing/gen5_6/Core_Kernels/PL_DNDI_ALG_UVCopy_PL3.asm
diff --git a/src/shaders/post_processing/Core_Kernels/PL_DN_ALG.asm b/src/shaders/post_processing/gen5_6/Core_Kernels/PL_DN_ALG.asm
similarity index 100%
rename from src/shaders/post_processing/Core_Kernels/PL_DN_ALG.asm
rename to src/shaders/post_processing/gen5_6/Core_Kernels/PL_DN_ALG.asm
diff --git a/src/shaders/post_processing/Core_Kernels/RGB_AVS_IEF_16x8.asm b/src/shaders/post_processing/gen5_6/Core_Kernels/RGB_AVS_IEF_16x8.asm
similarity index 100%
rename from src/shaders/post_processing/Core_Kernels/RGB_AVS_IEF_16x8.asm
rename to src/shaders/post_processing/gen5_6/Core_Kernels/RGB_AVS_IEF_16x8.asm
diff --git a/src/shaders/post_processing/Core_Kernels/RGB_AVS_IEF_Unpack_16x8.asm b/src/shaders/post_processing/gen5_6/Core_Kernels/RGB_AVS_IEF_Unpack_16x8.asm
similarity index 100%
rename from src/shaders/post_processing/Core_Kernels/RGB_AVS_IEF_Unpack_16x8.asm
rename to src/shaders/post_processing/gen5_6/Core_Kernels/RGB_AVS_IEF_Unpack_16x8.asm
diff --git a/src/shaders/post_processing/Core_Kernels/RGB_AVS_IEF_Unscramble_16x8.asm b/src/shaders/post_processing/gen5_6/Core_Kernels/RGB_AVS_IEF_Unscramble_16x8.asm
similarity index 100%
rename from src/shaders/post_processing/Core_Kernels/RGB_AVS_IEF_Unscramble_16x8.asm
rename to src/shaders/post_processing/gen5_6/Core_Kernels/RGB_AVS_IEF_Unscramble_16x8.asm
diff --git a/src/shaders/post_processing/Core_Kernels/RGB_Scaling.asm b/src/shaders/post_processing/gen5_6/Core_Kernels/RGB_Scaling.asm
similarity index 100%
rename from src/shaders/post_processing/Core_Kernels/RGB_Scaling.asm
rename to src/shaders/post_processing/gen5_6/Core_Kernels/RGB_Scaling.asm
diff --git a/src/shaders/post_processing/Core_Kernels/Scaling.inc b/src/shaders/post_processing/gen5_6/Core_Kernels/Scaling.inc
similarity index 100%
rename from src/shaders/post_processing/Core_Kernels/Scaling.inc
rename to src/shaders/post_processing/gen5_6/Core_Kernels/Scaling.inc
diff --git a/src/shaders/post_processing/gen5_6/Makefile.am b/src/shaders/post_processing/gen5_6/Makefile.am
new file mode 100755
index 0000000..8642e61
--- /dev/null
+++ b/src/shaders/post_processing/gen5_6/Makefile.am
@@ -0,0 +1,215 @@
+
+INTEL_G4I = 
+
+INTEL_G4A	= null.g4a
+INTEL_G4B	= null.g4b
+INTEL_G4B_GEN5	= null.g4b.gen5
+INTEL_G6A	= null.g6a
+INTEL_G6B	= null.g6b
+
+INTEL_PP_G4B_GEN5 = \
+	nv12_avs_nv12.g4b.gen5			\
+	nv12_dn_nv12.g4b.gen5			\
+	nv12_dndi_nv12.g4b.gen5			\
+	nv12_load_save_nv12.g4b.gen5		\
+	nv12_load_save_pl3.g4b.gen5		\
+	nv12_scaling_nv12.g4b.gen5		\
+	pl3_load_save_nv12.g4b.gen5		\
+	pl3_load_save_pl3.g4b.gen5		\
+	pl3_load_save_pa.g4b.gen5		\
+	nv12_load_save_pa.g4b.gen5		\
+	pa_load_save_nv12.g4b.gen5		\
+	pa_load_save_pl3.g4b.gen5		\
+	$(NULL)
+
+INTEL_PP_G6B = \
+	nv12_avs_nv12.g6b			\
+	nv12_dn_nv12.g6b			\
+	nv12_dndi_nv12.g6b			\
+	nv12_load_save_nv12.g6b			\
+	nv12_load_save_pl3.g6b			\
+	nv12_scaling_nv12.g6b			\
+	pl3_load_save_nv12.g6b			\
+	pl3_load_save_pl3.g6b			\
+	pl3_load_save_pa.g6b			\
+	nv12_load_save_pa.g6b			\
+	pa_load_save_nv12.g6b			\
+	pa_load_save_pl3.g6b			\
+	$(NULL)
+
+INTEL_PP_ASM = \
+	nv12_avs_nv12.asm			\
+	nv12_dn_nv12.asm			\
+	nv12_dndi_nv12.asm			\
+	nv12_load_save_nv12.asm			\
+	nv12_load_save_pl3.asm			\
+	nv12_scaling_nv12.asm			\
+	pl3_load_save_nv12.asm			\
+	pl3_load_save_pl3.asm			\
+	pl3_load_save_pa.asm			\
+	nv12_load_save_pa.asm			\
+	pa_load_save_nv12.asm			\
+	pa_load_save_pl3.asm			\
+	$(NULL)
+
+INTEL_PP_ASM += \
+	Common/AYUV_Load_16x8.asm			\
+	Common/IMC3_Load_8x4.asm			\
+	Common/IMC3_Load_8x5.asm			\
+	Common/IMC3_Load_9x5.asm			\
+	Common/Init_All_Regs.asm			\
+	Common/Multiple_Loop.asm			\
+	Common/Multiple_Loop_Head.asm			\
+	Common/NV11_Load_4x8.asm			\
+	Common/NV11_Load_5x8.asm			\
+	Common/NV12_Load_8x4.asm			\
+	Common/NV12_Load_8x5.asm			\
+	Common/NV12_Load_9x5.asm			\
+	Common/P208_Load_8x8.asm			\
+	Common/P208_Load_9x8.asm			\
+	Common/PA_Load_8x8.asm				\
+	Common/PA_Load_9x8.asm				\
+	Common/PL16x8_PL8x4.asm				\
+	Common/PL16x8_PL8x8.asm				\
+	Common/PL4x8_Save_NV11.asm			\
+	Common/PL5x8_PL16x8.asm				\
+	Common/PL5x8_PL8x8.asm				\
+	Common/PL8x4_Save_IMC3.asm			\
+	Common/PL8x4_Save_NV12.asm			\
+	Common/PL8x5_PL8x8.asm				\
+	Common/PL8x8_PL8x4.asm				\
+	Common/PL8x8_Save_P208.asm			\
+	Common/PL8x8_Save_PA.asm			\
+	Common/PL9x5_PL16x8.asm				\
+	Common/PL9x8_PL16x8.asm				\
+	Common/RGB16x8_Save_RGB.asm			\
+	Common/RGB16x8_Save_RGB16.asm			\
+	Common/RGB16x8_Save_Y416.asm			\
+	Common/RGB_Pack.asm				\
+	Common/SetupVPKernel.asm			\
+	Common/readSampler16x1.asm			\
+	Core_Kernels/AVS_SetupFirstBlock.asm		\
+	Core_Kernels/AVS_SetupSecondBlock.asm		\
+	Core_Kernels/DI_Hist_Save.asm			\
+	Core_Kernels/DI_SAVE_PA.asm			\
+	Core_Kernels/DNDI_COMMAND.asm			\
+	Core_Kernels/DNDI_Hist_Save.asm			\
+	Core_Kernels/PA_AVS_IEF_16x8.asm		\
+	Core_Kernels/PA_AVS_IEF_8x4.asm			\
+	Core_Kernels/PA_AVS_IEF_8x8.asm			\
+	Core_Kernels/PA_AVS_IEF_Sample.asm		\
+	Core_Kernels/PA_AVS_IEF_Unpack_16x8.asm		\
+	Core_Kernels/PA_AVS_IEF_Unpack_8x4.asm		\
+	Core_Kernels/PA_AVS_IEF_Unpack_8x8.asm		\
+	Core_Kernels/PA_DNDI_ALG.asm			\
+	Core_Kernels/PA_DN_ALG.asm			\
+	Core_Kernels/PA_Scaling.asm			\
+	Core_Kernels/PL2_AVS_IEF_16x8.asm		\
+	Core_Kernels/PL2_AVS_IEF_8x4.asm		\
+	Core_Kernels/PL2_AVS_IEF_8x8.asm		\
+	Core_Kernels/PL2_AVS_IEF_Unpack_16x8.asm	\
+	Core_Kernels/PL2_AVS_IEF_Unpack_8x4.asm		\
+	Core_Kernels/PL2_AVS_IEF_Unpack_8x8.asm		\
+	Core_Kernels/PL2_Scaling.asm			\
+	Core_Kernels/PL3_AVS_IEF_16x8.asm		\
+	Core_Kernels/PL3_AVS_IEF_8x4.asm		\
+	Core_Kernels/PL3_AVS_IEF_8x8.asm		\
+	Core_Kernels/PL3_AVS_IEF_Unpack_16x8.asm	\
+	Core_Kernels/PL3_AVS_IEF_Unpack_8x4.asm		\
+	Core_Kernels/PL3_AVS_IEF_Unpack_8x8.asm		\
+	Core_Kernels/PL3_Scaling.asm			\
+	Core_Kernels/PL_DNDI_ALG.asm			\
+	Core_Kernels/PL_DNDI_ALG_UVCopy_NV11.asm	\
+	Core_Kernels/PL_DNDI_ALG_UVCopy_NV12.asm	\
+	Core_Kernels/PL_DNDI_ALG_UVCopy_P208.asm	\
+	Core_Kernels/PL_DNDI_ALG_UVCopy_PL3.asm		\
+	Core_Kernels/PL_DN_ALG.asm			\
+	Core_Kernels/RGB_AVS_IEF_16x8.asm		\
+	Core_Kernels/RGB_AVS_IEF_Unpack_16x8.asm	\
+	Core_Kernels/RGB_AVS_IEF_Unscramble_16x8.asm	\
+	Core_Kernels/RGB_Scaling.asm			\
+	$(NULL)
+
+INTEL_PP_INC = \
+	Common/AYUV_Load_16x8.inc		\
+	Common/Expansion.inc			\
+	Common/PA_Load.inc			\
+	Common/PL2_Load.inc			\
+	Common/PL3_Load.inc			\
+	Common/PL4x8_Save_NV11.inc		\
+	Common/PL8x4_Save_IMC3.inc		\
+	Common/PL8x4_Save_NV12.inc		\
+	Common/PL8x8_PL8x4.inc			\
+	Common/PL8x8_Save_P208.inc		\
+	Common/PL8x8_Save_PA.inc		\
+	Common/RGB16x8_Save_RGB.inc		\
+	Common/RGB16x8_Save_RGB16.inc		\
+	Common/RGB16x8_Save_Y416.inc		\
+	Common/common.inc			\
+	Common/undefall.inc			\
+	Core_Kernels/AVS_IEF.inc		\
+	Core_Kernels/DI.inc			\
+	Core_Kernels/DNDI.inc			\
+	Core_Kernels/Scaling.inc		\
+	$(NULL)
+
+INTEL_PP_GEN5_ASM = $(INTEL_PP_G4B_GEN5:%.g4b.gen5=%.g5s)
+INTEL_PP_GEN6_ASM = $(INTEL_PP_G6B:%.g6b=%.g6s)
+
+TARGETS  =
+if HAVE_GEN4ASM
+TARGETS += $(INTEL_PP_G4B_GEN5)
+TARGETS += $(INTEL_PP_G6B)
+endif
+
+all-local: $(TARGETS)
+
+SUFFIXES = .g4a .g4b .g4b.gen5 .g6a .g6b .g5s .g6s .asm
+
+if HAVE_GEN4ASM
+.g4a.g4b:
+	$(AM_V_GEN)m4 $*.g4a > $*.g4m			&& \
+	$(GEN4ASM) -o $@ $*.g4m				&& \
+	$(GEN4ASM) -g 5 -o $@.gen5 $*.g4m		&& \
+	rm $*.g4m
+
+.g6a.g6b:
+	$(AM_V_GEN)m4 $< > $*.g6m			&& \
+	$(GEN4ASM) -g 6 -o $@ $*.g6m			&& \
+	rm $*.g6m
+
+$(INTEL_G4B): $(INTEL_G4I)
+
+$(INTEL_PP_GEN5_ASM): $(INTEL_PP_ASM) $(INTEL_PP_INC)
+.asm.g5s:
+	$(AM_V_GEN)cpp -D DEV_ILK -I Common/ -I Core_Kernels $< > _pp0.$@; \
+	../../gpp.py _pp0.$@ $@; \
+	rm _pp0.$@
+.g5s.g4b.gen5:
+	$(AM_V_GEN)$(GEN4ASM) -a -o $@ -g 5 $<
+
+$(INTEL_PP_GEN6_ASM): $(INTEL_PP_ASM) $(INTEL_PP_INC)
+.asm.g6s:
+	$(AM_V_GEN)cpp -D GT -I Common/ -I Core_Kernels $< > _pp0.$@; \
+	../../gpp.py _pp0.$@ $@; \
+	rm _pp0.$@
+.g6s.g6b:
+	$(AM_V_GEN)$(GEN4ASM) -a -o $@ -g 6 $<
+endif
+
+CLEANFILES = $(INTEL_PP_GEN5_ASM) $(INTEL_PP_GEN6_ASM)
+
+EXTRA_DIST = \
+	$(INTEL_G4A)		\
+	$(INTEL_G4B)		\
+	$(INTEL_G4B_GEN5)	\
+	$(INTEL_G4I)		\
+	$(INTEL_G6A) $(INTEL_G6B)	\
+	$(INTEL_PP_ASM)		\
+	$(INTEL_PP_G4B_GEN5)	\
+	$(INTEL_PP_G6B)		\
+	$(INTEL_PP_INC)		\
+	$(NULL)
+
+# Extra clean files so that maintainer-clean removes *everything*
+MAINTAINERCLEANFILES = Makefile.in
diff --git a/src/shaders/post_processing/null.g4a b/src/shaders/post_processing/gen5_6/null.g4a
similarity index 100%
rename from src/shaders/post_processing/null.g4a
rename to src/shaders/post_processing/gen5_6/null.g4a
diff --git a/src/shaders/post_processing/null.g4b b/src/shaders/post_processing/gen5_6/null.g4b
similarity index 100%
rename from src/shaders/post_processing/null.g4b
rename to src/shaders/post_processing/gen5_6/null.g4b
diff --git a/src/shaders/post_processing/null.g4b.gen5 b/src/shaders/post_processing/gen5_6/null.g4b.gen5
similarity index 100%
rename from src/shaders/post_processing/null.g4b.gen5
rename to src/shaders/post_processing/gen5_6/null.g4b.gen5
diff --git a/src/shaders/post_processing/null.g6a b/src/shaders/post_processing/gen5_6/null.g6a
similarity index 100%
rename from src/shaders/post_processing/null.g6a
rename to src/shaders/post_processing/gen5_6/null.g6a
diff --git a/src/shaders/post_processing/null.g6b b/src/shaders/post_processing/gen5_6/null.g6b
similarity index 100%
rename from src/shaders/post_processing/null.g6b
rename to src/shaders/post_processing/gen5_6/null.g6b
diff --git a/src/shaders/post_processing/gen5_6/nv12_avs_nv12.asm b/src/shaders/post_processing/gen5_6/nv12_avs_nv12.asm
new file mode 100644
index 0000000..6e0e1b3
--- /dev/null
+++ b/src/shaders/post_processing/gen5_6/nv12_avs_nv12.asm
@@ -0,0 +1,20 @@
+// Module name: NV12_AVS_NV12
+.kernel NV12_AVS_NV12
+.code
+// The kernel body is the sequence of fragments #included below, in this order; INC_SCALING is set first (presumably read by them -- TODO confirm).
+#define INC_SCALING
+        
+#include "SetupVPKernel.asm"  // Common/ (include path comes from -I Common/ -I Core_Kernels in Makefile.am)
+#include "Multiple_Loop_Head.asm"  // Common/
+#include "PL2_AVS_IEF_16x8.asm"  // Core_Kernels/
+#include "PL16x8_PL8x4.asm"        // Common/
+#include "PL8x4_Save_NV12.asm"  // Common/
+#include "Multiple_Loop.asm"  // Common/
+
+END_THREAD  // End of Thread
+
+.end_code  
+
+.end_kernel
+
+// end of nv12_avs_nv12.asm
diff --git a/src/shaders/post_processing/gen5_6/nv12_avs_nv12.g4b.gen5 b/src/shaders/post_processing/gen5_6/nv12_avs_nv12.g4b.gen5
new file mode 100644
index 0000000..6685b46
--- /dev/null
+++ b/src/shaders/post_processing/gen5_6/nv12_avs_nv12.g4b.gen5
@@ -0,0 +1,170 @@
+   { 0x00600001, 0x21000021, 0x008d0000, 0x00000000 },
+   { 0x00000441, 0x20842e2d, 0x000000b7, 0x00100010 },
+   { 0x00000c01, 0x2086022d, 0x000000bb, 0x00000000 },
+   { 0x00000801, 0x208a01ad, 0x000000a0, 0x00000000 },
+   { 0x00200001, 0x209403bd, 0x006600a4, 0x00000000 },
+   { 0x00000040, 0x208435ad, 0x00000084, 0x000000a0 },
+   { 0x01000005, 0x20002d2c, 0x00000088, 0x80008000 },
+   { 0x00010001, 0x20c003fd, 0x00000000, 0x00000000 },
+   { 0x00000001, 0x212003bd, 0x000000c0, 0x00000000 },
+   { 0x00000001, 0x212403bd, 0x000000bc, 0x00000000 },
+   { 0x00000001, 0x213403bd, 0x00000038, 0x00000000 },
+   { 0x00200001, 0x612803bd, 0x004500a4, 0x00000000 },
+   { 0x00000001, 0x21080061, 0x00000000, 0x0000d000 },
+   { 0x00802001, 0x20000022, 0x008d0100, 0x00000000 },
+   { 0x00000031, 0x25401c09, 0x208d0000, 0x044bb401 },
+   { 0x00000001, 0x21080061, 0x00000000, 0x0000a000 },
+   { 0x00802001, 0x20400022, 0x008d0100, 0x00000000 },
+   { 0x02000031, 0x25c01c09, 0x208d0000, 0x048bb802 },
+   { 0x00000001, 0x240803bc, 0x000000a4, 0x00000000 },
+   { 0x00000048, 0x24087fbc, 0x000000bc, 0x41000000 },
+   { 0x00000048, 0x21287fbd, 0x000000c0, 0x41e00000 },
+   { 0x00000001, 0x240403bc, 0x000000bc, 0x00000000 },
+   { 0x00000048, 0x21247fbd, 0x000000c0, 0x41000000 },
+   { 0x00000001, 0x21080061, 0x00000000, 0x0000d000 },
+   { 0x00802001, 0x20000022, 0x008d0100, 0x00000000 },
+   { 0x00000031, 0x27401c09, 0x208d0000, 0x044bb401 },
+   { 0x00000001, 0x21080061, 0x00000000, 0x0000a000 },
+   { 0x00802001, 0x20400022, 0x008d0100, 0x00000000 },
+   { 0x02000031, 0x27c01c09, 0x208d0000, 0x048bb802 },
+   { 0x00600001, 0x21400229, 0x00aa0541, 0x00000000 },
+   { 0x00600001, 0x21600229, 0x00aa0549, 0x00000000 },
+   { 0x00600001, 0x21800229, 0x00aa0561, 0x00000000 },
+   { 0x00600001, 0x21a00229, 0x00aa0569, 0x00000000 },
+   { 0x00600001, 0x21c00229, 0x00aa0581, 0x00000000 },
+   { 0x00600001, 0x21e00229, 0x00aa0589, 0x00000000 },
+   { 0x00600001, 0x22000229, 0x00aa05a1, 0x00000000 },
+   { 0x00600001, 0x22200229, 0x00aa05a9, 0x00000000 },
+   { 0x00600001, 0x22400229, 0x00aa05c1, 0x00000000 },
+   { 0x00600001, 0x22600229, 0x00aa05c9, 0x00000000 },
+   { 0x00600001, 0x22800229, 0x00aa05e1, 0x00000000 },
+   { 0x00600001, 0x22a00229, 0x00aa05e9, 0x00000000 },
+   { 0x00600001, 0x22c00229, 0x00aa0641, 0x00000000 },
+   { 0x00600001, 0x22e00229, 0x00aa0649, 0x00000000 },
+   { 0x00600001, 0x23000229, 0x00aa0661, 0x00000000 },
+   { 0x00600001, 0x23200229, 0x00aa0669, 0x00000000 },
+   { 0x00600001, 0x23400229, 0x00aa0601, 0x00000000 },
+   { 0x00600001, 0x23600229, 0x00aa0609, 0x00000000 },
+   { 0x00600001, 0x23800229, 0x00aa0621, 0x00000000 },
+   { 0x00600001, 0x23a00229, 0x00aa0629, 0x00000000 },
+   { 0x00600001, 0x23c00229, 0x00aa0681, 0x00000000 },
+   { 0x00600001, 0x23e00229, 0x00aa0689, 0x00000000 },
+   { 0x00600001, 0x24000229, 0x00aa06a1, 0x00000000 },
+   { 0x00600001, 0x24200229, 0x00aa06a9, 0x00000000 },
+   { 0x00600001, 0x21500229, 0x00aa0741, 0x00000000 },
+   { 0x00600001, 0x21700229, 0x00aa0749, 0x00000000 },
+   { 0x00600001, 0x21900229, 0x00aa0761, 0x00000000 },
+   { 0x00600001, 0x21b00229, 0x00aa0769, 0x00000000 },
+   { 0x00600001, 0x21d00229, 0x00aa0781, 0x00000000 },
+   { 0x00600001, 0x21f00229, 0x00aa0789, 0x00000000 },
+   { 0x00600001, 0x22100229, 0x00aa07a1, 0x00000000 },
+   { 0x00600001, 0x22300229, 0x00aa07a9, 0x00000000 },
+   { 0x00600001, 0x22500229, 0x00aa07c1, 0x00000000 },
+   { 0x00600001, 0x22700229, 0x00aa07c9, 0x00000000 },
+   { 0x00600001, 0x22900229, 0x00aa07e1, 0x00000000 },
+   { 0x00600001, 0x22b00229, 0x00aa07e9, 0x00000000 },
+   { 0x00600001, 0x22d00229, 0x00aa0841, 0x00000000 },
+   { 0x00600001, 0x22f00229, 0x00aa0849, 0x00000000 },
+   { 0x00600001, 0x23100229, 0x00aa0861, 0x00000000 },
+   { 0x00600001, 0x23300229, 0x00aa0869, 0x00000000 },
+   { 0x00600001, 0x23500229, 0x00aa0801, 0x00000000 },
+   { 0x00600001, 0x23700229, 0x00aa0809, 0x00000000 },
+   { 0x00600001, 0x23900229, 0x00aa0821, 0x00000000 },
+   { 0x00600001, 0x23b00229, 0x00aa0829, 0x00000000 },
+   { 0x00600001, 0x23d00229, 0x00aa0881, 0x00000000 },
+   { 0x00600001, 0x23f00229, 0x00aa0889, 0x00000000 },
+   { 0x00600001, 0x24100229, 0x00aa08a1, 0x00000000 },
+   { 0x00600001, 0x24300229, 0x00aa08a9, 0x00000000 },
+   { 0x00600001, 0x22400129, 0x00ae0240, 0x00000000 },
+   { 0x00600001, 0x23400129, 0x00ae0340, 0x00000000 },
+   { 0x00600001, 0x22500129, 0x00ae0280, 0x00000000 },
+   { 0x00600001, 0x23500129, 0x00ae0380, 0x00000000 },
+   { 0x00600001, 0x22600129, 0x00ae02c0, 0x00000000 },
+   { 0x00600001, 0x23600129, 0x00ae03c0, 0x00000000 },
+   { 0x00600001, 0x22700129, 0x00ae0300, 0x00000000 },
+   { 0x00600001, 0x23700129, 0x00ae0400, 0x00000000 },
+   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+   { 0x00200001, 0x202001a6, 0x004500a0, 0x00000000 },
+   { 0x00000001, 0x20280062, 0x00000000, 0x0007000f },
+   { 0x00000005, 0x24000c20, 0x000000b8, 0x00ffffff },
+   { 0x04000010, 0x20000c04, 0x00000400, 0x00ffffff },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000056 },
+   { 0x01600031, 0x27000c01, 0x408d0000, 0x0248a007 },
+   { 0x0000040c, 0x21043da1, 0x000000a2, 0x00010001 },
+   { 0x00000801, 0x21080061, 0x00000000, 0x0003000f },
+   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+   { 0x01600031, 0x28000c01, 0x408d0000, 0x0228a008 },
+   { 0x00200001, 0x210001a1, 0x004500a0, 0x00000000 },
+   { 0x00000001, 0x21080061, 0x00000000, 0x0007000f },
+   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+   { 0x00000001, 0x26020228, 0x000000ba, 0x00000000 },
+   { 0x00610001, 0x24400129, 0x020000b8, 0x00000000 },
+   { 0x00710001, 0x24400169, 0x02000000, 0x00000000 },
+   { 0x00000001, 0x24600061, 0x00000000, 0x00040001 },
+   { 0x00000001, 0x24640061, 0x00000000, 0x00400010 },
+   { 0x00000001, 0x24680061, 0x00000000, 0x04000100 },
+   { 0x00000001, 0x246c0061, 0x00000000, 0x40001000 },
+   { 0x00000001, 0x26020128, 0x00000440, 0x00000000 },
+   { 0x00910001, 0x41400231, 0x02b10700, 0x00000000 },
+   { 0x02600005, 0x2000252c, 0x02000440, 0x008d0460 },
+   { 0x00710001, 0x42400231, 0x02ae0800, 0x00000000 },
+   { 0x00710001, 0x43400231, 0x02ae0801, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x00000442, 0x00000000 },
+   { 0x00910001, 0x41600231, 0x02b10710, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x00000444, 0x00000000 },
+   { 0x00910001, 0x41800231, 0x02b10720, 0x00000000 },
+   { 0x02600005, 0x2000252c, 0x02000444, 0x008d0460 },
+   { 0x00710001, 0x42500231, 0x02ae0810, 0x00000000 },
+   { 0x00710001, 0x43500231, 0x02ae0811, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x00000446, 0x00000000 },
+   { 0x00910001, 0x41a00231, 0x02b10730, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x00000448, 0x00000000 },
+   { 0x00910001, 0x41c00231, 0x02b10740, 0x00000000 },
+   { 0x02600005, 0x2000252c, 0x02000448, 0x008d0460 },
+   { 0x00710001, 0x42600231, 0x02ae0820, 0x00000000 },
+   { 0x00710001, 0x43600231, 0x02ae0821, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x0000044a, 0x00000000 },
+   { 0x00910001, 0x41e00231, 0x02b10750, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x0000044c, 0x00000000 },
+   { 0x00910001, 0x42000231, 0x02b10760, 0x00000000 },
+   { 0x02600005, 0x2000252c, 0x0200044c, 0x008d0460 },
+   { 0x00710001, 0x42700231, 0x02ae0830, 0x00000000 },
+   { 0x00710001, 0x43700231, 0x02ae0831, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x0000044e, 0x00000000 },
+   { 0x00910001, 0x42200231, 0x02b10770, 0x00000000 },
+   { 0x00800001, 0x20400232, 0x00d20140, 0x00000000 },
+   { 0x00800001, 0x20500232, 0x00d20160, 0x00000000 },
+   { 0x00800001, 0x20600232, 0x00d20180, 0x00000000 },
+   { 0x00800001, 0x20700232, 0x00d201a0, 0x00000000 },
+   { 0x00800001, 0x20800232, 0x00d201c0, 0x00000000 },
+   { 0x00800001, 0x20900232, 0x00d201e0, 0x00000000 },
+   { 0x00800001, 0x20a00232, 0x00d20200, 0x00000000 },
+   { 0x00800001, 0x20b00232, 0x00d20220, 0x00000000 },
+   { 0x01600031, 0x20000c04, 0x508d0000, 0x0a082007 },
+   { 0x00200001, 0x210001a5, 0x004500a0, 0x00000000 },
+   { 0x0000000c, 0x21043ca5, 0x00000104, 0x00010001 },
+   { 0x00000001, 0x21080061, 0x00000000, 0x0003000f },
+   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+   { 0x00800001, 0x40400232, 0x00d20240, 0x00000000 },
+   { 0x00800001, 0x40410232, 0x00d20340, 0x00000000 },
+   { 0x00800001, 0x40600232, 0x00d20260, 0x00000000 },
+   { 0x00800001, 0x40610232, 0x00d20360, 0x00000000 },
+   { 0x01600031, 0x20000c04, 0x508d0000, 0x06082008 },
+   { 0x01000040, 0x20863dad, 0x00000086, 0xffffffff },
+   { 0x00000040, 0x20a03dad, 0x000000a0, 0x00100010 },
+   { 0x05000010, 0x200035ac, 0x020000a0, 0x00000084 },
+   { 0x00000041, 0x24407fbd, 0x000000bc, 0x41800000 },
+   { 0x00000040, 0x20a477bd, 0x00000440, 0x000000a4 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00010220, 0x34001c00, 0x02001400, 0xfffffece },
+   { 0x00000001, 0x20a001ad, 0x0000008a, 0x00000000 },
+   { 0x00000040, 0x20a23dad, 0x000000a2, 0x00080008 },
+   { 0x00000001, 0x20a403bd, 0x00000094, 0x00000000 },
+   { 0x00000041, 0x24407fbd, 0x00000038, 0x41000000 },
+   { 0x00000040, 0x20a877bd, 0x00000440, 0x000000a8 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0xfffffec2 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
+   { 0x0f000031, 0x20000c04, 0x708d0000, 0x82000000 },
+   { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
+   { 0x0f000031, 0x20000c04, 0x708d0000, 0x82000000 },
diff --git a/src/shaders/post_processing/gen5_6/nv12_avs_nv12.g6b b/src/shaders/post_processing/gen5_6/nv12_avs_nv12.g6b
new file mode 100644
index 0000000..5868243
--- /dev/null
+++ b/src/shaders/post_processing/gen5_6/nv12_avs_nv12.g6b
@@ -0,0 +1,243 @@
+   { 0x00600001, 0x20e00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21000061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21200061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21400061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21600061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21800061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21a00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21c00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21e00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x22000061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x22200061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x22400061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x22600061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x22800061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x22a00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x22c00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x22e00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x23000061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x23200061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x23400061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x23600061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x23800061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x23a00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x23c00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x23e00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x24000061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x24200061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x24400061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x24600061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x24800061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x24a00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x24c00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x24e00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x25000061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x25200061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x25400061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x25600061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x25800061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x25a00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x25c00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x25e00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x26000061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x26200061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x26400061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x26600061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x26800061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x26a00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x26c00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x26e00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x27000061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x27200061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x27400061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x27600061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x27800061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x27a00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x27c00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x27e00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x28000061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x28200061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x28400061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x28600061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x28800061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x28a00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x28c00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x28e00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x29000061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x29200061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x29400061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x29600061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x29800061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x29a00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x29c00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x29e00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21000021, 0x008d0000, 0x00000000 },
+   { 0x00000441, 0x20842e2d, 0x000000b7, 0x00100010 },
+   { 0x00000c01, 0x2086022d, 0x000000bb, 0x00000000 },
+   { 0x00000801, 0x208a01ad, 0x000000a0, 0x00000000 },
+   { 0x00200001, 0x209403bd, 0x006600a4, 0x00000000 },
+   { 0x00000040, 0x208435ad, 0x00000084, 0x000000a0 },
+   { 0x01000005, 0x20002d2c, 0x00000088, 0x80008000 },
+   { 0x00010001, 0x20c003fd, 0x00000000, 0x00000000 },
+   { 0x00000001, 0x212003bd, 0x000000c0, 0x00000000 },
+   { 0x00000001, 0x212403bd, 0x000000bc, 0x00000000 },
+   { 0x00000001, 0x213403bd, 0x00000038, 0x00000000 },
+   { 0x00200001, 0x612803bd, 0x004500a4, 0x00000000 },
+   { 0x00000001, 0x21080061, 0x00000000, 0x0000d000 },
+   { 0x00800001, 0x20000022, 0x008d0100, 0x00000000 },
+   { 0x02000031, 0x25401cc9, 0x00000000, 0x044bb401 },
+   { 0x00000001, 0x21080061, 0x00000000, 0x0000a000 },
+   { 0x00800001, 0x20400022, 0x008d0100, 0x00000000 },
+   { 0x02000031, 0x25c01cc9, 0x00000040, 0x048bb802 },
+   { 0x00000001, 0x240803bc, 0x000000a4, 0x00000000 },
+   { 0x00000048, 0x24087fbc, 0x000000bc, 0x41000000 },
+   { 0x00000048, 0x21287fbd, 0x000000c0, 0x41e00000 },
+   { 0x00000001, 0x240403bc, 0x000000bc, 0x00000000 },
+   { 0x00000048, 0x21247fbd, 0x000000c0, 0x41000000 },
+   { 0x00000001, 0x21080061, 0x00000000, 0x0000d000 },
+   { 0x00800001, 0x20000022, 0x008d0100, 0x00000000 },
+   { 0x02000031, 0x27401cc9, 0x00000000, 0x044bb401 },
+   { 0x00000001, 0x21080061, 0x00000000, 0x0000a000 },
+   { 0x00800001, 0x20400022, 0x008d0100, 0x00000000 },
+   { 0x02000031, 0x27c01cc9, 0x00000040, 0x048bb802 },
+   { 0x00600001, 0x21400229, 0x00aa0541, 0x00000000 },
+   { 0x00600001, 0x21600229, 0x00aa0549, 0x00000000 },
+   { 0x00600001, 0x21800229, 0x00aa0561, 0x00000000 },
+   { 0x00600001, 0x21a00229, 0x00aa0569, 0x00000000 },
+   { 0x00600001, 0x21c00229, 0x00aa0581, 0x00000000 },
+   { 0x00600001, 0x21e00229, 0x00aa0589, 0x00000000 },
+   { 0x00600001, 0x22000229, 0x00aa05a1, 0x00000000 },
+   { 0x00600001, 0x22200229, 0x00aa05a9, 0x00000000 },
+   { 0x00600001, 0x22400229, 0x00aa05c1, 0x00000000 },
+   { 0x00600001, 0x22600229, 0x00aa05c9, 0x00000000 },
+   { 0x00600001, 0x22800229, 0x00aa05e1, 0x00000000 },
+   { 0x00600001, 0x22a00229, 0x00aa05e9, 0x00000000 },
+   { 0x00600001, 0x22c00229, 0x00aa0641, 0x00000000 },
+   { 0x00600001, 0x22e00229, 0x00aa0649, 0x00000000 },
+   { 0x00600001, 0x23000229, 0x00aa0661, 0x00000000 },
+   { 0x00600001, 0x23200229, 0x00aa0669, 0x00000000 },
+   { 0x00600001, 0x23400229, 0x00aa0601, 0x00000000 },
+   { 0x00600001, 0x23600229, 0x00aa0609, 0x00000000 },
+   { 0x00600001, 0x23800229, 0x00aa0621, 0x00000000 },
+   { 0x00600001, 0x23a00229, 0x00aa0629, 0x00000000 },
+   { 0x00600001, 0x23c00229, 0x00aa0681, 0x00000000 },
+   { 0x00600001, 0x23e00229, 0x00aa0689, 0x00000000 },
+   { 0x00600001, 0x24000229, 0x00aa06a1, 0x00000000 },
+   { 0x00600001, 0x24200229, 0x00aa06a9, 0x00000000 },
+   { 0x00600001, 0x21500229, 0x00aa0741, 0x00000000 },
+   { 0x00600001, 0x21700229, 0x00aa0749, 0x00000000 },
+   { 0x00600001, 0x21900229, 0x00aa0761, 0x00000000 },
+   { 0x00600001, 0x21b00229, 0x00aa0769, 0x00000000 },
+   { 0x00600001, 0x21d00229, 0x00aa0781, 0x00000000 },
+   { 0x00600001, 0x21f00229, 0x00aa0789, 0x00000000 },
+   { 0x00600001, 0x22100229, 0x00aa07a1, 0x00000000 },
+   { 0x00600001, 0x22300229, 0x00aa07a9, 0x00000000 },
+   { 0x00600001, 0x22500229, 0x00aa07c1, 0x00000000 },
+   { 0x00600001, 0x22700229, 0x00aa07c9, 0x00000000 },
+   { 0x00600001, 0x22900229, 0x00aa07e1, 0x00000000 },
+   { 0x00600001, 0x22b00229, 0x00aa07e9, 0x00000000 },
+   { 0x00600001, 0x22d00229, 0x00aa0841, 0x00000000 },
+   { 0x00600001, 0x22f00229, 0x00aa0849, 0x00000000 },
+   { 0x00600001, 0x23100229, 0x00aa0861, 0x00000000 },
+   { 0x00600001, 0x23300229, 0x00aa0869, 0x00000000 },
+   { 0x00600001, 0x23500229, 0x00aa0801, 0x00000000 },
+   { 0x00600001, 0x23700229, 0x00aa0809, 0x00000000 },
+   { 0x00600001, 0x23900229, 0x00aa0821, 0x00000000 },
+   { 0x00600001, 0x23b00229, 0x00aa0829, 0x00000000 },
+   { 0x00600001, 0x23d00229, 0x00aa0881, 0x00000000 },
+   { 0x00600001, 0x23f00229, 0x00aa0889, 0x00000000 },
+   { 0x00600001, 0x24100229, 0x00aa08a1, 0x00000000 },
+   { 0x00600001, 0x24300229, 0x00aa08a9, 0x00000000 },
+   { 0x00600001, 0x22400129, 0x00ae0240, 0x00000000 },
+   { 0x00600001, 0x23400129, 0x00ae0340, 0x00000000 },
+   { 0x00600001, 0x22500129, 0x00ae0280, 0x00000000 },
+   { 0x00600001, 0x23500129, 0x00ae0380, 0x00000000 },
+   { 0x00600001, 0x22600129, 0x00ae02c0, 0x00000000 },
+   { 0x00600001, 0x23600129, 0x00ae03c0, 0x00000000 },
+   { 0x00600001, 0x22700129, 0x00ae0300, 0x00000000 },
+   { 0x00600001, 0x23700129, 0x00ae0400, 0x00000000 },
+   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+   { 0x00200001, 0x202001a6, 0x004500a0, 0x00000000 },
+   { 0x00000001, 0x20280062, 0x00000000, 0x0007000f },
+   { 0x00000005, 0x24000c20, 0x000000b8, 0x00ffffff },
+   { 0x04000010, 0x20000c04, 0x00000400, 0x00ffffff },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000056 },
+   { 0x04600031, 0x27000cc1, 0x00000020, 0x02498007 },
+   { 0x0000040c, 0x21043da1, 0x000000a2, 0x00010001 },
+   { 0x00000801, 0x21080061, 0x00000000, 0x0003000f },
+   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+   { 0x04600031, 0x28000cc1, 0x00000020, 0x02298008 },
+   { 0x00200001, 0x210001a1, 0x004500a0, 0x00000000 },
+   { 0x00000001, 0x21080061, 0x00000000, 0x0007000f },
+   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+   { 0x00000001, 0x26020228, 0x000000ba, 0x00000000 },
+   { 0x00610001, 0x24400129, 0x020000b8, 0x00000000 },
+   { 0x00710001, 0x24400169, 0x02000000, 0x00000000 },
+   { 0x00000001, 0x24600061, 0x00000000, 0x00040001 },
+   { 0x00000001, 0x24640061, 0x00000000, 0x00400010 },
+   { 0x00000001, 0x24680061, 0x00000000, 0x04000100 },
+   { 0x00000001, 0x246c0061, 0x00000000, 0x40001000 },
+   { 0x00000001, 0x26020128, 0x00000440, 0x00000000 },
+   { 0x00910001, 0x41400231, 0x02b10700, 0x00000000 },
+   { 0x02600005, 0x2000252c, 0x02000440, 0x008d0460 },
+   { 0x00710001, 0x42400231, 0x02ae0800, 0x00000000 },
+   { 0x00710001, 0x43400231, 0x02ae0801, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x00000442, 0x00000000 },
+   { 0x00910001, 0x41600231, 0x02b10710, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x00000444, 0x00000000 },
+   { 0x00910001, 0x41800231, 0x02b10720, 0x00000000 },
+   { 0x02600005, 0x2000252c, 0x02000444, 0x008d0460 },
+   { 0x00710001, 0x42500231, 0x02ae0810, 0x00000000 },
+   { 0x00710001, 0x43500231, 0x02ae0811, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x00000446, 0x00000000 },
+   { 0x00910001, 0x41a00231, 0x02b10730, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x00000448, 0x00000000 },
+   { 0x00910001, 0x41c00231, 0x02b10740, 0x00000000 },
+   { 0x02600005, 0x2000252c, 0x02000448, 0x008d0460 },
+   { 0x00710001, 0x42600231, 0x02ae0820, 0x00000000 },
+   { 0x00710001, 0x43600231, 0x02ae0821, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x0000044a, 0x00000000 },
+   { 0x00910001, 0x41e00231, 0x02b10750, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x0000044c, 0x00000000 },
+   { 0x00910001, 0x42000231, 0x02b10760, 0x00000000 },
+   { 0x02600005, 0x2000252c, 0x0200044c, 0x008d0460 },
+   { 0x00710001, 0x42700231, 0x02ae0830, 0x00000000 },
+   { 0x00710001, 0x43700231, 0x02ae0831, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x0000044e, 0x00000000 },
+   { 0x00910001, 0x42200231, 0x02b10770, 0x00000000 },
+   { 0x00800001, 0x20400232, 0x00d20140, 0x00000000 },
+   { 0x00800001, 0x20500232, 0x00d20160, 0x00000000 },
+   { 0x00800001, 0x20600232, 0x00d20180, 0x00000000 },
+   { 0x00800001, 0x20700232, 0x00d201a0, 0x00000000 },
+   { 0x00800001, 0x20800232, 0x00d201c0, 0x00000000 },
+   { 0x00800001, 0x20900232, 0x00d201e0, 0x00000000 },
+   { 0x00800001, 0x20a00232, 0x00d20200, 0x00000000 },
+   { 0x00800001, 0x20b00232, 0x00d20220, 0x00000000 },
+   { 0x05600031, 0x20000cc4, 0x00000020, 0x0a094007 },
+   { 0x00200001, 0x210001a5, 0x004500a0, 0x00000000 },
+   { 0x0000000c, 0x21043ca5, 0x00000104, 0x00010001 },
+   { 0x00000001, 0x21080061, 0x00000000, 0x0003000f },
+   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+   { 0x00800001, 0x40400232, 0x00d20240, 0x00000000 },
+   { 0x00800001, 0x40410232, 0x00d20340, 0x00000000 },
+   { 0x00800001, 0x40600232, 0x00d20260, 0x00000000 },
+   { 0x00800001, 0x40610232, 0x00d20360, 0x00000000 },
+   { 0x05600031, 0x20000cc4, 0x00000020, 0x06094008 },
+   { 0x01000040, 0x20863dad, 0x00000086, 0xffffffff },
+   { 0x00000040, 0x20a03dad, 0x000000a0, 0x00100010 },
+   { 0x05000010, 0x200035ac, 0x020000a0, 0x00000084 },
+   { 0x00000041, 0x24407fbd, 0x000000bc, 0x41800000 },
+   { 0x00000040, 0x20a477bd, 0x00000440, 0x000000a4 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00010220, 0x34001c00, 0x02001400, 0xfffffece },
+   { 0x00000001, 0x20a001ad, 0x0000008a, 0x00000000 },
+   { 0x00000040, 0x20a23dad, 0x000000a2, 0x00080008 },
+   { 0x00000001, 0x20a403bd, 0x00000094, 0x00000000 },
+   { 0x00000041, 0x24407fbd, 0x00000038, 0x41000000 },
+   { 0x00000040, 0x20a877bd, 0x00000440, 0x000000a8 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0xfffffec2 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
+   { 0x07000031, 0x20001cc4, 0x000001e0, 0x82000010 },
+   { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
+   { 0x07000031, 0x20001cc4, 0x000001e0, 0x82000010 },
diff --git a/src/shaders/post_processing/gen5_6/nv12_dn_nv12.asm b/src/shaders/post_processing/gen5_6/nv12_dn_nv12.asm
new file mode 100644
index 0000000..690d4c9
--- /dev/null
+++ b/src/shaders/post_processing/gen5_6/nv12_dn_nv12.asm
@@ -0,0 +1,26 @@
+// Module name: NV12_DN_NV12 -- kernel built purely from shared include fragments (NV12 load -> PL_DN_ALG -> NV12 save, going by the include names; "DN" is presumably denoise -- TODO confirm)
+.kernel NV12_DN_NV12
+.code
+// INC_DN is defined ahead of every #include below; presumably the shared fragments test it -- TODO confirm.
+#define INC_DN
+        
+#include "SetupVPKernel.asm"
+#include "Multiple_Loop_Head.asm"
+// LOAD_UV_ONLY is defined only for the duration of the NV12_Load_8x4.asm include and undefined right after it.
+#define LOAD_UV_ONLY
+#include "NV12_Load_8x4.asm"
+#undef LOAD_UV_ONLY
+// Remaining fragments, in order: PL_DN_ALG.asm, PL8x4_Save_NV12.asm, Multiple_Loop.asm.
+#include "PL_DN_ALG.asm"        
+        
+#include "PL8x4_Save_NV12.asm"
+        
+#include "Multiple_Loop.asm"
+
+END_THREAD  // End of Thread
+
+.end_code  
+
+.end_kernel
+
+// end of nv12_dn_nv12.asm
diff --git a/src/shaders/post_processing/gen5_6/nv12_dn_nv12.g4b.gen5 b/src/shaders/post_processing/gen5_6/nv12_dn_nv12.g4b.gen5
new file mode 100644
index 0000000..13164b3
--- /dev/null
+++ b/src/shaders/post_processing/gen5_6/nv12_dn_nv12.g4b.gen5
@@ -0,0 +1,113 @@
+   { 0x00600001, 0x21000021, 0x008d0000, 0x00000000 },
+   { 0x00000441, 0x20842e2d, 0x000000b7, 0x00100010 },
+   { 0x00000c01, 0x2086022d, 0x000000bb, 0x00000000 },
+   { 0x00000801, 0x208a01ad, 0x000000a0, 0x00000000 },
+   { 0x00200001, 0x209403bd, 0x006600a4, 0x00000000 },
+   { 0x00000040, 0x208435ad, 0x00000084, 0x000000a0 },
+   { 0x00200040, 0x210035a5, 0x004500a0, 0x00450074 },
+   { 0x0000000c, 0x21043ca5, 0x00000104, 0x00010001 },
+   { 0x00000001, 0x21080061, 0x00000000, 0x0003000f },
+   { 0x00600001, 0x20400022, 0x008d0100, 0x00000000 },
+   { 0x02600031, 0x28000c01, 0x408d0000, 0x0228a002 },
+   { 0x00800001, 0x22600229, 0x00d20820, 0x00000000 },
+   { 0x00800001, 0x23600229, 0x00d20821, 0x00000000 },
+   { 0x00800001, 0x22400229, 0x00d20800, 0x00000000 },
+   { 0x00800001, 0x23400229, 0x00d20801, 0x00000000 },
+   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+   { 0x00000001, 0x204801aa, 0x000000a0, 0x00000000 },
+   { 0x00000001, 0x205801aa, 0x000000a2, 0x00000000 },
+   { 0x01600031, 0x24400c01, 0x208d0000, 0x045b8004 },
+   { 0x00800001, 0x21400229, 0x00b10440, 0x00000000 },
+   { 0x00800001, 0x21600229, 0x00b10450, 0x00000000 },
+   { 0x00800001, 0x21800229, 0x00b10460, 0x00000000 },
+   { 0x00800001, 0x21a00229, 0x00b10470, 0x00000000 },
+   { 0x00800001, 0x21c00229, 0x00b10480, 0x00000000 },
+   { 0x00800001, 0x21e00229, 0x00b10490, 0x00000000 },
+   { 0x00800001, 0x22000229, 0x00b104a0, 0x00000000 },
+   { 0x00800001, 0x22200229, 0x00b104b0, 0x00000000 },
+   { 0x00200008, 0x21003da1, 0x004500a0, 0x00020002 },
+   { 0x00000040, 0x21002421, 0x00000100, 0x00000034 },
+   { 0x00000001, 0x21080061, 0x00000000, 0x00010003 },
+   { 0x00600001, 0x21a00022, 0x008d0100, 0x00000000 },
+   { 0x00200001, 0x21c00022, 0x004504c0, 0x00000000 },
+   { 0x0d600031, 0x20000c04, 0x508d0000, 0x04082014 },
+   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+   { 0x00200001, 0x202001a6, 0x004500a0, 0x00000000 },
+   { 0x00000001, 0x20280062, 0x00000000, 0x0007000f },
+   { 0x00000005, 0x24000c20, 0x000000b8, 0x00ffffff },
+   { 0x04000010, 0x20000c04, 0x00000400, 0x00ffffff },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000056 },
+   { 0x01600031, 0x27000c01, 0x408d0000, 0x0248a007 },
+   { 0x0000040c, 0x21043da1, 0x000000a2, 0x00010001 },
+   { 0x00000801, 0x21080061, 0x00000000, 0x0003000f },
+   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+   { 0x01600031, 0x28000c01, 0x408d0000, 0x0228a008 },
+   { 0x00200001, 0x210001a1, 0x004500a0, 0x00000000 },
+   { 0x00000001, 0x21080061, 0x00000000, 0x0007000f },
+   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+   { 0x00000001, 0x26020228, 0x000000ba, 0x00000000 },
+   { 0x00610001, 0x24400129, 0x020000b8, 0x00000000 },
+   { 0x00710001, 0x24400169, 0x02000000, 0x00000000 },
+   { 0x00000001, 0x24600061, 0x00000000, 0x00040001 },
+   { 0x00000001, 0x24640061, 0x00000000, 0x00400010 },
+   { 0x00000001, 0x24680061, 0x00000000, 0x04000100 },
+   { 0x00000001, 0x246c0061, 0x00000000, 0x40001000 },
+   { 0x00000001, 0x26020128, 0x00000440, 0x00000000 },
+   { 0x00910001, 0x41400231, 0x02b10700, 0x00000000 },
+   { 0x02600005, 0x2000252c, 0x02000440, 0x008d0460 },
+   { 0x00710001, 0x42400231, 0x02ae0800, 0x00000000 },
+   { 0x00710001, 0x43400231, 0x02ae0801, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x00000442, 0x00000000 },
+   { 0x00910001, 0x41600231, 0x02b10710, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x00000444, 0x00000000 },
+   { 0x00910001, 0x41800231, 0x02b10720, 0x00000000 },
+   { 0x02600005, 0x2000252c, 0x02000444, 0x008d0460 },
+   { 0x00710001, 0x42500231, 0x02ae0810, 0x00000000 },
+   { 0x00710001, 0x43500231, 0x02ae0811, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x00000446, 0x00000000 },
+   { 0x00910001, 0x41a00231, 0x02b10730, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x00000448, 0x00000000 },
+   { 0x00910001, 0x41c00231, 0x02b10740, 0x00000000 },
+   { 0x02600005, 0x2000252c, 0x02000448, 0x008d0460 },
+   { 0x00710001, 0x42600231, 0x02ae0820, 0x00000000 },
+   { 0x00710001, 0x43600231, 0x02ae0821, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x0000044a, 0x00000000 },
+   { 0x00910001, 0x41e00231, 0x02b10750, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x0000044c, 0x00000000 },
+   { 0x00910001, 0x42000231, 0x02b10760, 0x00000000 },
+   { 0x02600005, 0x2000252c, 0x0200044c, 0x008d0460 },
+   { 0x00710001, 0x42700231, 0x02ae0830, 0x00000000 },
+   { 0x00710001, 0x43700231, 0x02ae0831, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x0000044e, 0x00000000 },
+   { 0x00910001, 0x42200231, 0x02b10770, 0x00000000 },
+   { 0x00800001, 0x20400232, 0x00d20140, 0x00000000 },
+   { 0x00800001, 0x20500232, 0x00d20160, 0x00000000 },
+   { 0x00800001, 0x20600232, 0x00d20180, 0x00000000 },
+   { 0x00800001, 0x20700232, 0x00d201a0, 0x00000000 },
+   { 0x00800001, 0x20800232, 0x00d201c0, 0x00000000 },
+   { 0x00800001, 0x20900232, 0x00d201e0, 0x00000000 },
+   { 0x00800001, 0x20a00232, 0x00d20200, 0x00000000 },
+   { 0x00800001, 0x20b00232, 0x00d20220, 0x00000000 },
+   { 0x01600031, 0x20000c04, 0x508d0000, 0x0a082007 },
+   { 0x00200001, 0x210001a5, 0x004500a0, 0x00000000 },
+   { 0x0000000c, 0x21043ca5, 0x00000104, 0x00010001 },
+   { 0x00000001, 0x21080061, 0x00000000, 0x0003000f },
+   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+   { 0x00800001, 0x40400232, 0x00d20240, 0x00000000 },
+   { 0x00800001, 0x40410232, 0x00d20340, 0x00000000 },
+   { 0x00800001, 0x40600232, 0x00d20260, 0x00000000 },
+   { 0x00800001, 0x40610232, 0x00d20360, 0x00000000 },
+   { 0x01600031, 0x20000c04, 0x508d0000, 0x06082008 },
+   { 0x01000040, 0x20863dad, 0x00000086, 0xffffffff },
+   { 0x00000040, 0x20a03dad, 0x000000a0, 0x00100010 },
+   { 0x05000010, 0x200035ac, 0x020000a0, 0x00000084 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000008 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0xffffff3a },
+   { 0x00000001, 0x20a001ad, 0x0000008a, 0x00000000 },
+   { 0x00000040, 0x20a23dad, 0x000000a2, 0x00080008 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0xffffff34 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
+   { 0x0f000031, 0x20000c04, 0x708d0000, 0x82000000 },
+   { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
+   { 0x0f000031, 0x20000c04, 0x708d0000, 0x82000000 },
diff --git a/src/shaders/post_processing/gen5_6/nv12_dn_nv12.g6b b/src/shaders/post_processing/gen5_6/nv12_dn_nv12.g6b
new file mode 100644
index 0000000..0ba2f55
--- /dev/null
+++ b/src/shaders/post_processing/gen5_6/nv12_dn_nv12.g6b
@@ -0,0 +1,186 @@
+   { 0x00600001, 0x20e00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21000061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21200061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21400061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21600061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21800061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21a00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21c00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21e00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x22000061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x22200061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x22400061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x22600061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x22800061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x22a00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x22c00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x22e00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x23000061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x23200061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x23400061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x23600061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x23800061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x23a00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x23c00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x23e00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x24000061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x24200061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x24400061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x24600061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x24800061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x24a00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x24c00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x24e00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x25000061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x25200061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x25400061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x25600061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x25800061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x25a00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x25c00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x25e00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x26000061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x26200061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x26400061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x26600061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x26800061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x26a00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x26c00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x26e00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x27000061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x27200061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x27400061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x27600061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x27800061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x27a00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x27c00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x27e00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x28000061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x28200061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x28400061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x28600061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x28800061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x28a00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x28c00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x28e00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x29000061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x29200061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x29400061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x29600061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x29800061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x29a00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x29c00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x29e00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21000021, 0x008d0000, 0x00000000 },
+   { 0x00000441, 0x20842e2d, 0x000000b7, 0x00100010 },
+   { 0x00000c01, 0x2086022d, 0x000000bb, 0x00000000 },
+   { 0x00000801, 0x208a01ad, 0x000000a0, 0x00000000 },
+   { 0x00200001, 0x209403bd, 0x006600a4, 0x00000000 },
+   { 0x00000040, 0x208435ad, 0x00000084, 0x000000a0 },
+   { 0x00200040, 0x210035a5, 0x004500a0, 0x00450074 },
+   { 0x0000000c, 0x21043ca5, 0x00000104, 0x00010001 },
+   { 0x00000001, 0x21080061, 0x00000000, 0x0003000f },
+   { 0x00600001, 0x20400022, 0x008d0100, 0x00000000 },
+   { 0x04600031, 0x28000cc1, 0x00000040, 0x02298002 },
+   { 0x00800001, 0x22600229, 0x00d20820, 0x00000000 },
+   { 0x00800001, 0x23600229, 0x00d20821, 0x00000000 },
+   { 0x00800001, 0x22400229, 0x00d20800, 0x00000000 },
+   { 0x00800001, 0x23400229, 0x00d20801, 0x00000000 },
+   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+   { 0x00000001, 0x204801aa, 0x000000a0, 0x00000000 },
+   { 0x00000001, 0x205801aa, 0x000000a2, 0x00000000 },
+   { 0x02600031, 0x24400cc1, 0x00000020, 0x045b8004 },
+   { 0x00800001, 0x21400229, 0x00b10440, 0x00000000 },
+   { 0x00800001, 0x21600229, 0x00b10450, 0x00000000 },
+   { 0x00800001, 0x21800229, 0x00b10460, 0x00000000 },
+   { 0x00800001, 0x21a00229, 0x00b10470, 0x00000000 },
+   { 0x00800001, 0x21c00229, 0x00b10480, 0x00000000 },
+   { 0x00800001, 0x21e00229, 0x00b10490, 0x00000000 },
+   { 0x00800001, 0x22000229, 0x00b104a0, 0x00000000 },
+   { 0x00800001, 0x22200229, 0x00b104b0, 0x00000000 },
+   { 0x00200008, 0x21003da1, 0x004500a0, 0x00020002 },
+   { 0x00000040, 0x21002421, 0x00000100, 0x00000034 },
+   { 0x00000001, 0x21080061, 0x00000000, 0x00010003 },
+   { 0x00600001, 0x21a00022, 0x008d0100, 0x00000000 },
+   { 0x00200001, 0x21c00022, 0x004504c0, 0x00000000 },
+   { 0x05600031, 0x20000cc4, 0x000001a0, 0x04094014 },
+   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+   { 0x00200001, 0x202001a6, 0x004500a0, 0x00000000 },
+   { 0x00000001, 0x20280062, 0x00000000, 0x0007000f },
+   { 0x00000005, 0x24000c20, 0x000000b8, 0x00ffffff },
+   { 0x04000010, 0x20000c04, 0x00000400, 0x00ffffff },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000056 },
+   { 0x04600031, 0x27000cc1, 0x00000020, 0x02498007 },
+   { 0x0000040c, 0x21043da1, 0x000000a2, 0x00010001 },
+   { 0x00000801, 0x21080061, 0x00000000, 0x0003000f },
+   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+   { 0x04600031, 0x28000cc1, 0x00000020, 0x02298008 },
+   { 0x00200001, 0x210001a1, 0x004500a0, 0x00000000 },
+   { 0x00000001, 0x21080061, 0x00000000, 0x0007000f },
+   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+   { 0x00000001, 0x26020228, 0x000000ba, 0x00000000 },
+   { 0x00610001, 0x24400129, 0x020000b8, 0x00000000 },
+   { 0x00710001, 0x24400169, 0x02000000, 0x00000000 },
+   { 0x00000001, 0x24600061, 0x00000000, 0x00040001 },
+   { 0x00000001, 0x24640061, 0x00000000, 0x00400010 },
+   { 0x00000001, 0x24680061, 0x00000000, 0x04000100 },
+   { 0x00000001, 0x246c0061, 0x00000000, 0x40001000 },
+   { 0x00000001, 0x26020128, 0x00000440, 0x00000000 },
+   { 0x00910001, 0x41400231, 0x02b10700, 0x00000000 },
+   { 0x02600005, 0x2000252c, 0x02000440, 0x008d0460 },
+   { 0x00710001, 0x42400231, 0x02ae0800, 0x00000000 },
+   { 0x00710001, 0x43400231, 0x02ae0801, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x00000442, 0x00000000 },
+   { 0x00910001, 0x41600231, 0x02b10710, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x00000444, 0x00000000 },
+   { 0x00910001, 0x41800231, 0x02b10720, 0x00000000 },
+   { 0x02600005, 0x2000252c, 0x02000444, 0x008d0460 },
+   { 0x00710001, 0x42500231, 0x02ae0810, 0x00000000 },
+   { 0x00710001, 0x43500231, 0x02ae0811, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x00000446, 0x00000000 },
+   { 0x00910001, 0x41a00231, 0x02b10730, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x00000448, 0x00000000 },
+   { 0x00910001, 0x41c00231, 0x02b10740, 0x00000000 },
+   { 0x02600005, 0x2000252c, 0x02000448, 0x008d0460 },
+   { 0x00710001, 0x42600231, 0x02ae0820, 0x00000000 },
+   { 0x00710001, 0x43600231, 0x02ae0821, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x0000044a, 0x00000000 },
+   { 0x00910001, 0x41e00231, 0x02b10750, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x0000044c, 0x00000000 },
+   { 0x00910001, 0x42000231, 0x02b10760, 0x00000000 },
+   { 0x02600005, 0x2000252c, 0x0200044c, 0x008d0460 },
+   { 0x00710001, 0x42700231, 0x02ae0830, 0x00000000 },
+   { 0x00710001, 0x43700231, 0x02ae0831, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x0000044e, 0x00000000 },
+   { 0x00910001, 0x42200231, 0x02b10770, 0x00000000 },
+   { 0x00800001, 0x20400232, 0x00d20140, 0x00000000 },
+   { 0x00800001, 0x20500232, 0x00d20160, 0x00000000 },
+   { 0x00800001, 0x20600232, 0x00d20180, 0x00000000 },
+   { 0x00800001, 0x20700232, 0x00d201a0, 0x00000000 },
+   { 0x00800001, 0x20800232, 0x00d201c0, 0x00000000 },
+   { 0x00800001, 0x20900232, 0x00d201e0, 0x00000000 },
+   { 0x00800001, 0x20a00232, 0x00d20200, 0x00000000 },
+   { 0x00800001, 0x20b00232, 0x00d20220, 0x00000000 },
+   { 0x05600031, 0x20000cc4, 0x00000020, 0x0a094007 },
+   { 0x00200001, 0x210001a5, 0x004500a0, 0x00000000 },
+   { 0x0000000c, 0x21043ca5, 0x00000104, 0x00010001 },
+   { 0x00000001, 0x21080061, 0x00000000, 0x0003000f },
+   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+   { 0x00800001, 0x40400232, 0x00d20240, 0x00000000 },
+   { 0x00800001, 0x40410232, 0x00d20340, 0x00000000 },
+   { 0x00800001, 0x40600232, 0x00d20260, 0x00000000 },
+   { 0x00800001, 0x40610232, 0x00d20360, 0x00000000 },
+   { 0x05600031, 0x20000cc4, 0x00000020, 0x06094008 },
+   { 0x01000040, 0x20863dad, 0x00000086, 0xffffffff },
+   { 0x00000040, 0x20a03dad, 0x000000a0, 0x00100010 },
+   { 0x05000010, 0x200035ac, 0x020000a0, 0x00000084 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000008 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0xffffff3a },
+   { 0x00000001, 0x20a001ad, 0x0000008a, 0x00000000 },
+   { 0x00000040, 0x20a23dad, 0x000000a2, 0x00080008 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0xffffff34 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
+   { 0x07000031, 0x20001cc4, 0x000001e0, 0x82000010 },
+   { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
+   { 0x07000031, 0x20001cc4, 0x000001e0, 0x82000010 },
diff --git a/src/shaders/post_processing/nv12_dndi_nv12.asm b/src/shaders/post_processing/gen5_6/nv12_dndi_nv12.asm
similarity index 100%
rename from src/shaders/post_processing/nv12_dndi_nv12.asm
rename to src/shaders/post_processing/gen5_6/nv12_dndi_nv12.asm
diff --git a/src/shaders/post_processing/gen5_6/nv12_dndi_nv12.g4b.gen5 b/src/shaders/post_processing/gen5_6/nv12_dndi_nv12.g4b.gen5
new file mode 100644
index 0000000..aee45d1
--- /dev/null
+++ b/src/shaders/post_processing/gen5_6/nv12_dndi_nv12.g4b.gen5
@@ -0,0 +1,90 @@
+   { 0x00600001, 0x21000021, 0x008d0000, 0x00000000 },
+   { 0x00000441, 0x20842e2d, 0x000000b7, 0x00100010 },
+   { 0x00000c01, 0x2086022d, 0x000000bb, 0x00000000 },
+   { 0x00000801, 0x208a01ad, 0x000000a0, 0x00000000 },
+   { 0x00200001, 0x209403bd, 0x006600a4, 0x00000000 },
+   { 0x00000040, 0x208435ad, 0x00000084, 0x000000a0 },
+   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+   { 0x00000001, 0x204801aa, 0x000000a0, 0x00000000 },
+   { 0x00000001, 0x205801aa, 0x000000a2, 0x00000000 },
+   { 0x01600031, 0x24400c01, 0x208d0000, 0x04cb8004 },
+   { 0x00800001, 0x21400229, 0x00b10440, 0x00000000 },
+   { 0x00800001, 0x21600229, 0x00b10450, 0x00000000 },
+   { 0x00800001, 0x21800229, 0x00b10460, 0x00000000 },
+   { 0x00800001, 0x21a00229, 0x00b10470, 0x00000000 },
+   { 0x00600001, 0x22400229, 0x00ae0481, 0x00000000 },
+   { 0x00600001, 0x23400229, 0x00ae0480, 0x00000000 },
+   { 0x00600001, 0x22500229, 0x00ae0491, 0x00000000 },
+   { 0x00600001, 0x23500229, 0x00ae0490, 0x00000000 },
+   { 0x00600001, 0x22600229, 0x00ae04a1, 0x00000000 },
+   { 0x00600001, 0x23600229, 0x00ae04a0, 0x00000000 },
+   { 0x00600001, 0x22700229, 0x00ae04b1, 0x00000000 },
+   { 0x00600001, 0x23700229, 0x00ae04b0, 0x00000000 },
+   { 0x00800001, 0x21c00229, 0x00b104c0, 0x00000000 },
+   { 0x00800001, 0x21e00229, 0x00b104d0, 0x00000000 },
+   { 0x00800001, 0x22000229, 0x00b104e0, 0x00000000 },
+   { 0x00800001, 0x22200229, 0x00b104f0, 0x00000000 },
+   { 0x00600001, 0x22800229, 0x00ae0501, 0x00000000 },
+   { 0x00600001, 0x23800229, 0x00ae0500, 0x00000000 },
+   { 0x00600001, 0x22900229, 0x00ae0511, 0x00000000 },
+   { 0x00600001, 0x23900229, 0x00ae0510, 0x00000000 },
+   { 0x00600001, 0x22a00229, 0x00ae0521, 0x00000000 },
+   { 0x00600001, 0x23a00229, 0x00ae0520, 0x00000000 },
+   { 0x00600001, 0x22b00229, 0x00ae0531, 0x00000000 },
+   { 0x00600001, 0x23b00229, 0x00ae0530, 0x00000000 },
+   { 0x00000008, 0x21003da1, 0x000000a0, 0x00010001 },
+   { 0x00000001, 0x210401a1, 0x000000a2, 0x00000000 },
+   { 0x00000001, 0x21080061, 0x00000000, 0x00030007 },
+   { 0x00600001, 0x21600022, 0x008d0100, 0x00000000 },
+   { 0x00600001, 0x21800022, 0x008d0540, 0x00000000 },
+   { 0x0b600031, 0x20000c04, 0x508d0000, 0x04082014 },
+   { 0x00200008, 0x21003da1, 0x004500a0, 0x00020002 },
+   { 0x00000040, 0x21002421, 0x00000100, 0x00000034 },
+   { 0x00000001, 0x21080061, 0x00000000, 0x00000003 },
+   { 0x00600001, 0x21a00022, 0x008d0100, 0x00000000 },
+   { 0x00000001, 0x21c00022, 0x00000560, 0x00000000 },
+   { 0x0d600031, 0x20000c04, 0x508d0000, 0x04082014 },
+   { 0x00400040, 0x22083e28, 0x00690024, 0x07000700 },
+   { 0x01000010, 0x20003e2c, 0x0000003b, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x0000000a },
+   { 0x00400001, 0x20400022, 0x00690580, 0x00000000 },
+   { 0x00400001, 0x20500022, 0x006904d0, 0x00000000 },
+   { 0x00400001, 0x20600022, 0x00690590, 0x00000000 },
+   { 0x00400001, 0x20700022, 0x006904f0, 0x00000000 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000008 },
+   { 0x00400001, 0x20400022, 0x006904c0, 0x00000000 },
+   { 0x00400001, 0x20500022, 0x00690580, 0x00000000 },
+   { 0x00400001, 0x20600022, 0x006904e0, 0x00000000 },
+   { 0x00400001, 0x20700022, 0x00690590, 0x00000000 },
+   { 0x00800001, 0x20400232, 0x00b104c0, 0x00000000 },
+   { 0x00800001, 0x20500232, 0x00b104d0, 0x00000000 },
+   { 0x00800001, 0x20600232, 0x00b104e0, 0x00000000 },
+   { 0x00800001, 0x20700232, 0x00b104f0, 0x00000000 },
+   { 0x00200001, 0x210001a1, 0x004500a0, 0x00000000 },
+   { 0x00000001, 0x21080061, 0x00000000, 0x0003000f },
+   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+   { 0x01600031, 0x20000c04, 0x508d0000, 0x06082007 },
+   { 0x00200040, 0x210035a5, 0x004500a0, 0x00450074 },
+   { 0x0000000c, 0x21043ca5, 0x00000104, 0x00010001 },
+   { 0x00000001, 0x21080061, 0x00000000, 0x0001000f },
+   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+   { 0x01600031, 0x28000c01, 0x408d0000, 0x0218a002 },
+   { 0x00200001, 0x210001a1, 0x004500a0, 0x00000000 },
+   { 0x0000000c, 0x21043ca5, 0x00000104, 0x00010001 },
+   { 0x00000001, 0x21080061, 0x00000000, 0x0001000f },
+   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+   { 0x00600001, 0x20400022, 0x008d0800, 0x00000000 },
+   { 0x01600031, 0x20000c04, 0x508d0000, 0x04082008 },
+   { 0x01000040, 0x20863dad, 0x00000086, 0xffffffff },
+   { 0x00000040, 0x20a03dad, 0x000000a0, 0x00100010 },
+   { 0x05000010, 0x200035ac, 0x020000a0, 0x00000084 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000008 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0xffffff68 },
+   { 0x00000001, 0x20a001ad, 0x0000008a, 0x00000000 },
+   { 0x00000040, 0x20a23dad, 0x000000a2, 0x00080008 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0xffffff62 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
+   { 0x0f000031, 0x20000c04, 0x708d0000, 0x82000000 },
+   { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
+   { 0x0f000031, 0x20000c04, 0x708d0000, 0x82000000 },
diff --git a/src/shaders/post_processing/gen5_6/nv12_dndi_nv12.g6b b/src/shaders/post_processing/gen5_6/nv12_dndi_nv12.g6b
new file mode 100644
index 0000000..29003af
--- /dev/null
+++ b/src/shaders/post_processing/gen5_6/nv12_dndi_nv12.g6b
@@ -0,0 +1,163 @@
+   { 0x00600001, 0x20e00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21000061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21200061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21400061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21600061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21800061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21a00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21c00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21e00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x22000061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x22200061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x22400061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x22600061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x22800061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x22a00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x22c00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x22e00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x23000061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x23200061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x23400061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x23600061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x23800061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x23a00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x23c00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x23e00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x24000061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x24200061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x24400061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x24600061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x24800061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x24a00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x24c00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x24e00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x25000061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x25200061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x25400061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x25600061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x25800061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x25a00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x25c00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x25e00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x26000061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x26200061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x26400061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x26600061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x26800061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x26a00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x26c00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x26e00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x27000061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x27200061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x27400061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x27600061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x27800061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x27a00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x27c00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x27e00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x28000061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x28200061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x28400061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x28600061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x28800061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x28a00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x28c00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x28e00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x29000061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x29200061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x29400061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x29600061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x29800061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x29a00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x29c00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x29e00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21000021, 0x008d0000, 0x00000000 },
+   { 0x00000441, 0x20842e2d, 0x000000b7, 0x00100010 },
+   { 0x00000c01, 0x2086022d, 0x000000bb, 0x00000000 },
+   { 0x00000801, 0x208a01ad, 0x000000a0, 0x00000000 },
+   { 0x00200001, 0x209403bd, 0x006600a4, 0x00000000 },
+   { 0x00000040, 0x208435ad, 0x00000084, 0x000000a0 },
+   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+   { 0x00000001, 0x204801aa, 0x000000a0, 0x00000000 },
+   { 0x00000001, 0x205801aa, 0x000000a2, 0x00000000 },
+   { 0x02600031, 0x24400cc1, 0x00000020, 0x04cb8004 },
+   { 0x00800001, 0x21400229, 0x00b10440, 0x00000000 },
+   { 0x00800001, 0x21600229, 0x00b10450, 0x00000000 },
+   { 0x00800001, 0x21800229, 0x00b10460, 0x00000000 },
+   { 0x00800001, 0x21a00229, 0x00b10470, 0x00000000 },
+   { 0x00600001, 0x22400229, 0x00ae0481, 0x00000000 },
+   { 0x00600001, 0x23400229, 0x00ae0480, 0x00000000 },
+   { 0x00600001, 0x22500229, 0x00ae0491, 0x00000000 },
+   { 0x00600001, 0x23500229, 0x00ae0490, 0x00000000 },
+   { 0x00600001, 0x22600229, 0x00ae04a1, 0x00000000 },
+   { 0x00600001, 0x23600229, 0x00ae04a0, 0x00000000 },
+   { 0x00600001, 0x22700229, 0x00ae04b1, 0x00000000 },
+   { 0x00600001, 0x23700229, 0x00ae04b0, 0x00000000 },
+   { 0x00800001, 0x21c00229, 0x00b104c0, 0x00000000 },
+   { 0x00800001, 0x21e00229, 0x00b104d0, 0x00000000 },
+   { 0x00800001, 0x22000229, 0x00b104e0, 0x00000000 },
+   { 0x00800001, 0x22200229, 0x00b104f0, 0x00000000 },
+   { 0x00600001, 0x22800229, 0x00ae0501, 0x00000000 },
+   { 0x00600001, 0x23800229, 0x00ae0500, 0x00000000 },
+   { 0x00600001, 0x22900229, 0x00ae0511, 0x00000000 },
+   { 0x00600001, 0x23900229, 0x00ae0510, 0x00000000 },
+   { 0x00600001, 0x22a00229, 0x00ae0521, 0x00000000 },
+   { 0x00600001, 0x23a00229, 0x00ae0520, 0x00000000 },
+   { 0x00600001, 0x22b00229, 0x00ae0531, 0x00000000 },
+   { 0x00600001, 0x23b00229, 0x00ae0530, 0x00000000 },
+   { 0x00000008, 0x21003da1, 0x000000a0, 0x00010001 },
+   { 0x00000001, 0x210401a1, 0x000000a2, 0x00000000 },
+   { 0x00000001, 0x21080061, 0x00000000, 0x00030007 },
+   { 0x00600001, 0x21600022, 0x008d0100, 0x00000000 },
+   { 0x00600001, 0x21800022, 0x008d0540, 0x00000000 },
+   { 0x05600031, 0x20000cc4, 0x00000160, 0x04094014 },
+   { 0x00200008, 0x21003da1, 0x004500a0, 0x00020002 },
+   { 0x00000040, 0x21002421, 0x00000100, 0x00000034 },
+   { 0x00000001, 0x21080061, 0x00000000, 0x00000003 },
+   { 0x00600001, 0x21a00022, 0x008d0100, 0x00000000 },
+   { 0x00000001, 0x21c00022, 0x00000560, 0x00000000 },
+   { 0x05600031, 0x20000cc4, 0x000001a0, 0x04094014 },
+   { 0x00400040, 0x22083e28, 0x00690024, 0x07000700 },
+   { 0x01000010, 0x20003e2c, 0x0000003b, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x0000000a },
+   { 0x00400001, 0x20400022, 0x00690580, 0x00000000 },
+   { 0x00400001, 0x20500022, 0x006904d0, 0x00000000 },
+   { 0x00400001, 0x20600022, 0x00690590, 0x00000000 },
+   { 0x00400001, 0x20700022, 0x006904f0, 0x00000000 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000008 },
+   { 0x00400001, 0x20400022, 0x006904c0, 0x00000000 },
+   { 0x00400001, 0x20500022, 0x00690580, 0x00000000 },
+   { 0x00400001, 0x20600022, 0x006904e0, 0x00000000 },
+   { 0x00400001, 0x20700022, 0x00690590, 0x00000000 },
+   { 0x00800001, 0x20400232, 0x00b104c0, 0x00000000 },
+   { 0x00800001, 0x20500232, 0x00b104d0, 0x00000000 },
+   { 0x00800001, 0x20600232, 0x00b104e0, 0x00000000 },
+   { 0x00800001, 0x20700232, 0x00b104f0, 0x00000000 },
+   { 0x00200001, 0x210001a1, 0x004500a0, 0x00000000 },
+   { 0x00000001, 0x21080061, 0x00000000, 0x0003000f },
+   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+   { 0x05600031, 0x20000cc4, 0x00000020, 0x06094007 },
+   { 0x00200040, 0x210035a5, 0x004500a0, 0x00450074 },
+   { 0x0000000c, 0x21043ca5, 0x00000104, 0x00010001 },
+   { 0x00000001, 0x21080061, 0x00000000, 0x0001000f },
+   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+   { 0x04600031, 0x28000cc1, 0x00000020, 0x02198002 },
+   { 0x00200001, 0x210001a1, 0x004500a0, 0x00000000 },
+   { 0x0000000c, 0x21043ca5, 0x00000104, 0x00010001 },
+   { 0x00000001, 0x21080061, 0x00000000, 0x0001000f },
+   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+   { 0x00600001, 0x20400022, 0x008d0800, 0x00000000 },
+   { 0x05600031, 0x20000cc4, 0x00000020, 0x04094008 },
+   { 0x01000040, 0x20863dad, 0x00000086, 0xffffffff },
+   { 0x00000040, 0x20a03dad, 0x000000a0, 0x00100010 },
+   { 0x05000010, 0x200035ac, 0x020000a0, 0x00000084 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000008 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0xffffff68 },
+   { 0x00000001, 0x20a001ad, 0x0000008a, 0x00000000 },
+   { 0x00000040, 0x20a23dad, 0x000000a2, 0x00080008 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0xffffff62 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
+   { 0x07000031, 0x20001cc4, 0x000001e0, 0x82000010 },
+   { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
+   { 0x07000031, 0x20001cc4, 0x000001e0, 0x82000010 },
diff --git a/src/shaders/post_processing/nv12_load_save_nv12.asm b/src/shaders/post_processing/gen5_6/nv12_load_save_nv12.asm
similarity index 100%
rename from src/shaders/post_processing/nv12_load_save_nv12.asm
rename to src/shaders/post_processing/gen5_6/nv12_load_save_nv12.asm
diff --git a/src/shaders/post_processing/nv12_load_save_nv12.g4b.gen5 b/src/shaders/post_processing/gen5_6/nv12_load_save_nv12.g4b.gen5
similarity index 100%
rename from src/shaders/post_processing/nv12_load_save_nv12.g4b.gen5
rename to src/shaders/post_processing/gen5_6/nv12_load_save_nv12.g4b.gen5
diff --git a/src/shaders/post_processing/nv12_load_save_nv12.g6b b/src/shaders/post_processing/gen5_6/nv12_load_save_nv12.g6b
similarity index 100%
rename from src/shaders/post_processing/nv12_load_save_nv12.g6b
rename to src/shaders/post_processing/gen5_6/nv12_load_save_nv12.g6b
diff --git a/src/shaders/post_processing/gen5_6/nv12_load_save_pa.asm b/src/shaders/post_processing/gen5_6/nv12_load_save_pa.asm
new file mode 100755
index 0000000..3fa4494
--- /dev/null
+++ b/src/shaders/post_processing/gen5_6/nv12_load_save_pa.asm
@@ -0,0 +1,18 @@
+// Module name: NV12_LOAD_SAVE_PA
+.kernel NV12_LOAD_SAVE_PL1 // NOTE(review): label still reads PL1 in this PA file; unclear whether it is used as anything but a name
+.code
+
+#include "SetupVPKernel.asm"
+#include "Multiple_Loop_Head.asm"
+#include "NV12_Load_8x5.asm"   
+#include "PL8x5_PL8x8.asm"     
+#include "PL8x8_Save_PA.asm"
+#include "Multiple_Loop.asm"
+
+END_THREAD  // End of Thread
+
+.end_code  
+
+.end_kernel
+
+// end of nv12_load_save_pa.asm
diff --git a/src/shaders/post_processing/gen5_6/nv12_load_save_pa.g4b.gen5 b/src/shaders/post_processing/gen5_6/nv12_load_save_pa.g4b.gen5
new file mode 100644
index 0000000..dad88db
--- /dev/null
+++ b/src/shaders/post_processing/gen5_6/nv12_load_save_pa.g4b.gen5
@@ -0,0 +1,117 @@
+   { 0x00600001, 0x21000021, 0x008d0000, 0x00000000 },
+   { 0x00000441, 0x20842e2d, 0x000000b7, 0x00100010 },
+   { 0x00000c01, 0x2086022d, 0x000000bb, 0x00000000 },
+   { 0x00000801, 0x208a01ad, 0x000000a0, 0x00000000 },
+   { 0x00200001, 0x209403bd, 0x006600a4, 0x00000000 },
+   { 0x00000040, 0x208435ad, 0x00000084, 0x000000a0 },
+   { 0x00200040, 0x210035a5, 0x004500a0, 0x00450074 },
+   { 0x00000001, 0x21080061, 0x00000000, 0x0007000f },
+   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+   { 0x01600031, 0x27000c01, 0x408d0000, 0x0248a001 },
+   { 0x0000000c, 0x21043ca5, 0x00000104, 0x00010001 },
+   { 0x00000001, 0x21080061, 0x00000000, 0x0004000f },
+   { 0x00600001, 0x20400022, 0x008d0100, 0x00000000 },
+   { 0x02600031, 0x28000c01, 0x408d0000, 0x0238a002 },
+   { 0x00800001, 0x22200229, 0x00b10770, 0x00000000 },
+   { 0x00800001, 0x22000229, 0x00b10760, 0x00000000 },
+   { 0x00800001, 0x21e00229, 0x00b10750, 0x00000000 },
+   { 0x00800001, 0x21c00229, 0x00b10740, 0x00000000 },
+   { 0x00800001, 0x21a00229, 0x00b10730, 0x00000000 },
+   { 0x00800001, 0x21800229, 0x00b10720, 0x00000000 },
+   { 0x00800001, 0x21600229, 0x00b10710, 0x00000000 },
+   { 0x00800001, 0x21400229, 0x00b10700, 0x00000000 },
+   { 0x00800001, 0x22800229, 0x00ae0840, 0x00000000 },
+   { 0x00800001, 0x23800229, 0x00ae0841, 0x00000000 },
+   { 0x00800001, 0x22600229, 0x00ae0820, 0x00000000 },
+   { 0x00800001, 0x23600229, 0x00ae0821, 0x00000000 },
+   { 0x00800001, 0x22400229, 0x00ae0800, 0x00000000 },
+   { 0x00800001, 0x23400229, 0x00ae0801, 0x00000000 },
+   { 0x80600042, 0x22b02529, 0x008d0270, 0x008d0280 },
+   { 0x80600042, 0x23b02529, 0x008d0370, 0x008d0380 },
+   { 0x00600001, 0x22a00129, 0x008d0270, 0x00000000 },
+   { 0x80600042, 0x22902529, 0x008d0260, 0x008d0270 },
+   { 0x00600001, 0x23a00129, 0x008d0370, 0x00000000 },
+   { 0x80600042, 0x23902529, 0x008d0360, 0x008d0370 },
+   { 0x00600001, 0x22800129, 0x008d0260, 0x00000000 },
+   { 0x80600042, 0x22702529, 0x008d0250, 0x008d0260 },
+   { 0x00600001, 0x23800129, 0x008d0360, 0x00000000 },
+   { 0x80600042, 0x23702529, 0x008d0350, 0x008d0360 },
+   { 0x00600001, 0x22600129, 0x008d0250, 0x00000000 },
+   { 0x80600042, 0x22502529, 0x008d0240, 0x008d0250 },
+   { 0x00600001, 0x23600129, 0x008d0350, 0x00000000 },
+   { 0x80600042, 0x23502529, 0x008d0340, 0x008d0350 },
+   { 0x00400040, 0x22083e28, 0x00690028, 0x07000700 },
+   { 0x00800001, 0xd0000231, 0x00d20140, 0x00000000 },
+   { 0x00800001, 0xd0200231, 0x00d20160, 0x00000000 },
+   { 0x00800001, 0xd0400231, 0x00d20180, 0x00000000 },
+   { 0x00800001, 0xd0600231, 0x00d201a0, 0x00000000 },
+   { 0x00800001, 0xd0800231, 0x00d201c0, 0x00000000 },
+   { 0x00800001, 0xd0a00231, 0x00d201e0, 0x00000000 },
+   { 0x00800001, 0xd0c00231, 0x00d20200, 0x00000000 },
+   { 0x00800001, 0xd0e00231, 0x00d20220, 0x00000000 },
+   { 0x00600001, 0xf4000231, 0x00ae0240, 0x00000000 },
+   { 0x00600001, 0xf8000231, 0x00ae0340, 0x00000000 },
+   { 0x00600001, 0xf4200231, 0x00ae0250, 0x00000000 },
+   { 0x00600001, 0xf8200231, 0x00ae0350, 0x00000000 },
+   { 0x00600001, 0xf4400231, 0x00ae0260, 0x00000000 },
+   { 0x00600001, 0xf8400231, 0x00ae0360, 0x00000000 },
+   { 0x00600001, 0xf4600231, 0x00ae0270, 0x00000000 },
+   { 0x00600001, 0xf8600231, 0x00ae0370, 0x00000000 },
+   { 0x00600001, 0xf4800231, 0x00ae0280, 0x00000000 },
+   { 0x00600001, 0xf8800231, 0x00ae0380, 0x00000000 },
+   { 0x00600001, 0xf4a00231, 0x00ae0290, 0x00000000 },
+   { 0x00600001, 0xf8a00231, 0x00ae0390, 0x00000000 },
+   { 0x00600001, 0xf4c00231, 0x00ae02a0, 0x00000000 },
+   { 0x00600001, 0xf8c00231, 0x00ae03a0, 0x00000000 },
+   { 0x00600001, 0xf4e00231, 0x00ae02b0, 0x00000000 },
+   { 0x00600001, 0xf8e00231, 0x00ae03b0, 0x00000000 },
+   { 0x00000409, 0x21003da5, 0x000000a0, 0x00010001 },
+   { 0x00000c01, 0x210401a5, 0x000000a2, 0x00000000 },
+   { 0x00000801, 0x21080061, 0x00000000, 0x0007001f },
+   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+   { 0x00000005, 0x24000c20, 0x000000b8, 0x00ffffff },
+   { 0x04000010, 0x20000c04, 0x00000400, 0x00ffffff },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x0000002a },
+   { 0x01600031, 0x21400c01, 0x408d0000, 0x0288a007 },
+   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+   { 0x00000001, 0x26000228, 0x000000ba, 0x00000000 },
+   { 0x00610001, 0x24400129, 0x000000b8, 0x00000000 },
+   { 0x00710001, 0x24400169, 0x00000000, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x00000440, 0x00000000 },
+   { 0x00910001, 0x27000129, 0x02b10140, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x00000442, 0x00000000 },
+   { 0x00910001, 0x27200129, 0x02b10160, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x00000444, 0x00000000 },
+   { 0x00910001, 0x27400129, 0x02b10180, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x00000446, 0x00000000 },
+   { 0x00910001, 0x27600129, 0x02b101a0, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x00000448, 0x00000000 },
+   { 0x00910001, 0x27800129, 0x02b101c0, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x0000044a, 0x00000000 },
+   { 0x00910001, 0x27a00129, 0x02b101e0, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x0000044c, 0x00000000 },
+   { 0x00910001, 0x27c00129, 0x02b10200, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x0000044e, 0x00000000 },
+   { 0x00910001, 0x27e00129, 0x02b10220, 0x00000000 },
+   { 0x00600001, 0x20400022, 0x008d0700, 0x00000000 },
+   { 0x00600001, 0x20600022, 0x008d0720, 0x00000000 },
+   { 0x00600001, 0x20800022, 0x008d0740, 0x00000000 },
+   { 0x00600001, 0x20a00022, 0x008d0760, 0x00000000 },
+   { 0x00600001, 0x20c00022, 0x008d0780, 0x00000000 },
+   { 0x00600001, 0x20e00022, 0x008d07a0, 0x00000000 },
+   { 0x00600001, 0x21000022, 0x008d07c0, 0x00000000 },
+   { 0x00600001, 0x21200022, 0x008d07e0, 0x00000000 },
+   { 0x01600031, 0x20000c04, 0x508d0000, 0x12082007 },
+   { 0x01000040, 0x20863dad, 0x00000086, 0xffffffff },
+   { 0x00000040, 0x20a03dad, 0x000000a0, 0x00100010 },
+   { 0x05000010, 0x200035ac, 0x020000a0, 0x00000084 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000008 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0xffffff32 },
+   { 0x00000001, 0x20a001ad, 0x0000008a, 0x00000000 },
+   { 0x00000040, 0x20a23dad, 0x000000a2, 0x00080008 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0xffffff2c },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
+   { 0x0f000031, 0x20000c04, 0x708d0000, 0x82000000 },
+   { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
+   { 0x0f000031, 0x20000c04, 0x708d0000, 0x82000000 },
diff --git a/src/shaders/post_processing/gen5_6/nv12_load_save_pa.g6b b/src/shaders/post_processing/gen5_6/nv12_load_save_pa.g6b
new file mode 100644
index 0000000..5de798e
--- /dev/null
+++ b/src/shaders/post_processing/gen5_6/nv12_load_save_pa.g6b
@@ -0,0 +1,190 @@
+   { 0x00600001, 0x20e00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21000061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21200061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21400061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21600061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21800061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21a00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21c00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21e00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x22000061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x22200061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x22400061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x22600061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x22800061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x22a00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x22c00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x22e00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x23000061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x23200061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x23400061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x23600061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x23800061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x23a00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x23c00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x23e00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x24000061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x24200061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x24400061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x24600061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x24800061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x24a00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x24c00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x24e00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x25000061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x25200061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x25400061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x25600061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x25800061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x25a00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x25c00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x25e00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x26000061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x26200061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x26400061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x26600061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x26800061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x26a00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x26c00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x26e00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x27000061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x27200061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x27400061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x27600061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x27800061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x27a00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x27c00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x27e00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x28000061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x28200061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x28400061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x28600061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x28800061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x28a00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x28c00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x28e00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x29000061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x29200061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x29400061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x29600061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x29800061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x29a00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x29c00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x29e00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21000021, 0x008d0000, 0x00000000 },
+   { 0x00000441, 0x20842e2d, 0x000000b7, 0x00100010 },
+   { 0x00000c01, 0x2086022d, 0x000000bb, 0x00000000 },
+   { 0x00000801, 0x208a01ad, 0x000000a0, 0x00000000 },
+   { 0x00200001, 0x209403bd, 0x006600a4, 0x00000000 },
+   { 0x00000040, 0x208435ad, 0x00000084, 0x000000a0 },
+   { 0x00200040, 0x210035a5, 0x004500a0, 0x00450074 },
+   { 0x00000001, 0x21080061, 0x00000000, 0x0007000f },
+   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+   { 0x04600031, 0x27000cc1, 0x00000020, 0x02498001 },
+   { 0x0000000c, 0x21043ca5, 0x00000104, 0x00010001 },
+   { 0x00000001, 0x21080061, 0x00000000, 0x0004000f },
+   { 0x00600001, 0x20400022, 0x008d0100, 0x00000000 },
+   { 0x04600031, 0x28000cc1, 0x00000040, 0x02398002 },
+   { 0x00800001, 0x22200229, 0x00b10770, 0x00000000 },
+   { 0x00800001, 0x22000229, 0x00b10760, 0x00000000 },
+   { 0x00800001, 0x21e00229, 0x00b10750, 0x00000000 },
+   { 0x00800001, 0x21c00229, 0x00b10740, 0x00000000 },
+   { 0x00800001, 0x21a00229, 0x00b10730, 0x00000000 },
+   { 0x00800001, 0x21800229, 0x00b10720, 0x00000000 },
+   { 0x00800001, 0x21600229, 0x00b10710, 0x00000000 },
+   { 0x00800001, 0x21400229, 0x00b10700, 0x00000000 },
+   { 0x00800001, 0x22800229, 0x00ae0840, 0x00000000 },
+   { 0x00800001, 0x23800229, 0x00ae0841, 0x00000000 },
+   { 0x00800001, 0x22600229, 0x00ae0820, 0x00000000 },
+   { 0x00800001, 0x23600229, 0x00ae0821, 0x00000000 },
+   { 0x00800001, 0x22400229, 0x00ae0800, 0x00000000 },
+   { 0x00800001, 0x23400229, 0x00ae0801, 0x00000000 },
+   { 0x80600042, 0x22b02529, 0x008d0270, 0x008d0280 },
+   { 0x80600042, 0x23b02529, 0x008d0370, 0x008d0380 },
+   { 0x00600001, 0x22a00129, 0x008d0270, 0x00000000 },
+   { 0x80600042, 0x22902529, 0x008d0260, 0x008d0270 },
+   { 0x00600001, 0x23a00129, 0x008d0370, 0x00000000 },
+   { 0x80600042, 0x23902529, 0x008d0360, 0x008d0370 },
+   { 0x00600001, 0x22800129, 0x008d0260, 0x00000000 },
+   { 0x80600042, 0x22702529, 0x008d0250, 0x008d0260 },
+   { 0x00600001, 0x23800129, 0x008d0360, 0x00000000 },
+   { 0x80600042, 0x23702529, 0x008d0350, 0x008d0360 },
+   { 0x00600001, 0x22600129, 0x008d0250, 0x00000000 },
+   { 0x80600042, 0x22502529, 0x008d0240, 0x008d0250 },
+   { 0x00600001, 0x23600129, 0x008d0350, 0x00000000 },
+   { 0x80600042, 0x23502529, 0x008d0340, 0x008d0350 },
+   { 0x00400040, 0x22083e28, 0x00690028, 0x07000700 },
+   { 0x00800001, 0xd0000231, 0x00d20140, 0x00000000 },
+   { 0x00800001, 0xd0200231, 0x00d20160, 0x00000000 },
+   { 0x00800001, 0xd0400231, 0x00d20180, 0x00000000 },
+   { 0x00800001, 0xd0600231, 0x00d201a0, 0x00000000 },
+   { 0x00800001, 0xd0800231, 0x00d201c0, 0x00000000 },
+   { 0x00800001, 0xd0a00231, 0x00d201e0, 0x00000000 },
+   { 0x00800001, 0xd0c00231, 0x00d20200, 0x00000000 },
+   { 0x00800001, 0xd0e00231, 0x00d20220, 0x00000000 },
+   { 0x00600001, 0xf4000231, 0x00ae0240, 0x00000000 },
+   { 0x00600001, 0xf8000231, 0x00ae0340, 0x00000000 },
+   { 0x00600001, 0xf4200231, 0x00ae0250, 0x00000000 },
+   { 0x00600001, 0xf8200231, 0x00ae0350, 0x00000000 },
+   { 0x00600001, 0xf4400231, 0x00ae0260, 0x00000000 },
+   { 0x00600001, 0xf8400231, 0x00ae0360, 0x00000000 },
+   { 0x00600001, 0xf4600231, 0x00ae0270, 0x00000000 },
+   { 0x00600001, 0xf8600231, 0x00ae0370, 0x00000000 },
+   { 0x00600001, 0xf4800231, 0x00ae0280, 0x00000000 },
+   { 0x00600001, 0xf8800231, 0x00ae0380, 0x00000000 },
+   { 0x00600001, 0xf4a00231, 0x00ae0290, 0x00000000 },
+   { 0x00600001, 0xf8a00231, 0x00ae0390, 0x00000000 },
+   { 0x00600001, 0xf4c00231, 0x00ae02a0, 0x00000000 },
+   { 0x00600001, 0xf8c00231, 0x00ae03a0, 0x00000000 },
+   { 0x00600001, 0xf4e00231, 0x00ae02b0, 0x00000000 },
+   { 0x00600001, 0xf8e00231, 0x00ae03b0, 0x00000000 },
+   { 0x00000409, 0x21003da5, 0x000000a0, 0x00010001 },
+   { 0x00000c01, 0x210401a5, 0x000000a2, 0x00000000 },
+   { 0x00000801, 0x21080061, 0x00000000, 0x0007001f },
+   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+   { 0x00000005, 0x24000c20, 0x000000b8, 0x00ffffff },
+   { 0x04000010, 0x20000c04, 0x00000400, 0x00ffffff },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x0000002a },
+   { 0x04600031, 0x21400cc1, 0x00000020, 0x02898007 },
+   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+   { 0x00000001, 0x26000228, 0x000000ba, 0x00000000 },
+   { 0x00610001, 0x24400129, 0x000000b8, 0x00000000 },
+   { 0x00710001, 0x24400169, 0x00000000, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x00000440, 0x00000000 },
+   { 0x00910001, 0x27000129, 0x02b10140, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x00000442, 0x00000000 },
+   { 0x00910001, 0x27200129, 0x02b10160, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x00000444, 0x00000000 },
+   { 0x00910001, 0x27400129, 0x02b10180, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x00000446, 0x00000000 },
+   { 0x00910001, 0x27600129, 0x02b101a0, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x00000448, 0x00000000 },
+   { 0x00910001, 0x27800129, 0x02b101c0, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x0000044a, 0x00000000 },
+   { 0x00910001, 0x27a00129, 0x02b101e0, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x0000044c, 0x00000000 },
+   { 0x00910001, 0x27c00129, 0x02b10200, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x0000044e, 0x00000000 },
+   { 0x00910001, 0x27e00129, 0x02b10220, 0x00000000 },
+   { 0x00600001, 0x20400022, 0x008d0700, 0x00000000 },
+   { 0x00600001, 0x20600022, 0x008d0720, 0x00000000 },
+   { 0x00600001, 0x20800022, 0x008d0740, 0x00000000 },
+   { 0x00600001, 0x20a00022, 0x008d0760, 0x00000000 },
+   { 0x00600001, 0x20c00022, 0x008d0780, 0x00000000 },
+   { 0x00600001, 0x20e00022, 0x008d07a0, 0x00000000 },
+   { 0x00600001, 0x21000022, 0x008d07c0, 0x00000000 },
+   { 0x00600001, 0x21200022, 0x008d07e0, 0x00000000 },
+   { 0x05600031, 0x20000cc4, 0x00000020, 0x12094007 },
+   { 0x01000040, 0x20863dad, 0x00000086, 0xffffffff },
+   { 0x00000040, 0x20a03dad, 0x000000a0, 0x00100010 },
+   { 0x05000010, 0x200035ac, 0x020000a0, 0x00000084 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000008 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0xffffff32 },
+   { 0x00000001, 0x20a001ad, 0x0000008a, 0x00000000 },
+   { 0x00000040, 0x20a23dad, 0x000000a2, 0x00080008 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0xffffff2c },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
+   { 0x07000031, 0x20001cc4, 0x000001e0, 0x82000010 },
+   { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
+   { 0x07000031, 0x20001cc4, 0x000001e0, 0x82000010 },
diff --git a/src/shaders/post_processing/gen5_6/nv12_load_save_pl3.asm b/src/shaders/post_processing/gen5_6/nv12_load_save_pl3.asm
new file mode 100644
index 0000000..9fa44a1
--- /dev/null
+++ b/src/shaders/post_processing/gen5_6/nv12_load_save_pl3.asm
@@ -0,0 +1,17 @@
+// Module name: NV12_LOAD_SAVE_PL3 -- the kernel body is just the #include fragments below, in listing order, followed by END_THREAD
+.kernel NV12_LOAD_SAVE_PL3
+.code
+
+#include "SetupVPKernel.asm"
+#include "Multiple_Loop_Head.asm"
+#include "NV12_Load_8x4.asm"        
+#include "PL8x4_Save_IMC3.asm"
+#include "Multiple_Loop.asm"
+
+END_THREAD  // End of Thread
+
+.end_code  
+
+.end_kernel
+
+// end of nv12_load_save_pl3.asm
diff --git a/src/shaders/post_processing/gen5_6/nv12_load_save_pl3.g4b.gen5 b/src/shaders/post_processing/gen5_6/nv12_load_save_pl3.g4b.gen5
new file mode 100644
index 0000000..9ca4063
--- /dev/null
+++ b/src/shaders/post_processing/gen5_6/nv12_load_save_pl3.g4b.gen5
@@ -0,0 +1,105 @@
+   { 0x00600001, 0x21000021, 0x008d0000, 0x00000000 },
+   { 0x00000441, 0x20842e2d, 0x000000b7, 0x00100010 },
+   { 0x00000c01, 0x2086022d, 0x000000bb, 0x00000000 },
+   { 0x00000801, 0x208a01ad, 0x000000a0, 0x00000000 },
+   { 0x00200001, 0x209403bd, 0x006600a4, 0x00000000 },
+   { 0x00000040, 0x208435ad, 0x00000084, 0x000000a0 },
+   { 0x00200040, 0x210035a5, 0x004500a0, 0x00450074 },
+   { 0x00000001, 0x21080061, 0x00000000, 0x0007000f },
+   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+   { 0x01600031, 0x27000c01, 0x408d0000, 0x0248a001 },
+   { 0x0000000c, 0x21043ca5, 0x00000104, 0x00010001 },
+   { 0x00000001, 0x21080061, 0x00000000, 0x0003000f },
+   { 0x00600001, 0x20400022, 0x008d0100, 0x00000000 },
+   { 0x02600031, 0x28000c01, 0x408d0000, 0x0228a002 },
+   { 0x00800001, 0x22200229, 0x00b10770, 0x00000000 },
+   { 0x00800001, 0x22000229, 0x00b10760, 0x00000000 },
+   { 0x00800001, 0x21e00229, 0x00b10750, 0x00000000 },
+   { 0x00800001, 0x21c00229, 0x00b10740, 0x00000000 },
+   { 0x00800001, 0x21a00229, 0x00b10730, 0x00000000 },
+   { 0x00800001, 0x21800229, 0x00b10720, 0x00000000 },
+   { 0x00800001, 0x21600229, 0x00b10710, 0x00000000 },
+   { 0x00800001, 0x21400229, 0x00b10700, 0x00000000 },
+   { 0x00800001, 0x22600229, 0x00d20820, 0x00000000 },
+   { 0x00800001, 0x23600229, 0x00d20821, 0x00000000 },
+   { 0x00800001, 0x22400229, 0x00d20800, 0x00000000 },
+   { 0x00800001, 0x23400229, 0x00d20801, 0x00000000 },
+   { 0x00000005, 0x24000c20, 0x000000b8, 0x00ffffff },
+   { 0x04000010, 0x20000c04, 0x00000400, 0x00ffffff },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000052 },
+   { 0x00200001, 0x210001a5, 0x004500a0, 0x00000000 },
+   { 0x00000001, 0x21080061, 0x00000000, 0x0007000f },
+   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+   { 0x01600031, 0x27000c01, 0x408d0000, 0x0248a007 },
+   { 0x0020000c, 0x21003da5, 0x004500a0, 0x00010001 },
+   { 0x00000001, 0x21080061, 0x00000000, 0x00030007 },
+   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+   { 0x01600031, 0x28000c01, 0x408d0000, 0x0218a008 },
+   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+   { 0x01600031, 0x29000c01, 0x408d0000, 0x0218a009 },
+   { 0x00800001, 0x28200229, 0x008d0810, 0x00000000 },
+   { 0x00800001, 0x29200229, 0x008d0910, 0x00000000 },
+   { 0x00800001, 0x28000229, 0x008d0800, 0x00000000 },
+   { 0x00800001, 0x29000229, 0x008d0900, 0x00000000 },
+   { 0x00000001, 0x26000228, 0x000000ba, 0x00000000 },
+   { 0x00610001, 0x24400129, 0x000000b8, 0x00000000 },
+   { 0x00710001, 0x24400169, 0x00000000, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x00000440, 0x00000000 },
+   { 0x00910001, 0x41400231, 0x02b10700, 0x00000000 },
+   { 0x00910001, 0x22400231, 0x028d0800, 0x00000000 },
+   { 0x00910001, 0x23400231, 0x028d0900, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x00000442, 0x00000000 },
+   { 0x00910001, 0x41410231, 0x02b10701, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x00000444, 0x00000000 },
+   { 0x00910001, 0x41800231, 0x02b10720, 0x00000000 },
+   { 0x00910001, 0x22500231, 0x028d0810, 0x00000000 },
+   { 0x00910001, 0x23500231, 0x028d0910, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x00000446, 0x00000000 },
+   { 0x00910001, 0x41810231, 0x02b10721, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x00000448, 0x00000000 },
+   { 0x00910001, 0x41c00231, 0x02b10740, 0x00000000 },
+   { 0x00910001, 0x22600231, 0x028d0820, 0x00000000 },
+   { 0x00910001, 0x23600231, 0x028d0920, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x0000044a, 0x00000000 },
+   { 0x00910001, 0x41c10231, 0x02b10741, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x0000044c, 0x00000000 },
+   { 0x00910001, 0x42000231, 0x02b10760, 0x00000000 },
+   { 0x00910001, 0x22700231, 0x028d0830, 0x00000000 },
+   { 0x00910001, 0x23700231, 0x028d0930, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x0000044e, 0x00000000 },
+   { 0x00910001, 0x42010231, 0x02b10761, 0x00000000 },
+   { 0x00200001, 0x210001a5, 0x004500a0, 0x00000000 },
+   { 0x00000001, 0x21080061, 0x00000000, 0x0007000f },
+   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+   { 0x00800001, 0x20400232, 0x00d20140, 0x00000000 },
+   { 0x00800001, 0x20500232, 0x00d20160, 0x00000000 },
+   { 0x00800001, 0x20600232, 0x00d20180, 0x00000000 },
+   { 0x00800001, 0x20700232, 0x00d201a0, 0x00000000 },
+   { 0x00800001, 0x20800232, 0x00d201c0, 0x00000000 },
+   { 0x00800001, 0x20900232, 0x00d201e0, 0x00000000 },
+   { 0x00800001, 0x20a00232, 0x00d20200, 0x00000000 },
+   { 0x00800001, 0x20b00232, 0x00d20220, 0x00000000 },
+   { 0x01600031, 0x20000c04, 0x508d0000, 0x0a082007 },
+   { 0x0020000c, 0x21003da5, 0x004500a0, 0x00010001 },
+   { 0x00000001, 0x21080061, 0x00000000, 0x00030007 },
+   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+   { 0x00800001, 0x20400232, 0x00d20240, 0x00000000 },
+   { 0x00800001, 0x20500232, 0x00d20260, 0x00000000 },
+   { 0x01600031, 0x20000c04, 0x508d0000, 0x04082008 },
+   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+   { 0x00800001, 0x20400232, 0x00d20340, 0x00000000 },
+   { 0x00800001, 0x20500232, 0x00d20360, 0x00000000 },
+   { 0x01600031, 0x20000c04, 0x508d0000, 0x04082009 },
+   { 0x01000040, 0x20863dad, 0x00000086, 0xffffffff },
+   { 0x00000040, 0x20a03dad, 0x000000a0, 0x00100010 },
+   { 0x05000010, 0x200035ac, 0x020000a0, 0x00000084 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000008 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0xffffff4a },
+   { 0x00000001, 0x20a001ad, 0x0000008a, 0x00000000 },
+   { 0x00000040, 0x20a23dad, 0x000000a2, 0x00080008 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0xffffff44 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
+   { 0x0f000031, 0x20000c04, 0x708d0000, 0x82000000 },
+   { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
+   { 0x0f000031, 0x20000c04, 0x708d0000, 0x82000000 },
diff --git a/src/shaders/post_processing/gen5_6/nv12_load_save_pl3.g6b b/src/shaders/post_processing/gen5_6/nv12_load_save_pl3.g6b
new file mode 100644
index 0000000..819280d
--- /dev/null
+++ b/src/shaders/post_processing/gen5_6/nv12_load_save_pl3.g6b
@@ -0,0 +1,178 @@
+   { 0x00600001, 0x20e00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21000061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21200061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21400061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21600061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21800061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21a00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21c00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21e00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x22000061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x22200061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x22400061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x22600061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x22800061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x22a00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x22c00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x22e00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x23000061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x23200061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x23400061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x23600061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x23800061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x23a00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x23c00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x23e00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x24000061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x24200061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x24400061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x24600061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x24800061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x24a00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x24c00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x24e00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x25000061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x25200061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x25400061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x25600061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x25800061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x25a00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x25c00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x25e00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x26000061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x26200061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x26400061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x26600061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x26800061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x26a00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x26c00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x26e00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x27000061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x27200061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x27400061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x27600061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x27800061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x27a00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x27c00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x27e00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x28000061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x28200061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x28400061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x28600061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x28800061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x28a00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x28c00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x28e00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x29000061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x29200061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x29400061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x29600061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x29800061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x29a00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x29c00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x29e00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21000021, 0x008d0000, 0x00000000 },
+   { 0x00000441, 0x20842e2d, 0x000000b7, 0x00100010 },
+   { 0x00000c01, 0x2086022d, 0x000000bb, 0x00000000 },
+   { 0x00000801, 0x208a01ad, 0x000000a0, 0x00000000 },
+   { 0x00200001, 0x209403bd, 0x006600a4, 0x00000000 },
+   { 0x00000040, 0x208435ad, 0x00000084, 0x000000a0 },
+   { 0x00200040, 0x210035a5, 0x004500a0, 0x00450074 },
+   { 0x00000001, 0x21080061, 0x00000000, 0x0007000f },
+   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+   { 0x04600031, 0x27000cc1, 0x00000020, 0x02498001 },
+   { 0x0000000c, 0x21043ca5, 0x00000104, 0x00010001 },
+   { 0x00000001, 0x21080061, 0x00000000, 0x0003000f },
+   { 0x00600001, 0x20400022, 0x008d0100, 0x00000000 },
+   { 0x04600031, 0x28000cc1, 0x00000040, 0x02298002 },
+   { 0x00800001, 0x22200229, 0x00b10770, 0x00000000 },
+   { 0x00800001, 0x22000229, 0x00b10760, 0x00000000 },
+   { 0x00800001, 0x21e00229, 0x00b10750, 0x00000000 },
+   { 0x00800001, 0x21c00229, 0x00b10740, 0x00000000 },
+   { 0x00800001, 0x21a00229, 0x00b10730, 0x00000000 },
+   { 0x00800001, 0x21800229, 0x00b10720, 0x00000000 },
+   { 0x00800001, 0x21600229, 0x00b10710, 0x00000000 },
+   { 0x00800001, 0x21400229, 0x00b10700, 0x00000000 },
+   { 0x00800001, 0x22600229, 0x00d20820, 0x00000000 },
+   { 0x00800001, 0x23600229, 0x00d20821, 0x00000000 },
+   { 0x00800001, 0x22400229, 0x00d20800, 0x00000000 },
+   { 0x00800001, 0x23400229, 0x00d20801, 0x00000000 },
+   { 0x00000005, 0x24000c20, 0x000000b8, 0x00ffffff },
+   { 0x04000010, 0x20000c04, 0x00000400, 0x00ffffff },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000052 },
+   { 0x00200001, 0x210001a5, 0x004500a0, 0x00000000 },
+   { 0x00000001, 0x21080061, 0x00000000, 0x0007000f },
+   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+   { 0x04600031, 0x27000cc1, 0x00000020, 0x02498007 },
+   { 0x0020000c, 0x21003da5, 0x004500a0, 0x00010001 },
+   { 0x00000001, 0x21080061, 0x00000000, 0x00030007 },
+   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+   { 0x04600031, 0x28000cc1, 0x00000020, 0x02198008 },
+   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+   { 0x04600031, 0x29000cc1, 0x00000020, 0x02198009 },
+   { 0x00800001, 0x28200229, 0x008d0810, 0x00000000 },
+   { 0x00800001, 0x29200229, 0x008d0910, 0x00000000 },
+   { 0x00800001, 0x28000229, 0x008d0800, 0x00000000 },
+   { 0x00800001, 0x29000229, 0x008d0900, 0x00000000 },
+   { 0x00000001, 0x26000228, 0x000000ba, 0x00000000 },
+   { 0x00610001, 0x24400129, 0x000000b8, 0x00000000 },
+   { 0x00710001, 0x24400169, 0x00000000, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x00000440, 0x00000000 },
+   { 0x00910001, 0x41400231, 0x02b10700, 0x00000000 },
+   { 0x00910001, 0x22400231, 0x028d0800, 0x00000000 },
+   { 0x00910001, 0x23400231, 0x028d0900, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x00000442, 0x00000000 },
+   { 0x00910001, 0x41410231, 0x02b10701, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x00000444, 0x00000000 },
+   { 0x00910001, 0x41800231, 0x02b10720, 0x00000000 },
+   { 0x00910001, 0x22500231, 0x028d0810, 0x00000000 },
+   { 0x00910001, 0x23500231, 0x028d0910, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x00000446, 0x00000000 },
+   { 0x00910001, 0x41810231, 0x02b10721, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x00000448, 0x00000000 },
+   { 0x00910001, 0x41c00231, 0x02b10740, 0x00000000 },
+   { 0x00910001, 0x22600231, 0x028d0820, 0x00000000 },
+   { 0x00910001, 0x23600231, 0x028d0920, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x0000044a, 0x00000000 },
+   { 0x00910001, 0x41c10231, 0x02b10741, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x0000044c, 0x00000000 },
+   { 0x00910001, 0x42000231, 0x02b10760, 0x00000000 },
+   { 0x00910001, 0x22700231, 0x028d0830, 0x00000000 },
+   { 0x00910001, 0x23700231, 0x028d0930, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x0000044e, 0x00000000 },
+   { 0x00910001, 0x42010231, 0x02b10761, 0x00000000 },
+   { 0x00200001, 0x210001a5, 0x004500a0, 0x00000000 },
+   { 0x00000001, 0x21080061, 0x00000000, 0x0007000f },
+   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+   { 0x00800001, 0x20400232, 0x00d20140, 0x00000000 },
+   { 0x00800001, 0x20500232, 0x00d20160, 0x00000000 },
+   { 0x00800001, 0x20600232, 0x00d20180, 0x00000000 },
+   { 0x00800001, 0x20700232, 0x00d201a0, 0x00000000 },
+   { 0x00800001, 0x20800232, 0x00d201c0, 0x00000000 },
+   { 0x00800001, 0x20900232, 0x00d201e0, 0x00000000 },
+   { 0x00800001, 0x20a00232, 0x00d20200, 0x00000000 },
+   { 0x00800001, 0x20b00232, 0x00d20220, 0x00000000 },
+   { 0x05600031, 0x20000cc4, 0x00000020, 0x0a094007 },
+   { 0x0020000c, 0x21003da5, 0x004500a0, 0x00010001 },
+   { 0x00000001, 0x21080061, 0x00000000, 0x00030007 },
+   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+   { 0x00800001, 0x20400232, 0x00d20240, 0x00000000 },
+   { 0x00800001, 0x20500232, 0x00d20260, 0x00000000 },
+   { 0x05600031, 0x20000cc4, 0x00000020, 0x04094008 },
+   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+   { 0x00800001, 0x20400232, 0x00d20340, 0x00000000 },
+   { 0x00800001, 0x20500232, 0x00d20360, 0x00000000 },
+   { 0x05600031, 0x20000cc4, 0x00000020, 0x04094009 },
+   { 0x01000040, 0x20863dad, 0x00000086, 0xffffffff },
+   { 0x00000040, 0x20a03dad, 0x000000a0, 0x00100010 },
+   { 0x05000010, 0x200035ac, 0x020000a0, 0x00000084 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000008 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0xffffff4a },
+   { 0x00000001, 0x20a001ad, 0x0000008a, 0x00000000 },
+   { 0x00000040, 0x20a23dad, 0x000000a2, 0x00080008 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0xffffff44 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
+   { 0x07000031, 0x20001cc4, 0x000001e0, 0x82000010 },
+   { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
+   { 0x07000031, 0x20001cc4, 0x000001e0, 0x82000010 },
diff --git a/src/shaders/post_processing/nv12_scaling_nv12.asm b/src/shaders/post_processing/gen5_6/nv12_scaling_nv12.asm
similarity index 100%
rename from src/shaders/post_processing/nv12_scaling_nv12.asm
rename to src/shaders/post_processing/gen5_6/nv12_scaling_nv12.asm
diff --git a/src/shaders/post_processing/nv12_scaling_nv12.g4b.gen5 b/src/shaders/post_processing/gen5_6/nv12_scaling_nv12.g4b.gen5
similarity index 100%
rename from src/shaders/post_processing/nv12_scaling_nv12.g4b.gen5
rename to src/shaders/post_processing/gen5_6/nv12_scaling_nv12.g4b.gen5
diff --git a/src/shaders/post_processing/nv12_scaling_nv12.g6b b/src/shaders/post_processing/gen5_6/nv12_scaling_nv12.g6b
similarity index 100%
rename from src/shaders/post_processing/nv12_scaling_nv12.g6b
rename to src/shaders/post_processing/gen5_6/nv12_scaling_nv12.g6b
diff --git a/src/shaders/post_processing/gen5_6/pa_load_save_nv12.asm b/src/shaders/post_processing/gen5_6/pa_load_save_nv12.asm
new file mode 100755
index 0000000..bd68a92
--- /dev/null
+++ b/src/shaders/post_processing/gen5_6/pa_load_save_nv12.asm
@@ -0,0 +1,18 @@
+// Module name: PA_LOAD_SAVE_NV12
+.kernel PA_LOAD_SAVE_NV12 
+.code
+// The kernel body consists solely of the fragments included below, in this order; their contents live in separate files.
+#include "SetupVPKernel.asm"
+#include "Multiple_Loop_Head.asm"
+#include "PA_Load_8x8.asm"   
+#include "PL8x8_PL8x4.asm"     
+#include "PL8x4_Save_NV12.asm"
+#include "Multiple_Loop.asm"
+// NOTE(review): END_THREAD is not defined in this file; presumably a macro provided by one of the includes -- confirm.
+END_THREAD  // End of Thread
+
+.end_code  
+
+.end_kernel
+
+// end of pa_load_save_nv12.asm
diff --git a/src/shaders/post_processing/gen5_6/pa_load_save_nv12.g4b.gen5 b/src/shaders/post_processing/gen5_6/pa_load_save_nv12.g4b.gen5
new file mode 100644
index 0000000..af53ccd
--- /dev/null
+++ b/src/shaders/post_processing/gen5_6/pa_load_save_nv12.g4b.gen5
@@ -0,0 +1,120 @@
+   { 0x00600001, 0x21000021, 0x008d0000, 0x00000000 },
+   { 0x00000441, 0x20842e2d, 0x000000b7, 0x00100010 },
+   { 0x00000c01, 0x2086022d, 0x000000bb, 0x00000000 },
+   { 0x00000801, 0x208a01ad, 0x000000a0, 0x00000000 },
+   { 0x00200001, 0x209403bd, 0x006600a4, 0x00000000 },
+   { 0x00000040, 0x208435ad, 0x00000084, 0x000000a0 },
+   { 0x00200040, 0x210035a5, 0x004500a0, 0x00450074 },
+   { 0x00000009, 0x21003da5, 0x00000100, 0x00010001 },
+   { 0x00000001, 0x21080061, 0x00000000, 0x0007001f },
+   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+   { 0x01600031, 0x27000c01, 0x408d0000, 0x0288a001 },
+   { 0x00400040, 0x22083e28, 0x00690024, 0x07000700 },
+   { 0x00800001, 0x21400229, 0x00d29000, 0x00000000 },
+   { 0x00600001, 0x22400229, 0x00cf9400, 0x00000000 },
+   { 0x00600001, 0x23400229, 0x00cf9800, 0x00000000 },
+   { 0x00800001, 0x21600229, 0x00d29020, 0x00000000 },
+   { 0x00600001, 0x22500229, 0x00cf9420, 0x00000000 },
+   { 0x00600001, 0x23500229, 0x00cf9820, 0x00000000 },
+   { 0x00800001, 0x21800229, 0x00d29040, 0x00000000 },
+   { 0x00600001, 0x22600229, 0x00cf9440, 0x00000000 },
+   { 0x00600001, 0x23600229, 0x00cf9840, 0x00000000 },
+   { 0x00800001, 0x21a00229, 0x00d29060, 0x00000000 },
+   { 0x00600001, 0x22700229, 0x00cf9460, 0x00000000 },
+   { 0x00600001, 0x23700229, 0x00cf9860, 0x00000000 },
+   { 0x00800001, 0x21c00229, 0x00d29080, 0x00000000 },
+   { 0x00600001, 0x22800229, 0x00cf9480, 0x00000000 },
+   { 0x00600001, 0x23800229, 0x00cf9880, 0x00000000 },
+   { 0x00800001, 0x21e00229, 0x00d290a0, 0x00000000 },
+   { 0x00600001, 0x22900229, 0x00cf94a0, 0x00000000 },
+   { 0x00600001, 0x23900229, 0x00cf98a0, 0x00000000 },
+   { 0x00800001, 0x22000229, 0x00d290c0, 0x00000000 },
+   { 0x00600001, 0x22a00229, 0x00cf94c0, 0x00000000 },
+   { 0x00600001, 0x23a00229, 0x00cf98c0, 0x00000000 },
+   { 0x00800001, 0x22200229, 0x00d290e0, 0x00000000 },
+   { 0x00600001, 0x22b00229, 0x00cf94e0, 0x00000000 },
+   { 0x00600001, 0x23b00229, 0x00cf98e0, 0x00000000 },
+   { 0x00600001, 0x42500231, 0x00ae0260, 0x00000000 },
+   { 0x00800001, 0x42600231, 0x00ce0280, 0x00000000 },
+   { 0x00600001, 0x43500231, 0x00ae0360, 0x00000000 },
+   { 0x00800001, 0x43600231, 0x00ce0380, 0x00000000 },
+   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+   { 0x00200001, 0x202001a6, 0x004500a0, 0x00000000 },
+   { 0x00000001, 0x20280062, 0x00000000, 0x0007000f },
+   { 0x00000005, 0x24000c20, 0x000000b8, 0x00ffffff },
+   { 0x04000010, 0x20000c04, 0x00000400, 0x00ffffff },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000056 },
+   { 0x01600031, 0x27000c01, 0x408d0000, 0x0248a007 },
+   { 0x0000040c, 0x21043da1, 0x000000a2, 0x00010001 },
+   { 0x00000801, 0x21080061, 0x00000000, 0x0003000f },
+   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+   { 0x01600031, 0x28000c01, 0x408d0000, 0x0228a008 },
+   { 0x00200001, 0x210001a1, 0x004500a0, 0x00000000 },
+   { 0x00000001, 0x21080061, 0x00000000, 0x0007000f },
+   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+   { 0x00000001, 0x26020228, 0x000000ba, 0x00000000 },
+   { 0x00610001, 0x24400129, 0x020000b8, 0x00000000 },
+   { 0x00710001, 0x24400169, 0x02000000, 0x00000000 },
+   { 0x00000001, 0x24600061, 0x00000000, 0x00040001 },
+   { 0x00000001, 0x24640061, 0x00000000, 0x00400010 },
+   { 0x00000001, 0x24680061, 0x00000000, 0x04000100 },
+   { 0x00000001, 0x246c0061, 0x00000000, 0x40001000 },
+   { 0x00000001, 0x26020128, 0x00000440, 0x00000000 },
+   { 0x00910001, 0x41400231, 0x02b10700, 0x00000000 },
+   { 0x02600005, 0x2000252c, 0x02000440, 0x008d0460 },
+   { 0x00710001, 0x42400231, 0x02ae0800, 0x00000000 },
+   { 0x00710001, 0x43400231, 0x02ae0801, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x00000442, 0x00000000 },
+   { 0x00910001, 0x41600231, 0x02b10710, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x00000444, 0x00000000 },
+   { 0x00910001, 0x41800231, 0x02b10720, 0x00000000 },
+   { 0x02600005, 0x2000252c, 0x02000444, 0x008d0460 },
+   { 0x00710001, 0x42500231, 0x02ae0810, 0x00000000 },
+   { 0x00710001, 0x43500231, 0x02ae0811, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x00000446, 0x00000000 },
+   { 0x00910001, 0x41a00231, 0x02b10730, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x00000448, 0x00000000 },
+   { 0x00910001, 0x41c00231, 0x02b10740, 0x00000000 },
+   { 0x02600005, 0x2000252c, 0x02000448, 0x008d0460 },
+   { 0x00710001, 0x42600231, 0x02ae0820, 0x00000000 },
+   { 0x00710001, 0x43600231, 0x02ae0821, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x0000044a, 0x00000000 },
+   { 0x00910001, 0x41e00231, 0x02b10750, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x0000044c, 0x00000000 },
+   { 0x00910001, 0x42000231, 0x02b10760, 0x00000000 },
+   { 0x02600005, 0x2000252c, 0x0200044c, 0x008d0460 },
+   { 0x00710001, 0x42700231, 0x02ae0830, 0x00000000 },
+   { 0x00710001, 0x43700231, 0x02ae0831, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x0000044e, 0x00000000 },
+   { 0x00910001, 0x42200231, 0x02b10770, 0x00000000 },
+   { 0x00800001, 0x20400232, 0x00d20140, 0x00000000 },
+   { 0x00800001, 0x20500232, 0x00d20160, 0x00000000 },
+   { 0x00800001, 0x20600232, 0x00d20180, 0x00000000 },
+   { 0x00800001, 0x20700232, 0x00d201a0, 0x00000000 },
+   { 0x00800001, 0x20800232, 0x00d201c0, 0x00000000 },
+   { 0x00800001, 0x20900232, 0x00d201e0, 0x00000000 },
+   { 0x00800001, 0x20a00232, 0x00d20200, 0x00000000 },
+   { 0x00800001, 0x20b00232, 0x00d20220, 0x00000000 },
+   { 0x01600031, 0x20000c04, 0x508d0000, 0x0a082007 },
+   { 0x00200001, 0x210001a5, 0x004500a0, 0x00000000 },
+   { 0x0000000c, 0x21043ca5, 0x00000104, 0x00010001 },
+   { 0x00000001, 0x21080061, 0x00000000, 0x0003000f },
+   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+   { 0x00800001, 0x40400232, 0x00d20240, 0x00000000 },
+   { 0x00800001, 0x40410232, 0x00d20340, 0x00000000 },
+   { 0x00800001, 0x40600232, 0x00d20260, 0x00000000 },
+   { 0x00800001, 0x40610232, 0x00d20360, 0x00000000 },
+   { 0x01600031, 0x20000c04, 0x508d0000, 0x06082008 },
+   { 0x01000040, 0x20863dad, 0x00000086, 0xffffffff },
+   { 0x00000040, 0x20a03dad, 0x000000a0, 0x00100010 },
+   { 0x05000010, 0x200035ac, 0x020000a0, 0x00000084 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000008 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0xffffff2c },
+   { 0x00000001, 0x20a001ad, 0x0000008a, 0x00000000 },
+   { 0x00000040, 0x20a23dad, 0x000000a2, 0x00080008 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0xffffff26 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
+   { 0x0f000031, 0x20000c04, 0x708d0000, 0x82000000 },
+   { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
+   { 0x0f000031, 0x20000c04, 0x708d0000, 0x82000000 },
diff --git a/src/shaders/post_processing/gen5_6/pa_load_save_nv12.g6b b/src/shaders/post_processing/gen5_6/pa_load_save_nv12.g6b
new file mode 100644
index 0000000..343bd1c
--- /dev/null
+++ b/src/shaders/post_processing/gen5_6/pa_load_save_nv12.g6b
@@ -0,0 +1,193 @@
+   { 0x00600001, 0x20e00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21000061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21200061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21400061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21600061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21800061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21a00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21c00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21e00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x22000061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x22200061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x22400061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x22600061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x22800061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x22a00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x22c00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x22e00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x23000061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x23200061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x23400061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x23600061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x23800061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x23a00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x23c00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x23e00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x24000061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x24200061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x24400061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x24600061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x24800061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x24a00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x24c00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x24e00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x25000061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x25200061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x25400061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x25600061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x25800061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x25a00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x25c00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x25e00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x26000061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x26200061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x26400061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x26600061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x26800061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x26a00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x26c00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x26e00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x27000061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x27200061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x27400061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x27600061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x27800061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x27a00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x27c00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x27e00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x28000061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x28200061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x28400061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x28600061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x28800061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x28a00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x28c00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x28e00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x29000061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x29200061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x29400061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x29600061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x29800061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x29a00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x29c00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x29e00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21000021, 0x008d0000, 0x00000000 },
+   { 0x00000441, 0x20842e2d, 0x000000b7, 0x00100010 },
+   { 0x00000c01, 0x2086022d, 0x000000bb, 0x00000000 },
+   { 0x00000801, 0x208a01ad, 0x000000a0, 0x00000000 },
+   { 0x00200001, 0x209403bd, 0x006600a4, 0x00000000 },
+   { 0x00000040, 0x208435ad, 0x00000084, 0x000000a0 },
+   { 0x00200040, 0x210035a5, 0x004500a0, 0x00450074 },
+   { 0x00000009, 0x21003da5, 0x00000100, 0x00010001 },
+   { 0x00000001, 0x21080061, 0x00000000, 0x0007001f },
+   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+   { 0x04600031, 0x27000cc1, 0x00000020, 0x02898001 },
+   { 0x00400040, 0x22083e28, 0x00690024, 0x07000700 },
+   { 0x00800001, 0x21400229, 0x00d29000, 0x00000000 },
+   { 0x00600001, 0x22400229, 0x00cf9400, 0x00000000 },
+   { 0x00600001, 0x23400229, 0x00cf9800, 0x00000000 },
+   { 0x00800001, 0x21600229, 0x00d29020, 0x00000000 },
+   { 0x00600001, 0x22500229, 0x00cf9420, 0x00000000 },
+   { 0x00600001, 0x23500229, 0x00cf9820, 0x00000000 },
+   { 0x00800001, 0x21800229, 0x00d29040, 0x00000000 },
+   { 0x00600001, 0x22600229, 0x00cf9440, 0x00000000 },
+   { 0x00600001, 0x23600229, 0x00cf9840, 0x00000000 },
+   { 0x00800001, 0x21a00229, 0x00d29060, 0x00000000 },
+   { 0x00600001, 0x22700229, 0x00cf9460, 0x00000000 },
+   { 0x00600001, 0x23700229, 0x00cf9860, 0x00000000 },
+   { 0x00800001, 0x21c00229, 0x00d29080, 0x00000000 },
+   { 0x00600001, 0x22800229, 0x00cf9480, 0x00000000 },
+   { 0x00600001, 0x23800229, 0x00cf9880, 0x00000000 },
+   { 0x00800001, 0x21e00229, 0x00d290a0, 0x00000000 },
+   { 0x00600001, 0x22900229, 0x00cf94a0, 0x00000000 },
+   { 0x00600001, 0x23900229, 0x00cf98a0, 0x00000000 },
+   { 0x00800001, 0x22000229, 0x00d290c0, 0x00000000 },
+   { 0x00600001, 0x22a00229, 0x00cf94c0, 0x00000000 },
+   { 0x00600001, 0x23a00229, 0x00cf98c0, 0x00000000 },
+   { 0x00800001, 0x22200229, 0x00d290e0, 0x00000000 },
+   { 0x00600001, 0x22b00229, 0x00cf94e0, 0x00000000 },
+   { 0x00600001, 0x23b00229, 0x00cf98e0, 0x00000000 },
+   { 0x00600001, 0x42500231, 0x00ae0260, 0x00000000 },
+   { 0x00800001, 0x42600231, 0x00ce0280, 0x00000000 },
+   { 0x00600001, 0x43500231, 0x00ae0360, 0x00000000 },
+   { 0x00800001, 0x43600231, 0x00ce0380, 0x00000000 },
+   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+   { 0x00200001, 0x202001a6, 0x004500a0, 0x00000000 },
+   { 0x00000001, 0x20280062, 0x00000000, 0x0007000f },
+   { 0x00000005, 0x24000c20, 0x000000b8, 0x00ffffff },
+   { 0x04000010, 0x20000c04, 0x00000400, 0x00ffffff },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000056 },
+   { 0x04600031, 0x27000cc1, 0x00000020, 0x02498007 },
+   { 0x0000040c, 0x21043da1, 0x000000a2, 0x00010001 },
+   { 0x00000801, 0x21080061, 0x00000000, 0x0003000f },
+   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+   { 0x04600031, 0x28000cc1, 0x00000020, 0x02298008 },
+   { 0x00200001, 0x210001a1, 0x004500a0, 0x00000000 },
+   { 0x00000001, 0x21080061, 0x00000000, 0x0007000f },
+   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+   { 0x00000001, 0x26020228, 0x000000ba, 0x00000000 },
+   { 0x00610001, 0x24400129, 0x020000b8, 0x00000000 },
+   { 0x00710001, 0x24400169, 0x02000000, 0x00000000 },
+   { 0x00000001, 0x24600061, 0x00000000, 0x00040001 },
+   { 0x00000001, 0x24640061, 0x00000000, 0x00400010 },
+   { 0x00000001, 0x24680061, 0x00000000, 0x04000100 },
+   { 0x00000001, 0x246c0061, 0x00000000, 0x40001000 },
+   { 0x00000001, 0x26020128, 0x00000440, 0x00000000 },
+   { 0x00910001, 0x41400231, 0x02b10700, 0x00000000 },
+   { 0x02600005, 0x2000252c, 0x02000440, 0x008d0460 },
+   { 0x00710001, 0x42400231, 0x02ae0800, 0x00000000 },
+   { 0x00710001, 0x43400231, 0x02ae0801, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x00000442, 0x00000000 },
+   { 0x00910001, 0x41600231, 0x02b10710, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x00000444, 0x00000000 },
+   { 0x00910001, 0x41800231, 0x02b10720, 0x00000000 },
+   { 0x02600005, 0x2000252c, 0x02000444, 0x008d0460 },
+   { 0x00710001, 0x42500231, 0x02ae0810, 0x00000000 },
+   { 0x00710001, 0x43500231, 0x02ae0811, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x00000446, 0x00000000 },
+   { 0x00910001, 0x41a00231, 0x02b10730, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x00000448, 0x00000000 },
+   { 0x00910001, 0x41c00231, 0x02b10740, 0x00000000 },
+   { 0x02600005, 0x2000252c, 0x02000448, 0x008d0460 },
+   { 0x00710001, 0x42600231, 0x02ae0820, 0x00000000 },
+   { 0x00710001, 0x43600231, 0x02ae0821, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x0000044a, 0x00000000 },
+   { 0x00910001, 0x41e00231, 0x02b10750, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x0000044c, 0x00000000 },
+   { 0x00910001, 0x42000231, 0x02b10760, 0x00000000 },
+   { 0x02600005, 0x2000252c, 0x0200044c, 0x008d0460 },
+   { 0x00710001, 0x42700231, 0x02ae0830, 0x00000000 },
+   { 0x00710001, 0x43700231, 0x02ae0831, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x0000044e, 0x00000000 },
+   { 0x00910001, 0x42200231, 0x02b10770, 0x00000000 },
+   { 0x00800001, 0x20400232, 0x00d20140, 0x00000000 },
+   { 0x00800001, 0x20500232, 0x00d20160, 0x00000000 },
+   { 0x00800001, 0x20600232, 0x00d20180, 0x00000000 },
+   { 0x00800001, 0x20700232, 0x00d201a0, 0x00000000 },
+   { 0x00800001, 0x20800232, 0x00d201c0, 0x00000000 },
+   { 0x00800001, 0x20900232, 0x00d201e0, 0x00000000 },
+   { 0x00800001, 0x20a00232, 0x00d20200, 0x00000000 },
+   { 0x00800001, 0x20b00232, 0x00d20220, 0x00000000 },
+   { 0x05600031, 0x20000cc4, 0x00000020, 0x0a094007 },
+   { 0x00200001, 0x210001a5, 0x004500a0, 0x00000000 },
+   { 0x0000000c, 0x21043ca5, 0x00000104, 0x00010001 },
+   { 0x00000001, 0x21080061, 0x00000000, 0x0003000f },
+   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+   { 0x00800001, 0x40400232, 0x00d20240, 0x00000000 },
+   { 0x00800001, 0x40410232, 0x00d20340, 0x00000000 },
+   { 0x00800001, 0x40600232, 0x00d20260, 0x00000000 },
+   { 0x00800001, 0x40610232, 0x00d20360, 0x00000000 },
+   { 0x05600031, 0x20000cc4, 0x00000020, 0x06094008 },
+   { 0x01000040, 0x20863dad, 0x00000086, 0xffffffff },
+   { 0x00000040, 0x20a03dad, 0x000000a0, 0x00100010 },
+   { 0x05000010, 0x200035ac, 0x020000a0, 0x00000084 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000008 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0xffffff2c },
+   { 0x00000001, 0x20a001ad, 0x0000008a, 0x00000000 },
+   { 0x00000040, 0x20a23dad, 0x000000a2, 0x00080008 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0xffffff26 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
+   { 0x07000031, 0x20001cc4, 0x000001e0, 0x82000010 },
+   { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
+   { 0x07000031, 0x20001cc4, 0x000001e0, 0x82000010 },
diff --git a/src/shaders/post_processing/gen5_6/pa_load_save_pl3.asm b/src/shaders/post_processing/gen5_6/pa_load_save_pl3.asm
new file mode 100755
index 0000000..9a79ac1
--- /dev/null
+++ b/src/shaders/post_processing/gen5_6/pa_load_save_pl3.asm
@@ -0,0 +1,18 @@
+// Module name: PA_LOAD_SAVE_PL3
+.kernel PA_LOAD_SAVE_PL3 
+.code
+// The kernel body consists solely of the fragments included below, in this order; their contents live in separate files.
+#include "SetupVPKernel.asm"
+#include "Multiple_Loop_Head.asm"
+#include "PA_Load_8x8.asm"   
+#include "PL8x8_PL8x4.asm"     
+#include "PL8x4_Save_IMC3.asm"
+#include "Multiple_Loop.asm"
+// NOTE(review): END_THREAD is not defined in this file; presumably a macro provided by one of the includes -- confirm.
+END_THREAD  // End of Thread
+
+.end_code  
+
+.end_kernel
+
+// end of pa_load_save_pl3.asm
diff --git a/src/shaders/post_processing/gen5_6/pa_load_save_pl3.g4b.gen5 b/src/shaders/post_processing/gen5_6/pa_load_save_pl3.g4b.gen5
new file mode 100755
index 0000000..21ff3d7
--- /dev/null
+++ b/src/shaders/post_processing/gen5_6/pa_load_save_pl3.g4b.gen5
@@ -0,0 +1,123 @@
+   { 0x00600001, 0x21000021, 0x008d0000, 0x00000000 },
+   { 0x00000441, 0x20842e2d, 0x000000b7, 0x00100010 },
+   { 0x00000c01, 0x2086022d, 0x000000bb, 0x00000000 },
+   { 0x00000801, 0x208a01ad, 0x000000a0, 0x00000000 },
+   { 0x00200001, 0x209403bd, 0x006600a4, 0x00000000 },
+   { 0x00000040, 0x208435ad, 0x00000084, 0x000000a0 },
+   { 0x00200040, 0x210035a5, 0x004500a0, 0x00450074 },
+   { 0x00000009, 0x21003da5, 0x00000100, 0x00010001 },
+   { 0x00000001, 0x21080061, 0x00000000, 0x0007001f },
+   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+   { 0x01600031, 0x27000c01, 0x408d0000, 0x0288a001 },
+   { 0x00400040, 0x22083e28, 0x00690024, 0x07000700 },
+   { 0x00800001, 0x21400229, 0x00d29000, 0x00000000 },
+   { 0x00600001, 0x22400229, 0x00cf9400, 0x00000000 },
+   { 0x00600001, 0x23400229, 0x00cf9800, 0x00000000 },
+   { 0x00800001, 0x21600229, 0x00d29020, 0x00000000 },
+   { 0x00600001, 0x22500229, 0x00cf9420, 0x00000000 },
+   { 0x00600001, 0x23500229, 0x00cf9820, 0x00000000 },
+   { 0x00800001, 0x21800229, 0x00d29040, 0x00000000 },
+   { 0x00600001, 0x22600229, 0x00cf9440, 0x00000000 },
+   { 0x00600001, 0x23600229, 0x00cf9840, 0x00000000 },
+   { 0x00800001, 0x21a00229, 0x00d29060, 0x00000000 },
+   { 0x00600001, 0x22700229, 0x00cf9460, 0x00000000 },
+   { 0x00600001, 0x23700229, 0x00cf9860, 0x00000000 },
+   { 0x00800001, 0x21c00229, 0x00d29080, 0x00000000 },
+   { 0x00600001, 0x22800229, 0x00cf9480, 0x00000000 },
+   { 0x00600001, 0x23800229, 0x00cf9880, 0x00000000 },
+   { 0x00800001, 0x21e00229, 0x00d290a0, 0x00000000 },
+   { 0x00600001, 0x22900229, 0x00cf94a0, 0x00000000 },
+   { 0x00600001, 0x23900229, 0x00cf98a0, 0x00000000 },
+   { 0x00800001, 0x22000229, 0x00d290c0, 0x00000000 },
+   { 0x00600001, 0x22a00229, 0x00cf94c0, 0x00000000 },
+   { 0x00600001, 0x23a00229, 0x00cf98c0, 0x00000000 },
+   { 0x00800001, 0x22200229, 0x00d290e0, 0x00000000 },
+   { 0x00600001, 0x22b00229, 0x00cf94e0, 0x00000000 },
+   { 0x00600001, 0x23b00229, 0x00cf98e0, 0x00000000 },
+   { 0x00600001, 0x42500231, 0x00ae0260, 0x00000000 },
+   { 0x00800001, 0x42600231, 0x00ce0280, 0x00000000 },
+   { 0x00600001, 0x43500231, 0x00ae0360, 0x00000000 },
+   { 0x00800001, 0x43600231, 0x00ce0380, 0x00000000 },
+   { 0x00000005, 0x24000c20, 0x000000b8, 0x00ffffff },
+   { 0x04000010, 0x20000c04, 0x00000400, 0x00ffffff },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x0000005a },
+   { 0x00200001, 0x210001a5, 0x004500a0, 0x00000000 },
+   { 0x00000001, 0x21080061, 0x00000000, 0x0007000f },
+   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+   { 0x01600031, 0x27000c01, 0x408d0000, 0x0248a007 },
+   { 0x0020000c, 0x21003da5, 0x004500a0, 0x00010001 },
+   { 0x00000001, 0x21080061, 0x00000000, 0x00030007 },
+   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+   { 0x01600031, 0x28000c01, 0x408d0000, 0x0218a008 },
+   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+   { 0x01600031, 0x29000c01, 0x408d0000, 0x0218a009 },
+   { 0x00800001, 0x28600229, 0x008d0830, 0x00000000 },
+   { 0x00800001, 0x29600229, 0x008d0930, 0x00000000 },
+   { 0x00800001, 0x28400229, 0x008d0820, 0x00000000 },
+   { 0x00800001, 0x29400229, 0x008d0920, 0x00000000 },
+   { 0x00800001, 0x28200229, 0x008d0810, 0x00000000 },
+   { 0x00800001, 0x29200229, 0x008d0910, 0x00000000 },
+   { 0x00800001, 0x28000229, 0x008d0800, 0x00000000 },
+   { 0x00800001, 0x29000229, 0x008d0900, 0x00000000 },
+   { 0x00000001, 0x26000228, 0x000000ba, 0x00000000 },
+   { 0x00610001, 0x24400129, 0x000000b8, 0x00000000 },
+   { 0x00710001, 0x24400169, 0x00000000, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x00000440, 0x00000000 },
+   { 0x00910001, 0x41400231, 0x02b10700, 0x00000000 },
+   { 0x00910001, 0x22400231, 0x028d0800, 0x00000000 },
+   { 0x00910001, 0x23400231, 0x028d0900, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x00000442, 0x00000000 },
+   { 0x00910001, 0x41410231, 0x02b10701, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x00000444, 0x00000000 },
+   { 0x00910001, 0x41800231, 0x02b10720, 0x00000000 },
+   { 0x00910001, 0x22500231, 0x028d0810, 0x00000000 },
+   { 0x00910001, 0x23500231, 0x028d0910, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x00000446, 0x00000000 },
+   { 0x00910001, 0x41810231, 0x02b10721, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x00000448, 0x00000000 },
+   { 0x00910001, 0x41c00231, 0x02b10740, 0x00000000 },
+   { 0x00910001, 0x22600231, 0x028d0820, 0x00000000 },
+   { 0x00910001, 0x23600231, 0x028d0920, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x0000044a, 0x00000000 },
+   { 0x00910001, 0x41c10231, 0x02b10741, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x0000044c, 0x00000000 },
+   { 0x00910001, 0x42000231, 0x02b10760, 0x00000000 },
+   { 0x00910001, 0x22700231, 0x028d0830, 0x00000000 },
+   { 0x00910001, 0x23700231, 0x028d0930, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x0000044e, 0x00000000 },
+   { 0x00910001, 0x42010231, 0x02b10761, 0x00000000 },
+   { 0x00200001, 0x210001a5, 0x004500a0, 0x00000000 },
+   { 0x00000001, 0x21080061, 0x00000000, 0x0007000f },
+   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+   { 0x00800001, 0x20400232, 0x00d20140, 0x00000000 },
+   { 0x00800001, 0x20500232, 0x00d20160, 0x00000000 },
+   { 0x00800001, 0x20600232, 0x00d20180, 0x00000000 },
+   { 0x00800001, 0x20700232, 0x00d201a0, 0x00000000 },
+   { 0x00800001, 0x20800232, 0x00d201c0, 0x00000000 },
+   { 0x00800001, 0x20900232, 0x00d201e0, 0x00000000 },
+   { 0x00800001, 0x20a00232, 0x00d20200, 0x00000000 },
+   { 0x00800001, 0x20b00232, 0x00d20220, 0x00000000 },
+   { 0x01600031, 0x20000c04, 0x508d0000, 0x0a082007 },
+   { 0x0020000c, 0x21003da5, 0x004500a0, 0x00010001 },
+   { 0x00000001, 0x21080061, 0x00000000, 0x00030007 },
+   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+   { 0x00800001, 0x20400232, 0x00d20240, 0x00000000 },
+   { 0x00800001, 0x20500232, 0x00d20260, 0x00000000 },
+   { 0x01600031, 0x20000c04, 0x508d0000, 0x04082008 },
+   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+   { 0x00800001, 0x20400232, 0x00d20340, 0x00000000 },
+   { 0x00800001, 0x20500232, 0x00d20360, 0x00000000 },
+   { 0x01600031, 0x20000c04, 0x508d0000, 0x04082009 },
+   { 0x01000040, 0x20863dad, 0x00000086, 0xffffffff },
+   { 0x00000040, 0x20a03dad, 0x000000a0, 0x00100010 },
+   { 0x05000010, 0x200035ac, 0x020000a0, 0x00000084 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000008 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0xffffff26 },
+   { 0x00000001, 0x20a001ad, 0x0000008a, 0x00000000 },
+   { 0x00000040, 0x20a23dad, 0x000000a2, 0x00080008 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0xffffff20 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
+   { 0x0f000031, 0x20000c04, 0x708d0000, 0x82000000 },
+   { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
+   { 0x0f000031, 0x20000c04, 0x708d0000, 0x82000000 },
diff --git a/src/shaders/post_processing/gen5_6/pa_load_save_pl3.g6b b/src/shaders/post_processing/gen5_6/pa_load_save_pl3.g6b
new file mode 100755
index 0000000..55c0fed
--- /dev/null
+++ b/src/shaders/post_processing/gen5_6/pa_load_save_pl3.g6b
@@ -0,0 +1,196 @@
+   { 0x00600001, 0x20e00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21000061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21200061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21400061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21600061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21800061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21a00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21c00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21e00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x22000061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x22200061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x22400061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x22600061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x22800061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x22a00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x22c00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x22e00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x23000061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x23200061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x23400061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x23600061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x23800061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x23a00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x23c00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x23e00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x24000061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x24200061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x24400061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x24600061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x24800061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x24a00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x24c00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x24e00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x25000061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x25200061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x25400061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x25600061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x25800061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x25a00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x25c00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x25e00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x26000061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x26200061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x26400061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x26600061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x26800061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x26a00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x26c00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x26e00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x27000061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x27200061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x27400061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x27600061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x27800061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x27a00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x27c00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x27e00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x28000061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x28200061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x28400061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x28600061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x28800061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x28a00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x28c00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x28e00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x29000061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x29200061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x29400061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x29600061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x29800061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x29a00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x29c00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x29e00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21000021, 0x008d0000, 0x00000000 },
+   { 0x00000441, 0x20842e2d, 0x000000b7, 0x00100010 },
+   { 0x00000c01, 0x2086022d, 0x000000bb, 0x00000000 },
+   { 0x00000801, 0x208a01ad, 0x000000a0, 0x00000000 },
+   { 0x00200001, 0x209403bd, 0x006600a4, 0x00000000 },
+   { 0x00000040, 0x208435ad, 0x00000084, 0x000000a0 },
+   { 0x00200040, 0x210035a5, 0x004500a0, 0x00450074 },
+   { 0x00000009, 0x21003da5, 0x00000100, 0x00010001 },
+   { 0x00000001, 0x21080061, 0x00000000, 0x0007001f },
+   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+   { 0x04600031, 0x27000cc1, 0x00000020, 0x02898001 },
+   { 0x00400040, 0x22083e28, 0x00690024, 0x07000700 },
+   { 0x00800001, 0x21400229, 0x00d29000, 0x00000000 },
+   { 0x00600001, 0x22400229, 0x00cf9400, 0x00000000 },
+   { 0x00600001, 0x23400229, 0x00cf9800, 0x00000000 },
+   { 0x00800001, 0x21600229, 0x00d29020, 0x00000000 },
+   { 0x00600001, 0x22500229, 0x00cf9420, 0x00000000 },
+   { 0x00600001, 0x23500229, 0x00cf9820, 0x00000000 },
+   { 0x00800001, 0x21800229, 0x00d29040, 0x00000000 },
+   { 0x00600001, 0x22600229, 0x00cf9440, 0x00000000 },
+   { 0x00600001, 0x23600229, 0x00cf9840, 0x00000000 },
+   { 0x00800001, 0x21a00229, 0x00d29060, 0x00000000 },
+   { 0x00600001, 0x22700229, 0x00cf9460, 0x00000000 },
+   { 0x00600001, 0x23700229, 0x00cf9860, 0x00000000 },
+   { 0x00800001, 0x21c00229, 0x00d29080, 0x00000000 },
+   { 0x00600001, 0x22800229, 0x00cf9480, 0x00000000 },
+   { 0x00600001, 0x23800229, 0x00cf9880, 0x00000000 },
+   { 0x00800001, 0x21e00229, 0x00d290a0, 0x00000000 },
+   { 0x00600001, 0x22900229, 0x00cf94a0, 0x00000000 },
+   { 0x00600001, 0x23900229, 0x00cf98a0, 0x00000000 },
+   { 0x00800001, 0x22000229, 0x00d290c0, 0x00000000 },
+   { 0x00600001, 0x22a00229, 0x00cf94c0, 0x00000000 },
+   { 0x00600001, 0x23a00229, 0x00cf98c0, 0x00000000 },
+   { 0x00800001, 0x22200229, 0x00d290e0, 0x00000000 },
+   { 0x00600001, 0x22b00229, 0x00cf94e0, 0x00000000 },
+   { 0x00600001, 0x23b00229, 0x00cf98e0, 0x00000000 },
+   { 0x00600001, 0x42500231, 0x00ae0260, 0x00000000 },
+   { 0x00800001, 0x42600231, 0x00ce0280, 0x00000000 },
+   { 0x00600001, 0x43500231, 0x00ae0360, 0x00000000 },
+   { 0x00800001, 0x43600231, 0x00ce0380, 0x00000000 },
+   { 0x00000005, 0x24000c20, 0x000000b8, 0x00ffffff },
+   { 0x04000010, 0x20000c04, 0x00000400, 0x00ffffff },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x0000005a },
+   { 0x00200001, 0x210001a5, 0x004500a0, 0x00000000 },
+   { 0x00000001, 0x21080061, 0x00000000, 0x0007000f },
+   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+   { 0x04600031, 0x27000cc1, 0x00000020, 0x02498007 },
+   { 0x0020000c, 0x21003da5, 0x004500a0, 0x00010001 },
+   { 0x00000001, 0x21080061, 0x00000000, 0x00030007 },
+   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+   { 0x04600031, 0x28000cc1, 0x00000020, 0x02198008 },
+   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+   { 0x04600031, 0x29000cc1, 0x00000020, 0x02198009 },
+   { 0x00800001, 0x28600229, 0x008d0830, 0x00000000 },
+   { 0x00800001, 0x29600229, 0x008d0930, 0x00000000 },
+   { 0x00800001, 0x28400229, 0x008d0820, 0x00000000 },
+   { 0x00800001, 0x29400229, 0x008d0920, 0x00000000 },
+   { 0x00800001, 0x28200229, 0x008d0810, 0x00000000 },
+   { 0x00800001, 0x29200229, 0x008d0910, 0x00000000 },
+   { 0x00800001, 0x28000229, 0x008d0800, 0x00000000 },
+   { 0x00800001, 0x29000229, 0x008d0900, 0x00000000 },
+   { 0x00000001, 0x26000228, 0x000000ba, 0x00000000 },
+   { 0x00610001, 0x24400129, 0x000000b8, 0x00000000 },
+   { 0x00710001, 0x24400169, 0x00000000, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x00000440, 0x00000000 },
+   { 0x00910001, 0x41400231, 0x02b10700, 0x00000000 },
+   { 0x00910001, 0x22400231, 0x028d0800, 0x00000000 },
+   { 0x00910001, 0x23400231, 0x028d0900, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x00000442, 0x00000000 },
+   { 0x00910001, 0x41410231, 0x02b10701, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x00000444, 0x00000000 },
+   { 0x00910001, 0x41800231, 0x02b10720, 0x00000000 },
+   { 0x00910001, 0x22500231, 0x028d0810, 0x00000000 },
+   { 0x00910001, 0x23500231, 0x028d0910, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x00000446, 0x00000000 },
+   { 0x00910001, 0x41810231, 0x02b10721, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x00000448, 0x00000000 },
+   { 0x00910001, 0x41c00231, 0x02b10740, 0x00000000 },
+   { 0x00910001, 0x22600231, 0x028d0820, 0x00000000 },
+   { 0x00910001, 0x23600231, 0x028d0920, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x0000044a, 0x00000000 },
+   { 0x00910001, 0x41c10231, 0x02b10741, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x0000044c, 0x00000000 },
+   { 0x00910001, 0x42000231, 0x02b10760, 0x00000000 },
+   { 0x00910001, 0x22700231, 0x028d0830, 0x00000000 },
+   { 0x00910001, 0x23700231, 0x028d0930, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x0000044e, 0x00000000 },
+   { 0x00910001, 0x42010231, 0x02b10761, 0x00000000 },
+   { 0x00200001, 0x210001a5, 0x004500a0, 0x00000000 },
+   { 0x00000001, 0x21080061, 0x00000000, 0x0007000f },
+   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+   { 0x00800001, 0x20400232, 0x00d20140, 0x00000000 },
+   { 0x00800001, 0x20500232, 0x00d20160, 0x00000000 },
+   { 0x00800001, 0x20600232, 0x00d20180, 0x00000000 },
+   { 0x00800001, 0x20700232, 0x00d201a0, 0x00000000 },
+   { 0x00800001, 0x20800232, 0x00d201c0, 0x00000000 },
+   { 0x00800001, 0x20900232, 0x00d201e0, 0x00000000 },
+   { 0x00800001, 0x20a00232, 0x00d20200, 0x00000000 },
+   { 0x00800001, 0x20b00232, 0x00d20220, 0x00000000 },
+   { 0x05600031, 0x20000cc4, 0x00000020, 0x0a094007 },
+   { 0x0020000c, 0x21003da5, 0x004500a0, 0x00010001 },
+   { 0x00000001, 0x21080061, 0x00000000, 0x00030007 },
+   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+   { 0x00800001, 0x20400232, 0x00d20240, 0x00000000 },
+   { 0x00800001, 0x20500232, 0x00d20260, 0x00000000 },
+   { 0x05600031, 0x20000cc4, 0x00000020, 0x04094008 },
+   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+   { 0x00800001, 0x20400232, 0x00d20340, 0x00000000 },
+   { 0x00800001, 0x20500232, 0x00d20360, 0x00000000 },
+   { 0x05600031, 0x20000cc4, 0x00000020, 0x04094009 },
+   { 0x01000040, 0x20863dad, 0x00000086, 0xffffffff },
+   { 0x00000040, 0x20a03dad, 0x000000a0, 0x00100010 },
+   { 0x05000010, 0x200035ac, 0x020000a0, 0x00000084 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000008 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0xffffff26 },
+   { 0x00000001, 0x20a001ad, 0x0000008a, 0x00000000 },
+   { 0x00000040, 0x20a23dad, 0x000000a2, 0x00080008 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0xffffff20 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
+   { 0x07000031, 0x20001cc4, 0x000001e0, 0x82000010 },
+   { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
+   { 0x07000031, 0x20001cc4, 0x000001e0, 0x82000010 },
diff --git a/src/shaders/post_processing/gen5_6/pl3_load_save_nv12.asm b/src/shaders/post_processing/gen5_6/pl3_load_save_nv12.asm
new file mode 100644
index 0000000..cd1b5fe
--- /dev/null
+++ b/src/shaders/post_processing/gen5_6/pl3_load_save_nv12.asm
@@ -0,0 +1,17 @@
+// Module name: PL3_LOAD_SAVE_NV12
+.kernel PL3_LOAD_SAVE_NV12
+.code
+// Kernel body = the include fragments below, pasted in this order; this file adds only the framing and END_THREAD. Fragment roles (setup, loop head, load, save, loop tail) are inferred from file names only -- TODO confirm in each include.
+#include "SetupVPKernel.asm"
+#include "Multiple_Loop_Head.asm"
+#include "IMC3_Load_8x4.asm"        
+#include "PL8x4_Save_NV12.asm"
+#include "Multiple_Loop.asm"
+
+END_THREAD  // End of Thread
+
+.end_code  
+
+.end_kernel
+
+// end of pl3_load_save_nv12.asm
diff --git a/src/shaders/post_processing/gen5_6/pl3_load_save_nv12.g4b.gen5 b/src/shaders/post_processing/gen5_6/pl3_load_save_nv12.g4b.gen5
new file mode 100644
index 0000000..cf31c50
--- /dev/null
+++ b/src/shaders/post_processing/gen5_6/pl3_load_save_nv12.g4b.gen5
@@ -0,0 +1,108 @@
+   { 0x00600001, 0x21000021, 0x008d0000, 0x00000000 },
+   { 0x00000441, 0x20842e2d, 0x000000b7, 0x00100010 },
+   { 0x00000c01, 0x2086022d, 0x000000bb, 0x00000000 },
+   { 0x00000801, 0x208a01ad, 0x000000a0, 0x00000000 },
+   { 0x00200001, 0x209403bd, 0x006600a4, 0x00000000 },
+   { 0x00000040, 0x208435ad, 0x00000084, 0x000000a0 },
+   { 0x00200040, 0x210035a5, 0x004500a0, 0x00450074 },
+   { 0x00000001, 0x21080061, 0x00000000, 0x0007000f },
+   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+   { 0x01600031, 0x27000c01, 0x408d0000, 0x0248a001 },
+   { 0x0020000c, 0x21003ca5, 0x00450100, 0x00010001 },
+   { 0x00000001, 0x21080061, 0x00000000, 0x00030007 },
+   { 0x00600001, 0x20400022, 0x008d0100, 0x00000000 },
+   { 0x02600031, 0x28000c01, 0x408d0000, 0x0218a002 },
+   { 0x00600001, 0x20600022, 0x008d0100, 0x00000000 },
+   { 0x02600031, 0x29000c01, 0x408d0000, 0x0218a003 },
+   { 0x00800001, 0x22200229, 0x00b10770, 0x00000000 },
+   { 0x00800001, 0x22000229, 0x00b10760, 0x00000000 },
+   { 0x00800001, 0x21e00229, 0x00b10750, 0x00000000 },
+   { 0x00800001, 0x21c00229, 0x00b10740, 0x00000000 },
+   { 0x00800001, 0x21a00229, 0x00b10730, 0x00000000 },
+   { 0x00800001, 0x21800229, 0x00b10720, 0x00000000 },
+   { 0x00800001, 0x21600229, 0x00b10710, 0x00000000 },
+   { 0x00800001, 0x21400229, 0x00b10700, 0x00000000 },
+   { 0x00800001, 0x22600229, 0x008d0810, 0x00000000 },
+   { 0x00800001, 0x23600229, 0x008d0910, 0x00000000 },
+   { 0x00800001, 0x22400229, 0x008d0800, 0x00000000 },
+   { 0x00800001, 0x23400229, 0x008d0900, 0x00000000 },
+   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+   { 0x00200001, 0x202001a6, 0x004500a0, 0x00000000 },
+   { 0x00000001, 0x20280062, 0x00000000, 0x0007000f },
+   { 0x00000005, 0x24000c20, 0x000000b8, 0x00ffffff },
+   { 0x04000010, 0x20000c04, 0x00000400, 0x00ffffff },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000056 },
+   { 0x01600031, 0x27000c01, 0x408d0000, 0x0248a007 },
+   { 0x0000040c, 0x21043da1, 0x000000a2, 0x00010001 },
+   { 0x00000801, 0x21080061, 0x00000000, 0x0003000f },
+   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+   { 0x01600031, 0x28000c01, 0x408d0000, 0x0228a008 },
+   { 0x00200001, 0x210001a1, 0x004500a0, 0x00000000 },
+   { 0x00000001, 0x21080061, 0x00000000, 0x0007000f },
+   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+   { 0x00000001, 0x26020228, 0x000000ba, 0x00000000 },
+   { 0x00610001, 0x24400129, 0x020000b8, 0x00000000 },
+   { 0x00710001, 0x24400169, 0x02000000, 0x00000000 },
+   { 0x00000001, 0x24600061, 0x00000000, 0x00040001 },
+   { 0x00000001, 0x24640061, 0x00000000, 0x00400010 },
+   { 0x00000001, 0x24680061, 0x00000000, 0x04000100 },
+   { 0x00000001, 0x246c0061, 0x00000000, 0x40001000 },
+   { 0x00000001, 0x26020128, 0x00000440, 0x00000000 },
+   { 0x00910001, 0x41400231, 0x02b10700, 0x00000000 },
+   { 0x02600005, 0x2000252c, 0x02000440, 0x008d0460 },
+   { 0x00710001, 0x42400231, 0x02ae0800, 0x00000000 },
+   { 0x00710001, 0x43400231, 0x02ae0801, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x00000442, 0x00000000 },
+   { 0x00910001, 0x41600231, 0x02b10710, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x00000444, 0x00000000 },
+   { 0x00910001, 0x41800231, 0x02b10720, 0x00000000 },
+   { 0x02600005, 0x2000252c, 0x02000444, 0x008d0460 },
+   { 0x00710001, 0x42500231, 0x02ae0810, 0x00000000 },
+   { 0x00710001, 0x43500231, 0x02ae0811, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x00000446, 0x00000000 },
+   { 0x00910001, 0x41a00231, 0x02b10730, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x00000448, 0x00000000 },
+   { 0x00910001, 0x41c00231, 0x02b10740, 0x00000000 },
+   { 0x02600005, 0x2000252c, 0x02000448, 0x008d0460 },
+   { 0x00710001, 0x42600231, 0x02ae0820, 0x00000000 },
+   { 0x00710001, 0x43600231, 0x02ae0821, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x0000044a, 0x00000000 },
+   { 0x00910001, 0x41e00231, 0x02b10750, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x0000044c, 0x00000000 },
+   { 0x00910001, 0x42000231, 0x02b10760, 0x00000000 },
+   { 0x02600005, 0x2000252c, 0x0200044c, 0x008d0460 },
+   { 0x00710001, 0x42700231, 0x02ae0830, 0x00000000 },
+   { 0x00710001, 0x43700231, 0x02ae0831, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x0000044e, 0x00000000 },
+   { 0x00910001, 0x42200231, 0x02b10770, 0x00000000 },
+   { 0x00800001, 0x20400232, 0x00d20140, 0x00000000 },
+   { 0x00800001, 0x20500232, 0x00d20160, 0x00000000 },
+   { 0x00800001, 0x20600232, 0x00d20180, 0x00000000 },
+   { 0x00800001, 0x20700232, 0x00d201a0, 0x00000000 },
+   { 0x00800001, 0x20800232, 0x00d201c0, 0x00000000 },
+   { 0x00800001, 0x20900232, 0x00d201e0, 0x00000000 },
+   { 0x00800001, 0x20a00232, 0x00d20200, 0x00000000 },
+   { 0x00800001, 0x20b00232, 0x00d20220, 0x00000000 },
+   { 0x01600031, 0x20000c04, 0x508d0000, 0x0a082007 },
+   { 0x00200001, 0x210001a5, 0x004500a0, 0x00000000 },
+   { 0x0000000c, 0x21043ca5, 0x00000104, 0x00010001 },
+   { 0x00000001, 0x21080061, 0x00000000, 0x0003000f },
+   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+   { 0x00800001, 0x40400232, 0x00d20240, 0x00000000 },
+   { 0x00800001, 0x40410232, 0x00d20340, 0x00000000 },
+   { 0x00800001, 0x40600232, 0x00d20260, 0x00000000 },
+   { 0x00800001, 0x40610232, 0x00d20360, 0x00000000 },
+   { 0x01600031, 0x20000c04, 0x508d0000, 0x06082008 },
+   { 0x01000040, 0x20863dad, 0x00000086, 0xffffffff },
+   { 0x00000040, 0x20a03dad, 0x000000a0, 0x00100010 },
+   { 0x05000010, 0x200035ac, 0x020000a0, 0x00000084 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000008 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0xffffff44 },
+   { 0x00000001, 0x20a001ad, 0x0000008a, 0x00000000 },
+   { 0x00000040, 0x20a23dad, 0x000000a2, 0x00080008 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0xffffff3e },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
+   { 0x0f000031, 0x20000c04, 0x708d0000, 0x82000000 },
+   { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
+   { 0x0f000031, 0x20000c04, 0x708d0000, 0x82000000 },
diff --git a/src/shaders/post_processing/gen5_6/pl3_load_save_nv12.g6b b/src/shaders/post_processing/gen5_6/pl3_load_save_nv12.g6b
new file mode 100644
index 0000000..437ba56
--- /dev/null
+++ b/src/shaders/post_processing/gen5_6/pl3_load_save_nv12.g6b
@@ -0,0 +1,181 @@
+   { 0x00600001, 0x20e00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21000061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21200061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21400061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21600061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21800061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21a00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21c00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21e00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x22000061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x22200061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x22400061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x22600061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x22800061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x22a00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x22c00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x22e00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x23000061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x23200061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x23400061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x23600061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x23800061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x23a00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x23c00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x23e00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x24000061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x24200061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x24400061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x24600061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x24800061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x24a00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x24c00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x24e00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x25000061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x25200061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x25400061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x25600061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x25800061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x25a00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x25c00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x25e00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x26000061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x26200061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x26400061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x26600061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x26800061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x26a00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x26c00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x26e00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x27000061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x27200061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x27400061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x27600061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x27800061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x27a00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x27c00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x27e00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x28000061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x28200061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x28400061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x28600061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x28800061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x28a00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x28c00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x28e00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x29000061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x29200061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x29400061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x29600061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x29800061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x29a00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x29c00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x29e00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21000021, 0x008d0000, 0x00000000 },
+   { 0x00000441, 0x20842e2d, 0x000000b7, 0x00100010 },
+   { 0x00000c01, 0x2086022d, 0x000000bb, 0x00000000 },
+   { 0x00000801, 0x208a01ad, 0x000000a0, 0x00000000 },
+   { 0x00200001, 0x209403bd, 0x006600a4, 0x00000000 },
+   { 0x00000040, 0x208435ad, 0x00000084, 0x000000a0 },
+   { 0x00200040, 0x210035a5, 0x004500a0, 0x00450074 },
+   { 0x00000001, 0x21080061, 0x00000000, 0x0007000f },
+   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+   { 0x04600031, 0x27000cc1, 0x00000020, 0x02498001 },
+   { 0x0020000c, 0x21003ca5, 0x00450100, 0x00010001 },
+   { 0x00000001, 0x21080061, 0x00000000, 0x00030007 },
+   { 0x00600001, 0x20400022, 0x008d0100, 0x00000000 },
+   { 0x04600031, 0x28000cc1, 0x00000040, 0x02198002 },
+   { 0x00600001, 0x20600022, 0x008d0100, 0x00000000 },
+   { 0x04600031, 0x29000cc1, 0x00000040, 0x02198003 },
+   { 0x00800001, 0x22200229, 0x00b10770, 0x00000000 },
+   { 0x00800001, 0x22000229, 0x00b10760, 0x00000000 },
+   { 0x00800001, 0x21e00229, 0x00b10750, 0x00000000 },
+   { 0x00800001, 0x21c00229, 0x00b10740, 0x00000000 },
+   { 0x00800001, 0x21a00229, 0x00b10730, 0x00000000 },
+   { 0x00800001, 0x21800229, 0x00b10720, 0x00000000 },
+   { 0x00800001, 0x21600229, 0x00b10710, 0x00000000 },
+   { 0x00800001, 0x21400229, 0x00b10700, 0x00000000 },
+   { 0x00800001, 0x22600229, 0x008d0810, 0x00000000 },
+   { 0x00800001, 0x23600229, 0x008d0910, 0x00000000 },
+   { 0x00800001, 0x22400229, 0x008d0800, 0x00000000 },
+   { 0x00800001, 0x23400229, 0x008d0900, 0x00000000 },
+   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+   { 0x00200001, 0x202001a6, 0x004500a0, 0x00000000 },
+   { 0x00000001, 0x20280062, 0x00000000, 0x0007000f },
+   { 0x00000005, 0x24000c20, 0x000000b8, 0x00ffffff },
+   { 0x04000010, 0x20000c04, 0x00000400, 0x00ffffff },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000056 },
+   { 0x04600031, 0x27000cc1, 0x00000020, 0x02498007 },
+   { 0x0000040c, 0x21043da1, 0x000000a2, 0x00010001 },
+   { 0x00000801, 0x21080061, 0x00000000, 0x0003000f },
+   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+   { 0x04600031, 0x28000cc1, 0x00000020, 0x02298008 },
+   { 0x00200001, 0x210001a1, 0x004500a0, 0x00000000 },
+   { 0x00000001, 0x21080061, 0x00000000, 0x0007000f },
+   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+   { 0x00000001, 0x26020228, 0x000000ba, 0x00000000 },
+   { 0x00610001, 0x24400129, 0x020000b8, 0x00000000 },
+   { 0x00710001, 0x24400169, 0x02000000, 0x00000000 },
+   { 0x00000001, 0x24600061, 0x00000000, 0x00040001 },
+   { 0x00000001, 0x24640061, 0x00000000, 0x00400010 },
+   { 0x00000001, 0x24680061, 0x00000000, 0x04000100 },
+   { 0x00000001, 0x246c0061, 0x00000000, 0x40001000 },
+   { 0x00000001, 0x26020128, 0x00000440, 0x00000000 },
+   { 0x00910001, 0x41400231, 0x02b10700, 0x00000000 },
+   { 0x02600005, 0x2000252c, 0x02000440, 0x008d0460 },
+   { 0x00710001, 0x42400231, 0x02ae0800, 0x00000000 },
+   { 0x00710001, 0x43400231, 0x02ae0801, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x00000442, 0x00000000 },
+   { 0x00910001, 0x41600231, 0x02b10710, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x00000444, 0x00000000 },
+   { 0x00910001, 0x41800231, 0x02b10720, 0x00000000 },
+   { 0x02600005, 0x2000252c, 0x02000444, 0x008d0460 },
+   { 0x00710001, 0x42500231, 0x02ae0810, 0x00000000 },
+   { 0x00710001, 0x43500231, 0x02ae0811, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x00000446, 0x00000000 },
+   { 0x00910001, 0x41a00231, 0x02b10730, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x00000448, 0x00000000 },
+   { 0x00910001, 0x41c00231, 0x02b10740, 0x00000000 },
+   { 0x02600005, 0x2000252c, 0x02000448, 0x008d0460 },
+   { 0x00710001, 0x42600231, 0x02ae0820, 0x00000000 },
+   { 0x00710001, 0x43600231, 0x02ae0821, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x0000044a, 0x00000000 },
+   { 0x00910001, 0x41e00231, 0x02b10750, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x0000044c, 0x00000000 },
+   { 0x00910001, 0x42000231, 0x02b10760, 0x00000000 },
+   { 0x02600005, 0x2000252c, 0x0200044c, 0x008d0460 },
+   { 0x00710001, 0x42700231, 0x02ae0830, 0x00000000 },
+   { 0x00710001, 0x43700231, 0x02ae0831, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x0000044e, 0x00000000 },
+   { 0x00910001, 0x42200231, 0x02b10770, 0x00000000 },
+   { 0x00800001, 0x20400232, 0x00d20140, 0x00000000 },
+   { 0x00800001, 0x20500232, 0x00d20160, 0x00000000 },
+   { 0x00800001, 0x20600232, 0x00d20180, 0x00000000 },
+   { 0x00800001, 0x20700232, 0x00d201a0, 0x00000000 },
+   { 0x00800001, 0x20800232, 0x00d201c0, 0x00000000 },
+   { 0x00800001, 0x20900232, 0x00d201e0, 0x00000000 },
+   { 0x00800001, 0x20a00232, 0x00d20200, 0x00000000 },
+   { 0x00800001, 0x20b00232, 0x00d20220, 0x00000000 },
+   { 0x05600031, 0x20000cc4, 0x00000020, 0x0a094007 },
+   { 0x00200001, 0x210001a5, 0x004500a0, 0x00000000 },
+   { 0x0000000c, 0x21043ca5, 0x00000104, 0x00010001 },
+   { 0x00000001, 0x21080061, 0x00000000, 0x0003000f },
+   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+   { 0x00800001, 0x40400232, 0x00d20240, 0x00000000 },
+   { 0x00800001, 0x40410232, 0x00d20340, 0x00000000 },
+   { 0x00800001, 0x40600232, 0x00d20260, 0x00000000 },
+   { 0x00800001, 0x40610232, 0x00d20360, 0x00000000 },
+   { 0x05600031, 0x20000cc4, 0x00000020, 0x06094008 },
+   { 0x01000040, 0x20863dad, 0x00000086, 0xffffffff },
+   { 0x00000040, 0x20a03dad, 0x000000a0, 0x00100010 },
+   { 0x05000010, 0x200035ac, 0x020000a0, 0x00000084 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000008 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0xffffff44 },
+   { 0x00000001, 0x20a001ad, 0x0000008a, 0x00000000 },
+   { 0x00000040, 0x20a23dad, 0x000000a2, 0x00080008 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0xffffff3e },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
+   { 0x07000031, 0x20001cc4, 0x000001e0, 0x82000010 },
+   { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
+   { 0x07000031, 0x20001cc4, 0x000001e0, 0x82000010 },
diff --git a/src/shaders/post_processing/gen5_6/pl3_load_save_pa.asm b/src/shaders/post_processing/gen5_6/pl3_load_save_pa.asm
new file mode 100755
index 0000000..11efe0a
--- /dev/null
+++ b/src/shaders/post_processing/gen5_6/pl3_load_save_pa.asm
@@ -0,0 +1,18 @@
+// Module name: PL3_LOAD_SAVE_PA
+.kernel PL3_LOAD_SAVE_PA // NOTE(review): unclear whether this kernel name is consumed anywhere or is only a label -- confirm
+.code
+// Kernel body = the include fragments below, pasted in this order; this file adds only the framing and END_THREAD. Fragment roles (setup, loop head, load, 8x5-to-8x8 step, save, loop tail) are inferred from file names only -- TODO confirm in each include.
+#include "SetupVPKernel.asm"
+#include "Multiple_Loop_Head.asm"
+#include "IMC3_Load_8x5.asm"   
+#include "PL8x5_PL8x8.asm"     
+#include "PL8x8_Save_PA.asm"
+#include "Multiple_Loop.asm"
+
+END_THREAD  // End of Thread
+
+.end_code  
+
+.end_kernel
+
+// end of pl3_load_save_pa.asm
diff --git a/src/shaders/post_processing/gen5_6/pl3_load_save_pa.g4b.gen5 b/src/shaders/post_processing/gen5_6/pl3_load_save_pa.g4b.gen5
new file mode 100644
index 0000000..5a58923
--- /dev/null
+++ b/src/shaders/post_processing/gen5_6/pl3_load_save_pa.g4b.gen5
@@ -0,0 +1,119 @@
+   { 0x00600001, 0x21000021, 0x008d0000, 0x00000000 },
+   { 0x00000441, 0x20842e2d, 0x000000b7, 0x00100010 },
+   { 0x00000c01, 0x2086022d, 0x000000bb, 0x00000000 },
+   { 0x00000801, 0x208a01ad, 0x000000a0, 0x00000000 },
+   { 0x00200001, 0x209403bd, 0x006600a4, 0x00000000 },
+   { 0x00000040, 0x208435ad, 0x00000084, 0x000000a0 },
+   { 0x00200040, 0x210035a5, 0x004500a0, 0x00450074 },
+   { 0x00000001, 0x21080061, 0x00000000, 0x0007000f },
+   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+   { 0x01600031, 0x27000c01, 0x408d0000, 0x0248a001 },
+   { 0x0020000c, 0x21003ca5, 0x00450100, 0x00010001 },
+   { 0x00000001, 0x21080061, 0x00000000, 0x00040007 },
+   { 0x00600001, 0x20400022, 0x008d0100, 0x00000000 },
+   { 0x02600031, 0x28000c01, 0x408d0000, 0x0228a002 },
+   { 0x00600001, 0x20600022, 0x008d0100, 0x00000000 },
+   { 0x02600031, 0x29000c01, 0x408d0000, 0x0228a003 },
+   { 0x00800001, 0x22200229, 0x00b10770, 0x00000000 },
+   { 0x00800001, 0x22000229, 0x00b10760, 0x00000000 },
+   { 0x00800001, 0x21e00229, 0x00b10750, 0x00000000 },
+   { 0x00800001, 0x21c00229, 0x00b10740, 0x00000000 },
+   { 0x00800001, 0x21a00229, 0x00b10730, 0x00000000 },
+   { 0x00800001, 0x21800229, 0x00b10720, 0x00000000 },
+   { 0x00800001, 0x21600229, 0x00b10710, 0x00000000 },
+   { 0x00800001, 0x21400229, 0x00b10700, 0x00000000 },
+   { 0x00800001, 0x22800229, 0x008d0820, 0x00000000 },
+   { 0x00800001, 0x23800229, 0x008d0920, 0x00000000 },
+   { 0x00800001, 0x22600229, 0x008d0810, 0x00000000 },
+   { 0x00800001, 0x23600229, 0x008d0910, 0x00000000 },
+   { 0x00800001, 0x22400229, 0x008d0800, 0x00000000 },
+   { 0x00800001, 0x23400229, 0x008d0900, 0x00000000 },
+   { 0x80600042, 0x22b02529, 0x008d0270, 0x008d0280 },
+   { 0x80600042, 0x23b02529, 0x008d0370, 0x008d0380 },
+   { 0x00600001, 0x22a00129, 0x008d0270, 0x00000000 },
+   { 0x80600042, 0x22902529, 0x008d0260, 0x008d0270 },
+   { 0x00600001, 0x23a00129, 0x008d0370, 0x00000000 },
+   { 0x80600042, 0x23902529, 0x008d0360, 0x008d0370 },
+   { 0x00600001, 0x22800129, 0x008d0260, 0x00000000 },
+   { 0x80600042, 0x22702529, 0x008d0250, 0x008d0260 },
+   { 0x00600001, 0x23800129, 0x008d0360, 0x00000000 },
+   { 0x80600042, 0x23702529, 0x008d0350, 0x008d0360 },
+   { 0x00600001, 0x22600129, 0x008d0250, 0x00000000 },
+   { 0x80600042, 0x22502529, 0x008d0240, 0x008d0250 },
+   { 0x00600001, 0x23600129, 0x008d0350, 0x00000000 },
+   { 0x80600042, 0x23502529, 0x008d0340, 0x008d0350 },
+   { 0x00400040, 0x22083e28, 0x00690028, 0x07000700 },
+   { 0x00800001, 0xd0000231, 0x00d20140, 0x00000000 },
+   { 0x00800001, 0xd0200231, 0x00d20160, 0x00000000 },
+   { 0x00800001, 0xd0400231, 0x00d20180, 0x00000000 },
+   { 0x00800001, 0xd0600231, 0x00d201a0, 0x00000000 },
+   { 0x00800001, 0xd0800231, 0x00d201c0, 0x00000000 },
+   { 0x00800001, 0xd0a00231, 0x00d201e0, 0x00000000 },
+   { 0x00800001, 0xd0c00231, 0x00d20200, 0x00000000 },
+   { 0x00800001, 0xd0e00231, 0x00d20220, 0x00000000 },
+   { 0x00600001, 0xf4000231, 0x00ae0240, 0x00000000 },
+   { 0x00600001, 0xf8000231, 0x00ae0340, 0x00000000 },
+   { 0x00600001, 0xf4200231, 0x00ae0250, 0x00000000 },
+   { 0x00600001, 0xf8200231, 0x00ae0350, 0x00000000 },
+   { 0x00600001, 0xf4400231, 0x00ae0260, 0x00000000 },
+   { 0x00600001, 0xf8400231, 0x00ae0360, 0x00000000 },
+   { 0x00600001, 0xf4600231, 0x00ae0270, 0x00000000 },
+   { 0x00600001, 0xf8600231, 0x00ae0370, 0x00000000 },
+   { 0x00600001, 0xf4800231, 0x00ae0280, 0x00000000 },
+   { 0x00600001, 0xf8800231, 0x00ae0380, 0x00000000 },
+   { 0x00600001, 0xf4a00231, 0x00ae0290, 0x00000000 },
+   { 0x00600001, 0xf8a00231, 0x00ae0390, 0x00000000 },
+   { 0x00600001, 0xf4c00231, 0x00ae02a0, 0x00000000 },
+   { 0x00600001, 0xf8c00231, 0x00ae03a0, 0x00000000 },
+   { 0x00600001, 0xf4e00231, 0x00ae02b0, 0x00000000 },
+   { 0x00600001, 0xf8e00231, 0x00ae03b0, 0x00000000 },
+   { 0x00000409, 0x21003da5, 0x000000a0, 0x00010001 },
+   { 0x00000c01, 0x210401a5, 0x000000a2, 0x00000000 },
+   { 0x00000801, 0x21080061, 0x00000000, 0x0007001f },
+   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+   { 0x00000005, 0x24000c20, 0x000000b8, 0x00ffffff },
+   { 0x04000010, 0x20000c04, 0x00000400, 0x00ffffff },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x0000002a },
+   { 0x01600031, 0x21400c01, 0x408d0000, 0x0288a007 },
+   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+   { 0x00000001, 0x26000228, 0x000000ba, 0x00000000 },
+   { 0x00610001, 0x24400129, 0x000000b8, 0x00000000 },
+   { 0x00710001, 0x24400169, 0x00000000, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x00000440, 0x00000000 },
+   { 0x00910001, 0x27000129, 0x02b10140, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x00000442, 0x00000000 },
+   { 0x00910001, 0x27200129, 0x02b10160, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x00000444, 0x00000000 },
+   { 0x00910001, 0x27400129, 0x02b10180, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x00000446, 0x00000000 },
+   { 0x00910001, 0x27600129, 0x02b101a0, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x00000448, 0x00000000 },
+   { 0x00910001, 0x27800129, 0x02b101c0, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x0000044a, 0x00000000 },
+   { 0x00910001, 0x27a00129, 0x02b101e0, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x0000044c, 0x00000000 },
+   { 0x00910001, 0x27c00129, 0x02b10200, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x0000044e, 0x00000000 },
+   { 0x00910001, 0x27e00129, 0x02b10220, 0x00000000 },
+   { 0x00600001, 0x20400022, 0x008d0700, 0x00000000 },
+   { 0x00600001, 0x20600022, 0x008d0720, 0x00000000 },
+   { 0x00600001, 0x20800022, 0x008d0740, 0x00000000 },
+   { 0x00600001, 0x20a00022, 0x008d0760, 0x00000000 },
+   { 0x00600001, 0x20c00022, 0x008d0780, 0x00000000 },
+   { 0x00600001, 0x20e00022, 0x008d07a0, 0x00000000 },
+   { 0x00600001, 0x21000022, 0x008d07c0, 0x00000000 },
+   { 0x00600001, 0x21200022, 0x008d07e0, 0x00000000 },
+   { 0x01600031, 0x20000c04, 0x508d0000, 0x12082007 },
+   { 0x01000040, 0x20863dad, 0x00000086, 0xffffffff },
+   { 0x00000040, 0x20a03dad, 0x000000a0, 0x00100010 },
+   { 0x05000010, 0x200035ac, 0x020000a0, 0x00000084 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000008 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0xffffff2e },
+   { 0x00000001, 0x20a001ad, 0x0000008a, 0x00000000 },
+   { 0x00000040, 0x20a23dad, 0x000000a2, 0x00080008 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0xffffff28 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
+   { 0x0f000031, 0x20000c04, 0x708d0000, 0x82000000 },
+   { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
+   { 0x0f000031, 0x20000c04, 0x708d0000, 0x82000000 },
diff --git a/src/shaders/post_processing/gen5_6/pl3_load_save_pa.g6b b/src/shaders/post_processing/gen5_6/pl3_load_save_pa.g6b
new file mode 100644
index 0000000..be37861
--- /dev/null
+++ b/src/shaders/post_processing/gen5_6/pl3_load_save_pa.g6b
@@ -0,0 +1,192 @@
+   { 0x00600001, 0x20e00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21000061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21200061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21400061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21600061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21800061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21a00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21c00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21e00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x22000061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x22200061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x22400061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x22600061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x22800061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x22a00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x22c00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x22e00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x23000061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x23200061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x23400061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x23600061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x23800061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x23a00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x23c00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x23e00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x24000061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x24200061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x24400061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x24600061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x24800061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x24a00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x24c00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x24e00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x25000061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x25200061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x25400061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x25600061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x25800061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x25a00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x25c00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x25e00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x26000061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x26200061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x26400061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x26600061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x26800061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x26a00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x26c00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x26e00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x27000061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x27200061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x27400061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x27600061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x27800061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x27a00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x27c00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x27e00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x28000061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x28200061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x28400061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x28600061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x28800061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x28a00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x28c00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x28e00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x29000061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x29200061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x29400061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x29600061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x29800061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x29a00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x29c00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x29e00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21000021, 0x008d0000, 0x00000000 },
+   { 0x00000441, 0x20842e2d, 0x000000b7, 0x00100010 },
+   { 0x00000c01, 0x2086022d, 0x000000bb, 0x00000000 },
+   { 0x00000801, 0x208a01ad, 0x000000a0, 0x00000000 },
+   { 0x00200001, 0x209403bd, 0x006600a4, 0x00000000 },
+   { 0x00000040, 0x208435ad, 0x00000084, 0x000000a0 },
+   { 0x00200040, 0x210035a5, 0x004500a0, 0x00450074 },
+   { 0x00000001, 0x21080061, 0x00000000, 0x0007000f },
+   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+   { 0x04600031, 0x27000cc1, 0x00000020, 0x02498001 },
+   { 0x0020000c, 0x21003ca5, 0x00450100, 0x00010001 },
+   { 0x00000001, 0x21080061, 0x00000000, 0x00040007 },
+   { 0x00600001, 0x20400022, 0x008d0100, 0x00000000 },
+   { 0x04600031, 0x28000cc1, 0x00000040, 0x02298002 },
+   { 0x00600001, 0x20600022, 0x008d0100, 0x00000000 },
+   { 0x04600031, 0x29000cc1, 0x00000040, 0x02298003 },
+   { 0x00800001, 0x22200229, 0x00b10770, 0x00000000 },
+   { 0x00800001, 0x22000229, 0x00b10760, 0x00000000 },
+   { 0x00800001, 0x21e00229, 0x00b10750, 0x00000000 },
+   { 0x00800001, 0x21c00229, 0x00b10740, 0x00000000 },
+   { 0x00800001, 0x21a00229, 0x00b10730, 0x00000000 },
+   { 0x00800001, 0x21800229, 0x00b10720, 0x00000000 },
+   { 0x00800001, 0x21600229, 0x00b10710, 0x00000000 },
+   { 0x00800001, 0x21400229, 0x00b10700, 0x00000000 },
+   { 0x00800001, 0x22800229, 0x008d0820, 0x00000000 },
+   { 0x00800001, 0x23800229, 0x008d0920, 0x00000000 },
+   { 0x00800001, 0x22600229, 0x008d0810, 0x00000000 },
+   { 0x00800001, 0x23600229, 0x008d0910, 0x00000000 },
+   { 0x00800001, 0x22400229, 0x008d0800, 0x00000000 },
+   { 0x00800001, 0x23400229, 0x008d0900, 0x00000000 },
+   { 0x80600042, 0x22b02529, 0x008d0270, 0x008d0280 },
+   { 0x80600042, 0x23b02529, 0x008d0370, 0x008d0380 },
+   { 0x00600001, 0x22a00129, 0x008d0270, 0x00000000 },
+   { 0x80600042, 0x22902529, 0x008d0260, 0x008d0270 },
+   { 0x00600001, 0x23a00129, 0x008d0370, 0x00000000 },
+   { 0x80600042, 0x23902529, 0x008d0360, 0x008d0370 },
+   { 0x00600001, 0x22800129, 0x008d0260, 0x00000000 },
+   { 0x80600042, 0x22702529, 0x008d0250, 0x008d0260 },
+   { 0x00600001, 0x23800129, 0x008d0360, 0x00000000 },
+   { 0x80600042, 0x23702529, 0x008d0350, 0x008d0360 },
+   { 0x00600001, 0x22600129, 0x008d0250, 0x00000000 },
+   { 0x80600042, 0x22502529, 0x008d0240, 0x008d0250 },
+   { 0x00600001, 0x23600129, 0x008d0350, 0x00000000 },
+   { 0x80600042, 0x23502529, 0x008d0340, 0x008d0350 },
+   { 0x00400040, 0x22083e28, 0x00690028, 0x07000700 },
+   { 0x00800001, 0xd0000231, 0x00d20140, 0x00000000 },
+   { 0x00800001, 0xd0200231, 0x00d20160, 0x00000000 },
+   { 0x00800001, 0xd0400231, 0x00d20180, 0x00000000 },
+   { 0x00800001, 0xd0600231, 0x00d201a0, 0x00000000 },
+   { 0x00800001, 0xd0800231, 0x00d201c0, 0x00000000 },
+   { 0x00800001, 0xd0a00231, 0x00d201e0, 0x00000000 },
+   { 0x00800001, 0xd0c00231, 0x00d20200, 0x00000000 },
+   { 0x00800001, 0xd0e00231, 0x00d20220, 0x00000000 },
+   { 0x00600001, 0xf4000231, 0x00ae0240, 0x00000000 },
+   { 0x00600001, 0xf8000231, 0x00ae0340, 0x00000000 },
+   { 0x00600001, 0xf4200231, 0x00ae0250, 0x00000000 },
+   { 0x00600001, 0xf8200231, 0x00ae0350, 0x00000000 },
+   { 0x00600001, 0xf4400231, 0x00ae0260, 0x00000000 },
+   { 0x00600001, 0xf8400231, 0x00ae0360, 0x00000000 },
+   { 0x00600001, 0xf4600231, 0x00ae0270, 0x00000000 },
+   { 0x00600001, 0xf8600231, 0x00ae0370, 0x00000000 },
+   { 0x00600001, 0xf4800231, 0x00ae0280, 0x00000000 },
+   { 0x00600001, 0xf8800231, 0x00ae0380, 0x00000000 },
+   { 0x00600001, 0xf4a00231, 0x00ae0290, 0x00000000 },
+   { 0x00600001, 0xf8a00231, 0x00ae0390, 0x00000000 },
+   { 0x00600001, 0xf4c00231, 0x00ae02a0, 0x00000000 },
+   { 0x00600001, 0xf8c00231, 0x00ae03a0, 0x00000000 },
+   { 0x00600001, 0xf4e00231, 0x00ae02b0, 0x00000000 },
+   { 0x00600001, 0xf8e00231, 0x00ae03b0, 0x00000000 },
+   { 0x00000409, 0x21003da5, 0x000000a0, 0x00010001 },
+   { 0x00000c01, 0x210401a5, 0x000000a2, 0x00000000 },
+   { 0x00000801, 0x21080061, 0x00000000, 0x0007001f },
+   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+   { 0x00000005, 0x24000c20, 0x000000b8, 0x00ffffff },
+   { 0x04000010, 0x20000c04, 0x00000400, 0x00ffffff },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x0000002a },
+   { 0x04600031, 0x21400cc1, 0x00000020, 0x02898007 },
+   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+   { 0x00000001, 0x26000228, 0x000000ba, 0x00000000 },
+   { 0x00610001, 0x24400129, 0x000000b8, 0x00000000 },
+   { 0x00710001, 0x24400169, 0x00000000, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x00000440, 0x00000000 },
+   { 0x00910001, 0x27000129, 0x02b10140, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x00000442, 0x00000000 },
+   { 0x00910001, 0x27200129, 0x02b10160, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x00000444, 0x00000000 },
+   { 0x00910001, 0x27400129, 0x02b10180, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x00000446, 0x00000000 },
+   { 0x00910001, 0x27600129, 0x02b101a0, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x00000448, 0x00000000 },
+   { 0x00910001, 0x27800129, 0x02b101c0, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x0000044a, 0x00000000 },
+   { 0x00910001, 0x27a00129, 0x02b101e0, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x0000044c, 0x00000000 },
+   { 0x00910001, 0x27c00129, 0x02b10200, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x0000044e, 0x00000000 },
+   { 0x00910001, 0x27e00129, 0x02b10220, 0x00000000 },
+   { 0x00600001, 0x20400022, 0x008d0700, 0x00000000 },
+   { 0x00600001, 0x20600022, 0x008d0720, 0x00000000 },
+   { 0x00600001, 0x20800022, 0x008d0740, 0x00000000 },
+   { 0x00600001, 0x20a00022, 0x008d0760, 0x00000000 },
+   { 0x00600001, 0x20c00022, 0x008d0780, 0x00000000 },
+   { 0x00600001, 0x20e00022, 0x008d07a0, 0x00000000 },
+   { 0x00600001, 0x21000022, 0x008d07c0, 0x00000000 },
+   { 0x00600001, 0x21200022, 0x008d07e0, 0x00000000 },
+   { 0x05600031, 0x20000cc4, 0x00000020, 0x12094007 },
+   { 0x01000040, 0x20863dad, 0x00000086, 0xffffffff },
+   { 0x00000040, 0x20a03dad, 0x000000a0, 0x00100010 },
+   { 0x05000010, 0x200035ac, 0x020000a0, 0x00000084 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000008 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0xffffff2e },
+   { 0x00000001, 0x20a001ad, 0x0000008a, 0x00000000 },
+   { 0x00000040, 0x20a23dad, 0x000000a2, 0x00080008 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0xffffff28 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
+   { 0x07000031, 0x20001cc4, 0x000001e0, 0x82000010 },
+   { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
+   { 0x07000031, 0x20001cc4, 0x000001e0, 0x82000010 },
diff --git a/src/shaders/post_processing/gen5_6/pl3_load_save_pl3.asm b/src/shaders/post_processing/gen5_6/pl3_load_save_pl3.asm
new file mode 100644
index 0000000..f2e9406
--- /dev/null
+++ b/src/shaders/post_processing/gen5_6/pl3_load_save_pl3.asm
@@ -0,0 +1,17 @@
+// Module name: PL3_LOAD_SAVE_pl3
+.kernel PL3_LOAD_SAVE_PL3
+.code
+
+#include "SetupVPKernel.asm"
+#include "Multiple_Loop_Head.asm"
+#include "IMC3_Load_8x4.asm"        
+#include "PL8x4_Save_IMC3.asm"
+#include "Multiple_Loop.asm"
+
+END_THREAD  // End of Thread
+
+.end_code  
+
+.end_kernel
+
+// end of pl3_load_save_pl3.asm
diff --git a/src/shaders/post_processing/gen5_6/pl3_load_save_pl3.g4b.gen5 b/src/shaders/post_processing/gen5_6/pl3_load_save_pl3.g4b.gen5
new file mode 100644
index 0000000..26fa256
--- /dev/null
+++ b/src/shaders/post_processing/gen5_6/pl3_load_save_pl3.g4b.gen5
@@ -0,0 +1,107 @@
+   { 0x00600001, 0x21000021, 0x008d0000, 0x00000000 },
+   { 0x00000441, 0x20842e2d, 0x000000b7, 0x00100010 },
+   { 0x00000c01, 0x2086022d, 0x000000bb, 0x00000000 },
+   { 0x00000801, 0x208a01ad, 0x000000a0, 0x00000000 },
+   { 0x00200001, 0x209403bd, 0x006600a4, 0x00000000 },
+   { 0x00000040, 0x208435ad, 0x00000084, 0x000000a0 },
+   { 0x00200040, 0x210035a5, 0x004500a0, 0x00450074 },
+   { 0x00000001, 0x21080061, 0x00000000, 0x0007000f },
+   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+   { 0x01600031, 0x27000c01, 0x408d0000, 0x0248a001 },
+   { 0x0020000c, 0x21003ca5, 0x00450100, 0x00010001 },
+   { 0x00000001, 0x21080061, 0x00000000, 0x00030007 },
+   { 0x00600001, 0x20400022, 0x008d0100, 0x00000000 },
+   { 0x02600031, 0x28000c01, 0x408d0000, 0x0218a002 },
+   { 0x00600001, 0x20600022, 0x008d0100, 0x00000000 },
+   { 0x02600031, 0x29000c01, 0x408d0000, 0x0218a003 },
+   { 0x00800001, 0x22200229, 0x00b10770, 0x00000000 },
+   { 0x00800001, 0x22000229, 0x00b10760, 0x00000000 },
+   { 0x00800001, 0x21e00229, 0x00b10750, 0x00000000 },
+   { 0x00800001, 0x21c00229, 0x00b10740, 0x00000000 },
+   { 0x00800001, 0x21a00229, 0x00b10730, 0x00000000 },
+   { 0x00800001, 0x21800229, 0x00b10720, 0x00000000 },
+   { 0x00800001, 0x21600229, 0x00b10710, 0x00000000 },
+   { 0x00800001, 0x21400229, 0x00b10700, 0x00000000 },
+   { 0x00800001, 0x22600229, 0x008d0810, 0x00000000 },
+   { 0x00800001, 0x23600229, 0x008d0910, 0x00000000 },
+   { 0x00800001, 0x22400229, 0x008d0800, 0x00000000 },
+   { 0x00800001, 0x23400229, 0x008d0900, 0x00000000 },
+   { 0x00000005, 0x24000c20, 0x000000b8, 0x00ffffff },
+   { 0x04000010, 0x20000c04, 0x00000400, 0x00ffffff },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000052 },
+   { 0x00200001, 0x210001a5, 0x004500a0, 0x00000000 },
+   { 0x00000001, 0x21080061, 0x00000000, 0x0007000f },
+   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+   { 0x01600031, 0x27000c01, 0x408d0000, 0x0248a007 },
+   { 0x0020000c, 0x21003da5, 0x004500a0, 0x00010001 },
+   { 0x00000001, 0x21080061, 0x00000000, 0x00030007 },
+   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+   { 0x01600031, 0x28000c01, 0x408d0000, 0x0218a008 },
+   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+   { 0x01600031, 0x29000c01, 0x408d0000, 0x0218a009 },
+   { 0x00800001, 0x28200229, 0x008d0810, 0x00000000 },
+   { 0x00800001, 0x29200229, 0x008d0910, 0x00000000 },
+   { 0x00800001, 0x28000229, 0x008d0800, 0x00000000 },
+   { 0x00800001, 0x29000229, 0x008d0900, 0x00000000 },
+   { 0x00000001, 0x26000228, 0x000000ba, 0x00000000 },
+   { 0x00610001, 0x24400129, 0x000000b8, 0x00000000 },
+   { 0x00710001, 0x24400169, 0x00000000, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x00000440, 0x00000000 },
+   { 0x00910001, 0x41400231, 0x02b10700, 0x00000000 },
+   { 0x00910001, 0x22400231, 0x028d0800, 0x00000000 },
+   { 0x00910001, 0x23400231, 0x028d0900, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x00000442, 0x00000000 },
+   { 0x00910001, 0x41410231, 0x02b10701, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x00000444, 0x00000000 },
+   { 0x00910001, 0x41800231, 0x02b10720, 0x00000000 },
+   { 0x00910001, 0x22500231, 0x028d0810, 0x00000000 },
+   { 0x00910001, 0x23500231, 0x028d0910, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x00000446, 0x00000000 },
+   { 0x00910001, 0x41810231, 0x02b10721, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x00000448, 0x00000000 },
+   { 0x00910001, 0x41c00231, 0x02b10740, 0x00000000 },
+   { 0x00910001, 0x22600231, 0x028d0820, 0x00000000 },
+   { 0x00910001, 0x23600231, 0x028d0920, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x0000044a, 0x00000000 },
+   { 0x00910001, 0x41c10231, 0x02b10741, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x0000044c, 0x00000000 },
+   { 0x00910001, 0x42000231, 0x02b10760, 0x00000000 },
+   { 0x00910001, 0x22700231, 0x028d0830, 0x00000000 },
+   { 0x00910001, 0x23700231, 0x028d0930, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x0000044e, 0x00000000 },
+   { 0x00910001, 0x42010231, 0x02b10761, 0x00000000 },
+   { 0x00200001, 0x210001a5, 0x004500a0, 0x00000000 },
+   { 0x00000001, 0x21080061, 0x00000000, 0x0007000f },
+   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+   { 0x00800001, 0x20400232, 0x00d20140, 0x00000000 },
+   { 0x00800001, 0x20500232, 0x00d20160, 0x00000000 },
+   { 0x00800001, 0x20600232, 0x00d20180, 0x00000000 },
+   { 0x00800001, 0x20700232, 0x00d201a0, 0x00000000 },
+   { 0x00800001, 0x20800232, 0x00d201c0, 0x00000000 },
+   { 0x00800001, 0x20900232, 0x00d201e0, 0x00000000 },
+   { 0x00800001, 0x20a00232, 0x00d20200, 0x00000000 },
+   { 0x00800001, 0x20b00232, 0x00d20220, 0x00000000 },
+   { 0x01600031, 0x20000c04, 0x508d0000, 0x0a082007 },
+   { 0x0020000c, 0x21003da5, 0x004500a0, 0x00010001 },
+   { 0x00000001, 0x21080061, 0x00000000, 0x00030007 },
+   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+   { 0x00800001, 0x20400232, 0x00d20240, 0x00000000 },
+   { 0x00800001, 0x20500232, 0x00d20260, 0x00000000 },
+   { 0x01600031, 0x20000c04, 0x508d0000, 0x04082008 },
+   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+   { 0x00800001, 0x20400232, 0x00d20340, 0x00000000 },
+   { 0x00800001, 0x20500232, 0x00d20360, 0x00000000 },
+   { 0x01600031, 0x20000c04, 0x508d0000, 0x04082009 },
+   { 0x01000040, 0x20863dad, 0x00000086, 0xffffffff },
+   { 0x00000040, 0x20a03dad, 0x000000a0, 0x00100010 },
+   { 0x05000010, 0x200035ac, 0x020000a0, 0x00000084 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000008 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0xffffff46 },
+   { 0x00000001, 0x20a001ad, 0x0000008a, 0x00000000 },
+   { 0x00000040, 0x20a23dad, 0x000000a2, 0x00080008 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0xffffff40 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
+   { 0x0f000031, 0x20000c04, 0x708d0000, 0x82000000 },
+   { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
+   { 0x0f000031, 0x20000c04, 0x708d0000, 0x82000000 },
diff --git a/src/shaders/post_processing/gen5_6/pl3_load_save_pl3.g6b b/src/shaders/post_processing/gen5_6/pl3_load_save_pl3.g6b
new file mode 100644
index 0000000..c9ee1a1
--- /dev/null
+++ b/src/shaders/post_processing/gen5_6/pl3_load_save_pl3.g6b
@@ -0,0 +1,180 @@
+   { 0x00600001, 0x20e00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21000061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21200061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21400061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21600061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21800061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21a00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21c00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21e00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x22000061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x22200061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x22400061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x22600061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x22800061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x22a00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x22c00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x22e00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x23000061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x23200061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x23400061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x23600061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x23800061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x23a00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x23c00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x23e00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x24000061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x24200061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x24400061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x24600061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x24800061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x24a00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x24c00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x24e00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x25000061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x25200061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x25400061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x25600061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x25800061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x25a00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x25c00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x25e00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x26000061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x26200061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x26400061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x26600061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x26800061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x26a00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x26c00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x26e00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x27000061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x27200061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x27400061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x27600061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x27800061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x27a00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x27c00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x27e00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x28000061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x28200061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x28400061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x28600061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x28800061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x28a00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x28c00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x28e00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x29000061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x29200061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x29400061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x29600061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x29800061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x29a00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x29c00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x29e00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21000021, 0x008d0000, 0x00000000 },
+   { 0x00000441, 0x20842e2d, 0x000000b7, 0x00100010 },
+   { 0x00000c01, 0x2086022d, 0x000000bb, 0x00000000 },
+   { 0x00000801, 0x208a01ad, 0x000000a0, 0x00000000 },
+   { 0x00200001, 0x209403bd, 0x006600a4, 0x00000000 },
+   { 0x00000040, 0x208435ad, 0x00000084, 0x000000a0 },
+   { 0x00200040, 0x210035a5, 0x004500a0, 0x00450074 },
+   { 0x00000001, 0x21080061, 0x00000000, 0x0007000f },
+   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+   { 0x04600031, 0x27000cc1, 0x00000020, 0x02498001 },
+   { 0x0020000c, 0x21003ca5, 0x00450100, 0x00010001 },
+   { 0x00000001, 0x21080061, 0x00000000, 0x00030007 },
+   { 0x00600001, 0x20400022, 0x008d0100, 0x00000000 },
+   { 0x04600031, 0x28000cc1, 0x00000040, 0x02198002 },
+   { 0x00600001, 0x20600022, 0x008d0100, 0x00000000 },
+   { 0x04600031, 0x29000cc1, 0x00000040, 0x02198003 },
+   { 0x00800001, 0x22200229, 0x00b10770, 0x00000000 },
+   { 0x00800001, 0x22000229, 0x00b10760, 0x00000000 },
+   { 0x00800001, 0x21e00229, 0x00b10750, 0x00000000 },
+   { 0x00800001, 0x21c00229, 0x00b10740, 0x00000000 },
+   { 0x00800001, 0x21a00229, 0x00b10730, 0x00000000 },
+   { 0x00800001, 0x21800229, 0x00b10720, 0x00000000 },
+   { 0x00800001, 0x21600229, 0x00b10710, 0x00000000 },
+   { 0x00800001, 0x21400229, 0x00b10700, 0x00000000 },
+   { 0x00800001, 0x22600229, 0x008d0810, 0x00000000 },
+   { 0x00800001, 0x23600229, 0x008d0910, 0x00000000 },
+   { 0x00800001, 0x22400229, 0x008d0800, 0x00000000 },
+   { 0x00800001, 0x23400229, 0x008d0900, 0x00000000 },
+   { 0x00000005, 0x24000c20, 0x000000b8, 0x00ffffff },
+   { 0x04000010, 0x20000c04, 0x00000400, 0x00ffffff },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000052 },
+   { 0x00200001, 0x210001a5, 0x004500a0, 0x00000000 },
+   { 0x00000001, 0x21080061, 0x00000000, 0x0007000f },
+   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+   { 0x04600031, 0x27000cc1, 0x00000020, 0x02498007 },
+   { 0x0020000c, 0x21003da5, 0x004500a0, 0x00010001 },
+   { 0x00000001, 0x21080061, 0x00000000, 0x00030007 },
+   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+   { 0x04600031, 0x28000cc1, 0x00000020, 0x02198008 },
+   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+   { 0x04600031, 0x29000cc1, 0x00000020, 0x02198009 },
+   { 0x00800001, 0x28200229, 0x008d0810, 0x00000000 },
+   { 0x00800001, 0x29200229, 0x008d0910, 0x00000000 },
+   { 0x00800001, 0x28000229, 0x008d0800, 0x00000000 },
+   { 0x00800001, 0x29000229, 0x008d0900, 0x00000000 },
+   { 0x00000001, 0x26000228, 0x000000ba, 0x00000000 },
+   { 0x00610001, 0x24400129, 0x000000b8, 0x00000000 },
+   { 0x00710001, 0x24400169, 0x00000000, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x00000440, 0x00000000 },
+   { 0x00910001, 0x41400231, 0x02b10700, 0x00000000 },
+   { 0x00910001, 0x22400231, 0x028d0800, 0x00000000 },
+   { 0x00910001, 0x23400231, 0x028d0900, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x00000442, 0x00000000 },
+   { 0x00910001, 0x41410231, 0x02b10701, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x00000444, 0x00000000 },
+   { 0x00910001, 0x41800231, 0x02b10720, 0x00000000 },
+   { 0x00910001, 0x22500231, 0x028d0810, 0x00000000 },
+   { 0x00910001, 0x23500231, 0x028d0910, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x00000446, 0x00000000 },
+   { 0x00910001, 0x41810231, 0x02b10721, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x00000448, 0x00000000 },
+   { 0x00910001, 0x41c00231, 0x02b10740, 0x00000000 },
+   { 0x00910001, 0x22600231, 0x028d0820, 0x00000000 },
+   { 0x00910001, 0x23600231, 0x028d0920, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x0000044a, 0x00000000 },
+   { 0x00910001, 0x41c10231, 0x02b10741, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x0000044c, 0x00000000 },
+   { 0x00910001, 0x42000231, 0x02b10760, 0x00000000 },
+   { 0x00910001, 0x22700231, 0x028d0830, 0x00000000 },
+   { 0x00910001, 0x23700231, 0x028d0930, 0x00000000 },
+   { 0x00000001, 0x26020128, 0x0000044e, 0x00000000 },
+   { 0x00910001, 0x42010231, 0x02b10761, 0x00000000 },
+   { 0x00200001, 0x210001a5, 0x004500a0, 0x00000000 },
+   { 0x00000001, 0x21080061, 0x00000000, 0x0007000f },
+   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+   { 0x00800001, 0x20400232, 0x00d20140, 0x00000000 },
+   { 0x00800001, 0x20500232, 0x00d20160, 0x00000000 },
+   { 0x00800001, 0x20600232, 0x00d20180, 0x00000000 },
+   { 0x00800001, 0x20700232, 0x00d201a0, 0x00000000 },
+   { 0x00800001, 0x20800232, 0x00d201c0, 0x00000000 },
+   { 0x00800001, 0x20900232, 0x00d201e0, 0x00000000 },
+   { 0x00800001, 0x20a00232, 0x00d20200, 0x00000000 },
+   { 0x00800001, 0x20b00232, 0x00d20220, 0x00000000 },
+   { 0x05600031, 0x20000cc4, 0x00000020, 0x0a094007 },
+   { 0x0020000c, 0x21003da5, 0x004500a0, 0x00010001 },
+   { 0x00000001, 0x21080061, 0x00000000, 0x00030007 },
+   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+   { 0x00800001, 0x20400232, 0x00d20240, 0x00000000 },
+   { 0x00800001, 0x20500232, 0x00d20260, 0x00000000 },
+   { 0x05600031, 0x20000cc4, 0x00000020, 0x04094008 },
+   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+   { 0x00800001, 0x20400232, 0x00d20340, 0x00000000 },
+   { 0x00800001, 0x20500232, 0x00d20360, 0x00000000 },
+   { 0x05600031, 0x20000cc4, 0x00000020, 0x04094009 },
+   { 0x01000040, 0x20863dad, 0x00000086, 0xffffffff },
+   { 0x00000040, 0x20a03dad, 0x000000a0, 0x00100010 },
+   { 0x05000010, 0x200035ac, 0x020000a0, 0x00000084 },
+   { 0x00010220, 0x34001c00, 0x00001400, 0x00000008 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0xffffff46 },
+   { 0x00000001, 0x20a001ad, 0x0000008a, 0x00000000 },
+   { 0x00000040, 0x20a23dad, 0x000000a2, 0x00080008 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0xffffff40 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
+   { 0x07000031, 0x20001cc4, 0x000001e0, 0x82000010 },
+   { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
+   { 0x07000031, 0x20001cc4, 0x000001e0, 0x82000010 },
diff --git a/src/shaders/post_processing/gen7/DI_Core.g4a b/src/shaders/post_processing/gen7/DI_Core.g4a
new file mode 100644
index 0000000..952e1d4
--- /dev/null
+++ b/src/shaders/post_processing/gen7/DI_Core.g4a
@@ -0,0 +1,309 @@
+/*
+ *  Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+//   22    // Total instruction count
+//    1    // Total kernel count
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16)
+.default_register_type  :ub
+
+.reg_count_total        128
+.reg_count_payload      7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+                                    //  e.g.            byte0   byte1  byte2
+                                    // YUYV               0       1      3
+                                    // YVYU               0       3      1
+
+//Color Pipe (IECP) parameters
+
+
+//r4
+
+                                    //  e.g.              byte0           byte1           byte2
+                                    // YUYV                 0               1               3
+                                    // YVYU                 0               3               1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+//                = 000 0001 (min message len 1 ) 0,0000 (resp len 0   -add later)
+//                  0000,0000,0000
+//                  0001(Spawn a root thread),0001 (Root thread spawn thread)
+//                = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+//                = 000 0110 (min message len 6 ) 0,0000 (resp len 0   -add later)
+//                  1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+//                  0000,0000 (Binding table index, added later)
+//                = 0x0C098700
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+        // Message Descriptors
+                //                = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+                //                  1 (header present 1) 0 1010 (media block write) 000000
+                //                  00000000 (binding table index - set later)
+                //                = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+                             // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT  r43    // Dummy Message Register for EOT
+
+
+.declare    mubMSGPAYLOAD  Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare    muwMSGPAYLOAD  Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare    mudMSGPAYLOAD  Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare    mfMSGPAYLOAD   Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// End of common.inc
+
+// FileName:	DI.asm 
+// Author:		Vivek Kumar
+// Description:	Tasks for DI only case (16x4 block)
+
+
+// End of common.inc
+
+
+// FileName:    DNDI.inc
+// Author:      Vivek Kumar
+// Description: Include file for DN, DI and DNDI
+// Inputs:      DI_ENABLE, DN_ENABLE, DN_PLANAR, DN_PACKED
+
+
+// End of common.inc
+
+
+//Interface:
+//Static Parameters:
+//r1
+
+
+//====================== Binding table (Explicit To DNDI)=========================================
+
+
+.declare mudMSGHDR_DNDI     Base=r18      ElementSize=4    Type=ud
+.declare mdMSGHDR_DNDI      Base=r18      ElementSize=4    Type=d
+.declare mwMSGHDR_DNDI      Base=r18      ElementSize=2    Type=w
+
+
+.declare mudMSGHDR_STMM     Base=r20      ElementSize=4    Type=ud
+
+
+.declare mudMSGHDR_HIST     Base=r22      ElementSize=4    Type=ud
+
+
+.declare mudMSGHDR_ENC_STATS Base=r24 ElementSize=4   Type=ud
+.declare muwMSGHDR_ENC_STATS Base=r24 ElementSize=2   Type=uw
+.declare mubMSGHDR_ENC_STATS Base=r24 ElementSize=1   Type=ub
+
+
+.declare mudMSGHDR_DN_OUT   Base=r31.0      ElementSize=4  Type=ud
+.declare mdMSGHDR_DN_OUT    Base=r31.0      ElementSize=4  Type=d
+.declare mubMSGHDR_DN_OUT   Base=r31.0      ElementSize=1  Type=ub
+
+
+.declare mudMSGHDR_UVCOPY   Base=r36      ElementSize=4  Type=ud
+.declare mdMSGHDR_UVCOPY    Base=r36      ElementSize=4  Type=d
+.declare mudMSGHDR_UCOPY    Base=r36       ElementSize=4  Type=ud
+.declare mudMSGHDR_VCOPY    Base=r38       ElementSize=4  Type=ud
+
+
+.declare mudMSGHDR_DI_OUT1  Base=r18.0      ElementSize=4     Type=ud
+.declare mubMSGHDR_DI_OUT1  Base=r18.0      ElementSize=1     Type=ub
+
+
+.declare mudMSGHDR_DI_OUT2  Base=r23.0      ElementSize=4     Type=ud
+.declare mubMSGHDR_DI_OUT2  Base=r23.0      ElementSize=1     Type=ub
+
+//r45
+//Use r45 as message header, so no need to "mov" the data.
+
+.declare mudDN_Y_OUT        Base=r45.0 ElementSize=4 SrcRegion=<8;8,1>   DstRegion=<1> Type=ud
+
+// Message response (Denoised & DI-ed pixels & statistics); Use buffer 5
+.declare udDNDI_RESP        Base=r46.0 ElementSize=4 SrcRegion=<8;8,1>   DstRegion=<1> Type=ud
+.declare uwDNDI_RESP        Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare ubDNDI_RESP        Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+// Message response (UV Copy); Use buffer 5
+.declare udDNDI_UV_RESP     Base=r58.0 ElementSize=4 SrcRegion=<8;8,1>  DstRegion=<1> Type=ud
+.declare ubDNDI_UV_RESP     Base=r58.0 ElementSize=1 SrcRegion=<16;16,1>    DstRegion=<1> Type=ub
+
+//Temp GRFs: For 42X to 422 Conversion
+.declare uwDNDI_UVCOPY_TEMP Base=r10.0 ElementSize=2 SrcRegion=<16;16,1>    DstRegion=<1> Type=uw       //8 GRFs
+.declare ubDNDI_UVCOPY_TEMP Base=r10.0 ElementSize=1 SrcRegion=<16;16,1>    DstRegion=<1> Type=ub       //8 GRFs
+//---------------------------------------------------------------------------
+// Message descriptors
+//---------------------------------------------------------------------------
+// Extended message descriptor
+    // Message descriptor   for sampler read
+    //                    = 000 0010 (message len 2) 00000 (resp len - set later, 12 or 5 or 11)
+    //                      1 (header present 1) 0 11 (SIMD32/64 mode)
+    //                      1000 (message type) 0000 (DI state index)
+    //                      00000000 (binding table index - set later)
+    //                    = 0x040b8000
+
+
+// Attention: The Message Length is The Number of GRFs with Data Only, without the Header
+
+
+//---------------------------------------------------------------------------
+// VDI Return Data format
+//---------------------------------------------------------------------------
+// Defines for DI enabled
+
+
+// Defines for DI disabled
+
+
+// FileName:	DNDI_Command.asm 
+// Author:		Vivek Kumar
+// Description:	Sends a message to the VDI to process one DN (16x8) or DNDI (16x4) block
+
+// Prepare the DNDI send command: two-register message (r18 header copy of r0, r19 carrying the block origin)
+mov (8)		mudMSGHDR_DNDI(0)<1>			r0.0<8;8,1>:ud					// message header (copy of r0)
+mov (1)		mwMSGHDR_DNDI(1,4)<1>			r7.0<0;1,0>:w		{ NoDDClr }		// horizontal origin	// Do we need to add offset here? -vK
+mov (1)		mwMSGHDR_DNDI(1,12)<1>			r7.1<0;1,0>:w		{ NoDDChk }		// vertical origin		// Can these 2 be combined? - vK
+
+send (8)	udDNDI_RESP(0)<1>	r18	0x2	0x4AE8003:ud	// descriptor 0x4AE8003: message length 2 (r18..r19), response length 10 (udDNDI_RESP(0)..(9)), binding table index 3 -- field positions per the descriptor layout comments in this file
+
+// On Gen6, with VDI walker, use the XY pair returned rather than programmed above
+// VDI_RETURNED_XY is ordered XY in case of walker enables and the same as programmed in case of walker disabled
+mov (2) 	r7.0<1>:w     uwDNDI_RESP(9,14)<2;2,1>	// reload r7.0/r7.1 with the horizontal/vertical origin returned in W.14 and W.15 of response register 9
+
+
+// FileName:	DI_STMM_Save.asm 
+// Author:		Vivek Kumar
+// Description:	Saves DI STMM Data to statistics surface in case of DI enabled (for 16x4 block)
+
+// Write STMM to memory: header in r20 (mudMSGHDR_STMM(0)), one payload register in r21
+mov (8)     mudMSGHDR_STMM(0)<1>	r0.0<8;8,1>:ud               							// message header (copy of r0)
+mov (8)     mudMSGHDR_STMM(1)<1>   	udDNDI_RESP(8,0)         					// Move STMM (response register 8) into the payload register r21
+
+shr (1)     mudMSGHDR_STMM(0,0)<1>	r7.0<0;1,0>:w            1:w     { NoDDClr } 		// X origin / 2
+mov (1)     mudMSGHDR_STMM(0,1)<1>	r7.1<0;1,0>:w                    { NoDDClr, NoDDChk }  	// Y origin
+mov (1)     mudMSGHDR_STMM(0,2)<1>	0x30007:ud           { NoDDChk } 		// block width and height (8x4), encoded as (height-1)<<16 | (width-1)
+
+send (8)	null<1>:d	r20	0x5		0x40A8021:ud      // no response; descriptor matches the media-block-write template 0x020A8000 with message length 2 (r20..r21) and binding table index 0x21
+
+
+// FileName:	DNDI_Enc_Stats_Save.asm
+// Author:		Vivek Kumar
+// Description:	Saves Encoder Statistics data to statistics surface in case of DI enabled (for 16x4 block)
+
+// Write encoder statistics to memory: header in r24 (mudMSGHDR_ENC_STATS(0)), one payload register in r25
+//Currently enable this only on Gen6 validation
+mov (8)		mudMSGHDR_ENC_STATS(1)<1>	0x0:ud						// Zero the payload register (r25) so unwritten dwords are 0
+mov (8)		mudMSGHDR_ENC_STATS(0)<1>	r0.0<8;8,1>:ud				// message header (copy of r0)
+
+shr (1)		mudMSGHDR_ENC_STATS(0,0)<1>		r7.0<0;1,0>:w            	1:w  	{ NoDDClr }			// X origin / 2   (original note: enable the flag after testing on si)
+mul (1)		acc0.1<1>:ud					r7.1<0;1,0>:w				3:w																							// Y origin * 3
+shr (1)		mudMSGHDR_ENC_STATS(0,1)<1>		acc0.1<0;1,0>:ud			2:w		{ NoDDClr, NoDDChk }		// Y origin * 3/4   (original note: enable the flag after testing on si)
+mov (1)		mudMSGHDR_ENC_STATS(0,2)<1>		0x20007:ud				{ NoDDChk }			// block width and height (8x3)   (original note: enable the flag after testing on si)
+add (2)		mudMSGHDR_ENC_STATS(0,0)<1>		mudMSGHDR_ENC_STATS(0,0)<2;2,1>       	r1.12<2;2,1>:uw					// Add pitch to X,Y origin (adds r1.12/r1.13 to header dwords 0/1)
+
+
+	//Data block for Encoder Statistics
+	//----------------------------------------------------
+	//|  0  |   1  |   2   |  3  |  4  |  5  |  6  |  7  | Bytes
+	//----------------------------------------------------
+	//| BNE | MCNT | FCNT | TCNT |  X  |  X  |  X  |  X  |
+	//----------------------------------------------------
+	//|   DcTpT    |     SVCM    |   DcBpT   |   DcTpB   |
+	//----------------------------------------------------
+	//|   SHCM     |     STAD    |   DcTcB   |   DcBpB   |
+	//----------------------------------------------------
+	mov (1)		mudMSGHDR_ENC_STATS(1,0)<1>		udDNDI_RESP(9,1)<0;1,0>    		{ NoDDClr }			// payload dword 0    <- response register 9, dword 1
+	mov (2)		mudMSGHDR_ENC_STATS(1,3)<2>		udDNDI_RESP(9,3)<2;2,1>    		{ NoDDClr, NoDDChk }		// payload dwords 3,5 <- response register 9, dwords 3,4
+	mov (2)		mudMSGHDR_ENC_STATS(1,2)<2>		udDNDI_RESP(9,5)<2;2,1>    		{ NoDDChk }			// payload dwords 2,4 <- response register 9, dwords 5,6
+
+
+send (8)   null<1>:d    r24    0x5    0x40A8021:ud	// no response; same descriptor as the STMM write: message length 2 (r24..r25), binding table index 0x21
+
+
diff --git a/src/shaders/post_processing/gen7/DI_Save_NV12_16x4.g4a b/src/shaders/post_processing/gen7/DI_Save_NV12_16x4.g4a
new file mode 100644
index 0000000..68ef504
--- /dev/null
+++ b/src/shaders/post_processing/gen7/DI_Save_NV12_16x4.g4a
@@ -0,0 +1,279 @@
+/*
+ *  Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+//   20    // Total instruction count
+//    1    // Total kernel count
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16)
+.default_register_type  :ub
+
+.reg_count_total        128
+.reg_count_payload      7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+                                    //  e.g.            byte0   byte1  byte2
+                                    // YUYV               0       1      3
+                                    // YVYU               0       3      1
+
+//Color Pipe (IECP) parameters
+
+
+//r4
+
+                                    //  e.g.              byte0           byte1           byte2
+                                    // YUYV                 0               1               3
+                                    // YVYU                 0               3               1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+//                = 000 0001 (min message len 1 ) 0,0000 (resp len 0   -add later)
+//                  0000,0000,0000
+//                  0001(Spawn a root thread),0001 (Root thread spawn thread)
+//                = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+//                = 000 0110 (min message len 6 ) 0,0000 (resp len 0   -add later)
+//                  1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+//                  0000,0000 (Binding table index, added later)
+//                = 0x0C098700
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+        // Message Descriptors
+                //                = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+                //                  1 (header present 1) 0 1010 (media block write) 000000
+                //                  00000000 (binding table index - set later)
+                //                = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+                             // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT  r43    // Dummy Message Register for EOT
+
+
+.declare    mubMSGPAYLOAD  Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare    muwMSGPAYLOAD  Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare    mudMSGPAYLOAD  Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare    mfMSGPAYLOAD   Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// End of common.inc
+
+// FileName:	DI_Save_NV12_16x4.asm
+// Author:		Vivek Kumar
+// Description:	Save two 16x4 blocks of DI output in NV12 format
+
+
+// FileName:    DNDI.inc
+// Author:      Vivek Kumar
+// Description: Include file for DN, DI and DNDI
+// Inputs:      DI_ENABLE, DN_ENABLE, DN_PLANAR, DN_PACKED
+
+
+// End of common.inc
+
+
+//Interface:
+//Static Parameters:
+//r1
+
+
+//====================== Binding table (Explicit To DNDI)=========================================
+
+
+.declare mudMSGHDR_DNDI     Base=r18      ElementSize=4    Type=ud
+.declare mdMSGHDR_DNDI      Base=r18      ElementSize=4    Type=d
+.declare mwMSGHDR_DNDI      Base=r18      ElementSize=2    Type=w
+
+
+.declare mudMSGHDR_STMM     Base=r20      ElementSize=4    Type=ud
+
+
+.declare mudMSGHDR_HIST     Base=r22      ElementSize=4    Type=ud
+
+
+.declare mudMSGHDR_ENC_STATS Base=r24 ElementSize=4   Type=ud
+.declare muwMSGHDR_ENC_STATS Base=r24 ElementSize=2   Type=uw
+.declare mubMSGHDR_ENC_STATS Base=r24 ElementSize=1   Type=ub
+
+
+.declare mudMSGHDR_DN_OUT   Base=r31.0      ElementSize=4  Type=ud
+.declare mdMSGHDR_DN_OUT    Base=r31.0      ElementSize=4  Type=d
+.declare mubMSGHDR_DN_OUT   Base=r31.0      ElementSize=1  Type=ub
+
+
+.declare mudMSGHDR_UVCOPY   Base=r36      ElementSize=4  Type=ud
+.declare mdMSGHDR_UVCOPY    Base=r36      ElementSize=4  Type=d
+.declare mudMSGHDR_UCOPY    Base=r36       ElementSize=4  Type=ud
+.declare mudMSGHDR_VCOPY    Base=r38       ElementSize=4  Type=ud
+
+
+.declare mudMSGHDR_DI_OUT1  Base=r18.0      ElementSize=4     Type=ud
+.declare mubMSGHDR_DI_OUT1  Base=r18.0      ElementSize=1     Type=ub
+
+
+.declare mudMSGHDR_DI_OUT2  Base=r23.0      ElementSize=4     Type=ud
+.declare mubMSGHDR_DI_OUT2  Base=r23.0      ElementSize=1     Type=ub
+
+//r45
+//Use r45 as message header, so no need to "mov" the data.
+
+.declare mudDN_Y_OUT        Base=r45.0 ElementSize=4 SrcRegion=<8;8,1>   DstRegion=<1> Type=ud
+
+// Message response (Denoised & DI-ed pixels & statistics); Use buffer 5
+.declare udDNDI_RESP        Base=r46.0 ElementSize=4 SrcRegion=<8;8,1>   DstRegion=<1> Type=ud
+.declare uwDNDI_RESP        Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare ubDNDI_RESP        Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+// Message response (UV Copy); Use buffer 5
+.declare udDNDI_UV_RESP     Base=r58.0 ElementSize=4 SrcRegion=<8;8,1>  DstRegion=<1> Type=ud
+.declare ubDNDI_UV_RESP     Base=r58.0 ElementSize=1 SrcRegion=<16;16,1>    DstRegion=<1> Type=ub
+
+//Temp GRFs: For 42X to 422 Conversion
+.declare uwDNDI_UVCOPY_TEMP Base=r10.0 ElementSize=2 SrcRegion=<16;16,1>    DstRegion=<1> Type=uw       //8 GRFs
+.declare ubDNDI_UVCOPY_TEMP Base=r10.0 ElementSize=1 SrcRegion=<16;16,1>    DstRegion=<1> Type=ub       //8 GRFs
+//---------------------------------------------------------------------------
+// Message descriptors
+//---------------------------------------------------------------------------
+// Extended message descriptor
+    // Message descriptor   for sampler read
+    //                    = 000 0010 (message len 2) 00000 (resp len - set later, 12 or 5 or 11)
+    //                      1 (header present 1) 0 11 (SIMD32/64 mode)
+    //                      1000 (message type) 0000 (DI state index)
+    //                      00000000 (binding table index - set later)
+    //                    = 0x040b8000
+
+
+// Attention: The Message Length is The Number of GRFs with Data Only, without the Header
+
+
+//---------------------------------------------------------------------------
+// VDI Return Data format
+//---------------------------------------------------------------------------
+// Defines for DI enabled
+
+
+// Defines for DI disabled
+
+
+mov (2) r27.0<1>:d		r7.0<2;2,1>:w                        { NoDDClr }		// r27 = header template: dwords 0/1 <- words r7.0/r7.1 (block origin)
+mov (1) r27.2<1>:ud     0x3000F:ud		          { NoDDChk }		// Block width and height (16x4)
+
+//Bottom field Y -> payload registers r19..r20 of the first message (header r18)
+	mov (8)   mudMSGHDR_DI_OUT1(1)<1>		udDNDI_RESP(0,0)
+	mov (8)   mudMSGHDR_DI_OUT1(2)<1>		udDNDI_RESP(0,8)
+// Top field Y -> payload registers r24..r25 of the second message (header r23)
+	mov (8)   mudMSGHDR_DI_OUT2(1)<1>	udDNDI_RESP(4,0)
+	mov (8)   mudMSGHDR_DI_OUT2(2)<1>	udDNDI_RESP(4,8)
+
+//copy the header template (origin and block size) to both Y message headers
+mov (8) r18.0<1>:ud		r27<8;8,1>:ud
+mov (8) r23.0<1>:ud		r27<8;8,1>:ud
+
+//Change origin to U/V block
+asr (1) r27.1<1>:d		r27.1<0;1,0>:d    1:w			{ NoDDClr }    		// U/V block origin should be half of Y's
+mov (1) r27.2<1>:ud     0x1000F:ud		{ NoDDChk }    		// Block width and height (16x2)
+
+// Bottom field U/V: interleave odd source bytes into even destination bytes and vice versa
+mov (16)   r21.0<2>:ub       ubDNDI_RESP(2, 1)<32;8,2>	{ NoDDClr }
+mov (16)   r21.1<2>:ub       ubDNDI_RESP(2, 0)<32;8,2>	{ NoDDChk }
+
+// Top field U/V: same interleave, from response register 6
+mov (16)   r26.0<2>:ub       ubDNDI_RESP(6, 1)<32;8,2>	{ NoDDClr }
+mov (16)   r26.1<2>:ub       ubDNDI_RESP(6, 0)<32;8,2>	{ NoDDChk }
+
+//copy the U/V header template to both U/V message headers
+mov (8) r21<1>:ud			r27<8;8,1>:ud	// NOTE(review): rewrites all 32 bytes of r21, including the U/V bytes packed into r21 above; the length-2 send below reads r21..r22 -- confirm the intended payload register
+mov (8) r26<1>:ud			r27<8;8,1>:ud	// NOTE(review): likewise rewrites the U/V bytes packed into r26; the length-2 send below reads r26..r27 -- confirm
+
+//Send out Y component on previous frame to surface (message length 3: r18..r20, binding table index 0x1B)
+send (8)    null<1>:d    r18.0		0x5    0x60A801B:ud
+//Send out Y component on current frame to surface (message length 3: r23..r25, binding table index 0x1E)
+send (8)    null<1>:d    r23.0 	0x5    0x60A801E:ud
+//Send out U/V component on previous frame to surface (message length 2: r21..r22, binding table index 0x1C)
+send (8)    null<1>:d    r21	0x5    0x40A801C:ud
+//Send out U/V component on current frame to surface (message length 2: r26..r27, binding table index 0x1F)
+send (8)    null<1>:d    r26	0x5    0x40A801F:ud
diff --git a/src/shaders/post_processing/gen7/DI_Save_PA_16x4.g4a b/src/shaders/post_processing/gen7/DI_Save_PA_16x4.g4a
new file mode 100644
index 0000000..a59054d
--- /dev/null
+++ b/src/shaders/post_processing/gen7/DI_Save_PA_16x4.g4a
@@ -0,0 +1,289 @@
+/*
+ *  Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+//   33    // Total instruction count
+//    1    // Total kernel count
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16)
+.default_register_type  :ub
+
+.reg_count_total        128
+.reg_count_payload      7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+                                    //  e.g.            byte0   byte1  byte2
+                                    // YUYV               0       1      3
+                                    // YVYU               0       3      1
+
+//Color Pipe (IECP) parameters
+
+
+//r4
+
+                                    //  e.g.              byte0           byte1           byte2
+                                    // YUYV                 0               1               3
+                                    // YVYU                 0               3               1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+//                = 000 0001 (min message len 1 ) 0,0000 (resp len 0   -add later)
+//                  0000,0000,0000
+//                  0001(Spawn a root thread),0001 (Root thread spawn thread)
+//                = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+//                = 000 0110 (min message len 6 ) 0,0000 (resp len 0   -add later)
+//                  1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+//                  0000,0000 (Binding table index, added later)
+//                = 0x0C098700
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+        // Message Descriptors
+                //                = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+                //                  1 (header present 1) 0 1010 (media block write) 000000
+                //                  00000000 (binding table index - set later)
+                //                = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+                             // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT  r43    // Dummy Message Register for EOT
+
+
+.declare    mubMSGPAYLOAD  Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare    muwMSGPAYLOAD  Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare    mudMSGPAYLOAD  Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare    mfMSGPAYLOAD   Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// End of common.inc
+
+// FileName:    DI_Save_PA_16x4.asm
+// Author:      Vivek Kumar
+// Description: Save two 16x4 blocks of DI output in Packed format
+
+
+// FileName:    DNDI.inc
+// Author:      Vivek Kumar
+// Description: Include file for DN, DI and DNDI
+// Inputs:      DI_ENABLE, DN_ENABLE, DN_PLANAR, DN_PACKED
+
+
+// End of common.inc
+
+
+//Interface:
+//Static Parameters:
+//r1
+
+
+//====================== Binding table (Explicit To DNDI)=========================================
+
+
+.declare mudMSGHDR_DNDI     Base=r18      ElementSize=4    Type=ud
+.declare mdMSGHDR_DNDI      Base=r18      ElementSize=4    Type=d
+.declare mwMSGHDR_DNDI      Base=r18      ElementSize=2    Type=w
+
+
+.declare mudMSGHDR_STMM     Base=r20      ElementSize=4    Type=ud
+
+
+.declare mudMSGHDR_HIST     Base=r22      ElementSize=4    Type=ud
+
+
+.declare mudMSGHDR_ENC_STATS Base=r24 ElementSize=4   Type=ud
+.declare muwMSGHDR_ENC_STATS Base=r24 ElementSize=2   Type=uw
+.declare mubMSGHDR_ENC_STATS Base=r24 ElementSize=1   Type=ub
+
+
+.declare mudMSGHDR_DN_OUT   Base=r31.0      ElementSize=4  Type=ud
+.declare mdMSGHDR_DN_OUT    Base=r31.0      ElementSize=4  Type=d
+.declare mubMSGHDR_DN_OUT   Base=r31.0      ElementSize=1  Type=ub
+
+
+.declare mudMSGHDR_UVCOPY   Base=r36      ElementSize=4  Type=ud
+.declare mdMSGHDR_UVCOPY    Base=r36      ElementSize=4  Type=d
+.declare mudMSGHDR_UCOPY    Base=r36       ElementSize=4  Type=ud
+.declare mudMSGHDR_VCOPY    Base=r38       ElementSize=4  Type=ud
+
+
+.declare mudMSGHDR_DI_OUT1  Base=r18.0      ElementSize=4     Type=ud
+.declare mubMSGHDR_DI_OUT1  Base=r18.0      ElementSize=1     Type=ub
+
+
+.declare mudMSGHDR_DI_OUT2  Base=r23.0      ElementSize=4     Type=ud
+.declare mubMSGHDR_DI_OUT2  Base=r23.0      ElementSize=1     Type=ub
+
+//r45
+//Use r45 as message header, so no need to "mov" the data.
+
+.declare mudDN_Y_OUT        Base=r45.0 ElementSize=4 SrcRegion=<8;8,1>   DstRegion=<1> Type=ud
+
+// Message response (Denoised & DI-ed pixels & statistics); Use buffer 5
+.declare udDNDI_RESP        Base=r46.0 ElementSize=4 SrcRegion=<8;8,1>   DstRegion=<1> Type=ud
+.declare uwDNDI_RESP        Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare ubDNDI_RESP        Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+// Message response (UV Copy); Use buffer 5
+.declare udDNDI_UV_RESP     Base=r58.0 ElementSize=4 SrcRegion=<8;8,1>  DstRegion=<1> Type=ud
+.declare ubDNDI_UV_RESP     Base=r58.0 ElementSize=1 SrcRegion=<16;16,1>    DstRegion=<1> Type=ub
+
+//Temp GRFs: For 42X to 422 Conversion
+.declare uwDNDI_UVCOPY_TEMP Base=r10.0 ElementSize=2 SrcRegion=<16;16,1>    DstRegion=<1> Type=uw       //8 GRFs
+.declare ubDNDI_UVCOPY_TEMP Base=r10.0 ElementSize=1 SrcRegion=<16;16,1>    DstRegion=<1> Type=ub       //8 GRFs
+//---------------------------------------------------------------------------
+// Message descriptors
+//---------------------------------------------------------------------------
+// Extended message descriptor
+    // Message descriptor   for sampler read
+    //                    = 000 0010 (message len 2) 00000 (resp len - set later, 12 or 5 or 11)
+    //                      1 (header present 1) 0 11 (SIMD32/64 mode)
+    //                      1000 (message type) 0000 (DI state index)
+    //                      00000000 (binding table index - set later)
+    //                    = 0x040b8000
+
+
+// Attention: The Message Length is The Number of GRFs with Data Only, without the Header
+
+
+//---------------------------------------------------------------------------
+// VDI Return Data format
+//---------------------------------------------------------------------------
+// Defines for DI enabled
+
+
+// Defines for DI disabled
+
+
+add (4) a0.4<1>:uw   r2.28<4;4,1>:ub   608:w               // a0.4..a0.7 = per-component byte offsets (r2.28..r2.31) + 608; 608 = 19 * 32, presumably the start of r19 -- the original "starts at m20" remark looks stale, confirm
+
+mov (8) r27.0<1>:ud     r0.0<8;8,1>:ud
+shl (1) r27.0<1>:d      r7.0<0;1,0>:w            1:w  { NoDDClr }          // H. block origin need to be doubled
+mov (1) r27.1<1>:d      r7.1<0;1,0>:w                 { NoDDClr, NoDDChk }       // Block origin
+mov (1) r27.2<1>:ud     0x3001F:ud          { NoDDChk }          // Block width and height (32x4): 0x1F = width-1, 0x3 = height-1
+
+//prepare the message headers (must happen before the 1st-field packing below, whose last row lands in r27)
+mov (8) r18.0<1>:ud       r27<8;8,1>:ud
+mov (8) r23.0<1>:ud       r27<8;8,1>:ud
+
+// Pack 2nd field Y
+    mov (16)    r[a0.4, 0]<2>      ubDNDI_RESP(0,0)               { NoDDClr }
+    mov (16)    r[a0.4, 32]<2>      ubDNDI_RESP(0,16)               { NoDDClr }
+    mov (16)    r[a0.4, 64]<2>      ubDNDI_RESP(0,32)               { NoDDClr }
+    mov (16)    r[a0.4, 96]<2>      ubDNDI_RESP(0,48)               { NoDDClr }
+// Pack 2nd field U
+    mov (8)     r[a0.5, 0]<4>      ubDNDI_RESP(2,1)<16;8,2>   { NoDDClr, NoDDChk }  //U pixels
+    mov (8)     r[a0.5, 32]<4>      ubDNDI_RESP(2,17)<16;8,2>   { NoDDClr, NoDDChk }  //U pixels
+    mov (8)     r[a0.5, 64]<4>      ubDNDI_RESP(2,33)<16;8,2>   { NoDDClr, NoDDChk }  //U pixels
+    mov (8)     r[a0.5, 96]<4>      ubDNDI_RESP(2,49)<16;8,2>   { NoDDClr, NoDDChk }  //U pixels
+// Pack 2nd field V
+    mov (8)     r[a0.6, 0]<4>      ubDNDI_RESP(2,0)<16;8,2>     { NoDDChk }     //V pixels
+    mov (8)     r[a0.6, 32]<4>      ubDNDI_RESP(2,16)<16;8,2>     { NoDDChk }     //V pixels
+    mov (8)     r[a0.6, 64]<4>      ubDNDI_RESP(2,32)<16;8,2>     { NoDDChk }     //V pixels
+    mov (8)     r[a0.6, 96]<4>      ubDNDI_RESP(2,48)<16;8,2>     { NoDDChk }     //V pixels
+
+// Pack 1st field Y
+    mov (16)    r[a0.4, 160]<2>    ubDNDI_RESP(4,0)               { NoDDClr }
+    mov (16)    r[a0.4, 192]<2>    ubDNDI_RESP(4,16)               { NoDDClr }
+    mov (16)    r[a0.4, 224]<2>    ubDNDI_RESP(4,32)               { NoDDClr }
+    mov (16)    r[a0.4, 256]<2>    ubDNDI_RESP(4,48)               { NoDDClr }
+// Pack 1st field U
+    mov (8)     r[a0.5, 160]<4>    ubDNDI_RESP(6,1)<16;8,2>   { NoDDClr, NoDDChk }  //U pixels
+    mov (8)     r[a0.5, 192]<4>    ubDNDI_RESP(6,17)<16;8,2>   { NoDDClr, NoDDChk }  //U pixels
+    mov (8)     r[a0.5, 224]<4>    ubDNDI_RESP(6,33)<16;8,2>   { NoDDClr, NoDDChk }  //U pixels
+    mov (8)     r[a0.5, 256]<4>    ubDNDI_RESP(6,49)<16;8,2>   { NoDDClr, NoDDChk }  //U pixels
+// Pack 1st field V
+    mov (8)     r[a0.6, 160]<4>    ubDNDI_RESP(6,0)<16;8,2>     { NoDDChk }     //V pixels
+    mov (8)     r[a0.6, 192]<4>    ubDNDI_RESP(6,16)<16;8,2>     { NoDDChk }     //V pixels
+    mov (8)     r[a0.6, 224]<4>    ubDNDI_RESP(6,32)<16;8,2>     { NoDDChk }     //V pixels
+    mov (8)     r[a0.6, 256]<4>    ubDNDI_RESP(6,48)<16;8,2>     { NoDDChk }     //V pixels
+
+//save the previous frame (message length 5: header r18 + 4 payload registers, binding table index 0x1B)
+send (8)    null<1>:d    r18.0     0x5     0xA0A801B:ud
+
+//save the current frame (message length 5: header r23 + 4 payload registers, binding table index 0x1E)
+send (8)    null<1>:d    r23.0     0x5     0xA0A801E:ud
diff --git a/src/shaders/post_processing/gen7/EOT.g4a b/src/shaders/post_processing/gen7/EOT.g4a
new file mode 100644
index 0000000..72c3da3
--- /dev/null
+++ b/src/shaders/post_processing/gen7/EOT.g4a
@@ -0,0 +1,166 @@
+/*
+ *  Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+//    2    // Total instruction count
+//    1    // Total kernel count
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16)
+.default_register_type  :ub
+
+.reg_count_total        128
+.reg_count_payload      7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+                                    //  e.g.            byte0   byte1  byte2
+                                    // YUYV               0       1      3
+                                    // YVYU               0       3      1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+                                    //  e.g.              byte0           byte1           byte2
+                                    // YUYV                 0               1               3
+                                    // YVYU                 0               3               1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+//                = 000 0001 (min message len 1 ) 0,0000 (resp len 0   -add later)
+//                  0000,0000,0000
+//                  0001(Spawn a root thread),0001 (Root thread spawn thread)
+//                = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+//                = 000 0110 (min message len 6 ) 0,0000 (resp len 0   -add later)
+//                  1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+//                  0000,0000 (Binding table index, added later)
+//                = 0x02000011
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+        // Message Descriptors
+                //                = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+                //                  1 (header present 1) 0 1010 (media block write) 000000
+                //                  00000000 (binding table index - set later)
+                //                = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+                             // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT  r43    // Dummy Message Register for EOT
+
+
+.declare    mubMSGPAYLOAD  Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare    muwMSGPAYLOAD  Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare    mudMSGPAYLOAD  Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare    mfMSGPAYLOAD   Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+
+.declare fROBUF		  Base=r9.0		ElementSize=4		SrcRegion=<8;8,1>		  DstRegion=<1>		Type=f
+
+.declare udROBUF		Base=r9.0		ElementSize=4		SrcRegion=<8;8,1>		  DstRegion=<1>		Type=ud
+
+.declare uwROBUF		Base=r9.0		ElementSize=2		SrcRegion=<16;16,1>		DstRegion=<1>		Type=uw
+
+.declare ubROBUF		Base=r9.0		ElementSize=1		SrcRegion=<16;16,1>		DstRegion=<1>		Type=ub
+
+.declare ub4ROBUF 	Base=r9.0		ElementSize=1		SrcRegion=<32;8,4>		DstRegion=<4>		Type=ub
+
+
+// End of common.inc
+
+
+//End of Thread message
+
+mov (8) r127<1>:ud r0.0<8;8,1>:ud 
+ send (1) null<1>:d r127 0x27 0x02000010
diff --git a/src/shaders/post_processing/gen7/Makefile.am b/src/shaders/post_processing/gen7/Makefile.am
new file mode 100644
index 0000000..587f266
--- /dev/null
+++ b/src/shaders/post_processing/gen7/Makefile.am
@@ -0,0 +1,97 @@
+INTEL_PP_G7B = \
+	avs.g7b			\
+	dndi.g7b		\
+	nv12_dn_nv12.g7b	\
+	pa_to_pl2.g7b		\
+	pa_to_pl3.g7b		\
+	pl2_to_pa.g7b		\
+	pl2_to_pl2.g7b		\
+	pl2_to_pl3.g7b		\
+	pl3_to_pa.g7b		\
+	pl3_to_pl2.g7b		\
+	pl3_to_pl3.g7b		\
+	$(NULL)
+
+INTEL_PP_G4A = \
+	DI_Core.g4a			\
+	DI_Save_NV12_16x4.g4a		\
+	DI_Save_PA_16x4.g4a		\
+	EOT.g4a				\
+	NV12_DI_NV12.g4a		\
+	NV12_DNDI_422CP.g4a		\
+	NV12_DNDI_PA.g4a		\
+	NV12_DNUV_NV12.g4a		\
+	NV12_DN_422CP.g4a		\
+	NV12_DN_NV12.g4a		\
+	PA_AVS_Buf_0.g4a		\
+	PA_AVS_Buf_1.g4a		\
+	PA_AVS_Buf_2.g4a		\
+	PA_AVS_Buf_3.g4a		\
+	PA_DI_422CP.g4a			\
+	PA_DI_PA.g4a			\
+	PA_DNDI_422CP.g4a		\
+	PA_DNDI_PA.g4a			\
+	PA_DNUV_PA.g4a			\
+	PA_DN_422CP.g4a			\
+	PA_DN_PA.g4a			\
+	PL2_AVS_Buf_0.g4a		\
+	PL2_AVS_Buf_1.g4a		\
+	PL2_AVS_Buf_2.g4a		\
+	PL2_AVS_Buf_3.g4a		\
+	PL3_AVS_Buf_0.g4a		\
+	PL3_AVS_Buf_1.g4a 		\
+	PL3_AVS_Buf_2.g4a		\
+	PL3_AVS_Buf_3.g4a		\
+	PL3_DNDI_422CP.g4a		\
+	PL3_DNDI_PA.g4a			\
+	PL3_DNUV_PL3.g4a		\
+	PL3_DN_422CP.g4a		\
+	PL3_DN_PL3.g4a			\
+	PL_DI_422CP.g4a			\
+	PL_DI_PA.g4a			\
+	Save_AVS_PA.g4a  		\
+	Save_AVS_PL3.g4a		\
+	Save_AVS_NV12.g4a		\
+	Save_AVS_RGB.g4a		\
+	Set_AVS_Buf_0123_BGRA.g4a	\
+	Set_AVS_Buf_0123_PL2.g4a	\
+	Set_AVS_Buf_0123_PL3.g4a	\
+	Set_AVS_Buf_0123_VUYA.g4a	\
+	Set_AVS_Buf_0123_VYUA.g4a	\
+	Set_Layer_0.g4a			\
+	VP_Setup.g4a			\
+	$(NULL)
+
+INTEL_PP_ASM = $(INTEL_PP_G7B:%.g7b=%.asm)
+INTEL_PP_GEN7_ASM = $(INTEL_PP_G7B:%.g7b=%.g7s)
+
+INTEL_PP_G75B = $(INTEL_PP_G7B:%.g7b=%.g75b)
+
+TARGETS  =
+if HAVE_GEN4ASM
+TARGETS += $(INTEL_PP_G7B) $(INTEL_PP_G75B)
+endif
+
+all-local: $(TARGETS)
+
+SUFFIXES = .g7b .g7s .asm
+
+$(INTEL_PP_GEN7_ASM): $(INTEL_PP_ASM) $(INTEL_PP_G4A)
+.asm.g7s:
+	$(AM_V_GEN)cpp $< > _pp0.$@;		\
+	../../gpp.py _pp0.$@ $@;	\
+	rm _pp0.$@
+.g7s.g7b:
+	$(AM_V_GEN)intel-gen4asm -a -o $@ -g 7 $<
+
+.g7s.g75b:
+	$(AM_V_GEN)intel-gen4asm -a -o $@ -g 7.5 $<
+
+CLEANFILES = $(INTEL_PP_GEN7_ASM)
+
+EXTRA_DIST = \
+	$(INTEL_PP_G7B)		\
+	$(INTEL_PP_G75B)
+
+# Extra clean files so that maintainer-clean removes *everything*
+MAINTAINERCLEANFILES = Makefile.in
diff --git a/src/shaders/post_processing/gen7/NV12_DI_NV12.g4a b/src/shaders/post_processing/gen7/NV12_DI_NV12.g4a
new file mode 100644
index 0000000..668b61f
--- /dev/null
+++ b/src/shaders/post_processing/gen7/NV12_DI_NV12.g4a
@@ -0,0 +1,392 @@
+/*
+ *  Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+//   57    // Total instruction count
+//    1    // Total kernel count
+
+.kernel NV12_DI_NV12
+.code
+
+
+
+// FileName:	DI.asm 
+// Author:		Vivek Kumar
+// Description:	Tasks for DI only case (16x4 block)
+
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16)
+.default_register_type  :ub
+
+.reg_count_total        128
+.reg_count_payload      7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+                                    //  e.g.            byte0   byte1  byte2
+                                    // YUYV               0       1      3
+                                    // YVYU               0       3      1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+                                    //  e.g.              byte0           byte1           byte2
+                                    // YUYV                 0               1               3
+                                    // YVYU                 0               3               1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+//                = 000 0001 (min message len 1 ) 0,0000 (resp len 0   -add later)
+//                  0000,0000,0000
+//                  0001(Spawn a root thread),0001 (Root thread spawn thread)
+//                = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+//                = 000 0110 (min message len 6 ) 0,0000 (resp len 0   -add later)
+//                  1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+//                  0000,0000 (Binding table index, added later)
+//                = 0x02000011
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+        // Message Descriptors
+                //                = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+                //                  1 (header present 1) 0 1010 (media block write) 000000
+                //                  00000000 (binding table index - set later)
+                //                = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+                             // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT  r43    // Dummy Message Register for EOT
+
+
+.declare    mubMSGPAYLOAD  Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare    muwMSGPAYLOAD  Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare    mudMSGPAYLOAD  Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare    mfMSGPAYLOAD   Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+
+.declare fROBUF		  Base=r9.0		ElementSize=4		SrcRegion=<8;8,1>		  DstRegion=<1>		Type=f
+
+.declare udROBUF		Base=r9.0		ElementSize=4		SrcRegion=<8;8,1>		  DstRegion=<1>		Type=ud
+
+.declare uwROBUF		Base=r9.0		ElementSize=2		SrcRegion=<16;16,1>		DstRegion=<1>		Type=uw
+
+.declare ubROBUF		Base=r9.0		ElementSize=1		SrcRegion=<16;16,1>		DstRegion=<1>		Type=ub
+
+.declare ub4ROBUF 	Base=r9.0		ElementSize=1		SrcRegion=<32;8,4>		DstRegion=<4>		Type=ub
+
+
+// End of common.inc
+
+
+// FileName:    DNDI.inc
+// Author:      Vivek Kumar
+// Description: Include file for DN, DI and DNDI
+// Inputs:      DI_ENABLE, DN_ENABLE, DN_PLANAR, DN_PACKED
+
+
+
+
+// End of common.inc
+
+
+//Interface:
+//Static Parameters:
+//r1
+
+
+//====================== Binding table (Explicit To DNDI)=========================================
+
+
+.declare mudMSGHDR_DNDI     Base=r18      ElementSize=4    Type=ud
+.declare mdMSGHDR_DNDI      Base=r18      ElementSize=4    Type=d
+.declare mwMSGHDR_DNDI      Base=r18      ElementSize=2    Type=w
+
+
+.declare mudMSGHDR_STMM     Base=r20      ElementSize=4    Type=ud
+
+
+.declare mudMSGHDR_HIST     Base=r22      ElementSize=4    Type=ud
+
+
+.declare mudMSGHDR_ENC_STATS Base=r24 ElementSize=4   Type=ud
+.declare muwMSGHDR_ENC_STATS Base=r24 ElementSize=2   Type=uw
+.declare mubMSGHDR_ENC_STATS Base=r24 ElementSize=1   Type=ub
+
+
+.declare mudMSGHDR_DN_OUT   Base=r31.0      ElementSize=4  Type=ud
+.declare mdMSGHDR_DN_OUT    Base=r31.0      ElementSize=4  Type=d
+.declare mubMSGHDR_DN_OUT   Base=r31.0      ElementSize=1  Type=ub
+
+
+.declare mudMSGHDR_UVCOPY   Base=r36      ElementSize=4  Type=ud
+.declare mdMSGHDR_UVCOPY    Base=r36      ElementSize=4  Type=d
+.declare mudMSGHDR_UCOPY    Base=r36       ElementSize=4  Type=ud
+.declare mudMSGHDR_VCOPY    Base=r38       ElementSize=4  Type=ud
+
+
+.declare mudMSGHDR_DI_OUT1  Base=r18.0      ElementSize=4     Type=ud
+.declare mubMSGHDR_DI_OUT1  Base=r18.0      ElementSize=1     Type=ub
+
+
+.declare mudMSGHDR_DI_OUT2  Base=r23.0      ElementSize=4     Type=ud
+.declare mubMSGHDR_DI_OUT2  Base=r23.0      ElementSize=1     Type=ub
+
+//r45
+//Use r45 as message header, so no need to "mov" the data.
+
+.declare mudDN_Y_OUT        Base=r45.0 ElementSize=4 SrcRegion=<8;8,1>   DstRegion=<1> Type=ud
+
+// Message response (Denoised & DI-ed pixels & statistics); Use buffer 5
+.declare udDNDI_RESP        Base=r46.0 ElementSize=4 SrcRegion=<8;8,1>   DstRegion=<1> Type=ud
+.declare uwDNDI_RESP        Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare ubDNDI_RESP        Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+// Message response (UV Copy); Use buffer 5
+.declare udDNDI_UV_RESP     Base=r58.0 ElementSize=4 SrcRegion=<8;8,1>  DstRegion=<1> Type=ud
+.declare ubDNDI_UV_RESP     Base=r58.0 ElementSize=1 SrcRegion=<16;16,1>    DstRegion=<1> Type=ub
+
+//Temp GRFs: For 42X to 422 Conversion
+.declare uwDNDI_UVCOPY_TEMP Base=r10.0 ElementSize=2 SrcRegion=<16;16,1>    DstRegion=<1> Type=uw       //8 GRFs
+.declare ubDNDI_UVCOPY_TEMP Base=r10.0 ElementSize=1 SrcRegion=<16;16,1>    DstRegion=<1> Type=ub       //8 GRFs
+//---------------------------------------------------------------------------
+// Message descriptors
+//---------------------------------------------------------------------------
+// Extended message descriptor
+    // Message descriptor   for sampler read
+    //                    = 000 0010 (message len 2) 00000 (resp len - set later, 12 or 5 or 11)
+    //                      1 (header present 1) 0 11 (SIMD32/64 mode)
+    //                      1000 (message type) 0000 (DI state index)
+    //                      00000000 (binding table index - set later)
+    //                    = 0x040b8000
+
+
+// Attention: The Message Length is The Number of GRFs with Data Only, without the Header
+
+
+//---------------------------------------------------------------------------
+// VDI Return Data format
+//---------------------------------------------------------------------------
+// Defines for DI enabled
+
+
+// Defines for DI disabled
+
+
+
+// FileName:	DNDI_Command.asm 
+// Author:		Vivek Kumar
+// Description:	Sends a message to the VDI to process one DN (16x8) or DNDI (16x4) block
+
+// Prepare the DNDI send command
+mov (8)		mudMSGHDR_DNDI(0)<1>			r0.0<8;8,1>:ud					// message header
+mov (1)		mwMSGHDR_DNDI(1,4)<1>			r7.0<0;1,0>:w		{ NoDDClr }		// horizontal origin	// Do we need to add offset here? -vK
+mov (1)		mwMSGHDR_DNDI(1,12)<1>			r7.1<0;1,0>:w		{ NoDDChk }		// vertical origin		// Can these 2 be combined? - vK
+
+send (8)	udDNDI_RESP(0)<1>	r18	0x2	0x4AE8003:ud
+
+// On Gen6, with the VDI walker, use the XY pair returned in the DNDI response rather than the one programmed above
+// VDI_RETURNED_XY is ordered X,Y when the walker is enabled and is the same as programmed when the walker is disabled
+mov (2) 	r7.0<1>:w     uwDNDI_RESP(9,14)<2;2,1>	// horizontal/vertical origin in W.14 and W.15
+
+
+
+// FileName:	DI_STMM_Save.asm 
+// Author:		Vivek Kumar
+// Description:	Saves DI STMM Data to statistics surface in case of DI enabled (for 16x4 block)
+
+// Write STMM to memory
+mov (8)     mudMSGHDR_STMM(0)<1>	r0.0<8;8,1>:ud               							// message header   
+mov (8)     mudMSGHDR_STMM(1)<1>   	udDNDI_RESP(8,0)         					// Move STMM to MRF 
+
+shr (1)     mudMSGHDR_STMM(0,0)<1>	r7.0<0;1,0>:w            1:w     { NoDDClr } 		// X origin / 2
+mov (1)     mudMSGHDR_STMM(0,1)<1>	r7.1<0;1,0>:w                    { NoDDClr, NoDDChk }  	// Y origin
+mov (1)     mudMSGHDR_STMM(0,2)<1>	0x30007:ud           { NoDDChk } 		// block width and height (8x4)
+
+send (8)	null<1>:d	r20	0x5		0x40A8021:ud      
+
+
+
+// FileName:	DNDI_Enc_Stats_Save.asm
+// Author:		Vivek Kumar
+// Description:	Saves Encoder Statistics data to statistics surface in case of DI enabled (for 16x4 block)
+
+// Write encoder statistics to memory
+//Currently enable this only on Gen6 validation
+mov (8)		mudMSGHDR_ENC_STATS(1)<1>	0x0:ud						// Init payload MRF
+mov (8)		mudMSGHDR_ENC_STATS(0)<1>	r0.0<8;8,1>:ud				// message header
+
+shr (1)		mudMSGHDR_ENC_STATS(0,0)<1>		r7.0<0;1,0>:w            	1:w  	{ NoDDClr }			//enable the flag after testing on si           			{ NoDDClr }	// X origin / 2
+mul (1)		acc0.1<1>:ud					r7.1<0;1,0>:w				3:w																							// Y origin * 3
+shr (1)		mudMSGHDR_ENC_STATS(0,1)<1>		acc0.1<0;1,0>:ud			2:w		{ NoDDClr, NoDDChk }		//enable the flag after testing on si			   { NoDDClr, NoDDChk } // Y origin * 3/4
+mov (1)		mudMSGHDR_ENC_STATS(0,2)<1>		0x20007:ud				{ NoDDChk }			//enable the flag after testing on si						{ NoDDChk } // block width and height (8x3)
+add (2)		mudMSGHDR_ENC_STATS(0,0)<1>		mudMSGHDR_ENC_STATS(0,0)<2;2,1>       	r1.12<2;2,1>:uw					// Add pitch to X,Y origin
+
+
+	//Data block for Encoder Statistics
+	//----------------------------------------------------
+	//|  0  |   1  |   2   |  3  |  4  |  5  |  6  |  7  | Bytes
+	//----------------------------------------------------
+	//| BNE | MCNT | FCNT | TCNT |  X  |  X  |  X  |  X  |
+	//----------------------------------------------------
+	//|   DcTpT    |     SVCM    |   DcBpT   |   DcTpB   |
+	//----------------------------------------------------
+	//|   SHCM     |     STAD    |   DcTcB   |   DcBpB   |
+	//----------------------------------------------------
+	mov (1)		mudMSGHDR_ENC_STATS(1,0)<1>		udDNDI_RESP(9,1)<0;1,0>    		{ NoDDClr }			// Move encoder statistics to MRF
+	mov (2)		mudMSGHDR_ENC_STATS(1,3)<2>		udDNDI_RESP(9,3)<2;2,1>    		{ NoDDClr, NoDDChk }		// Move encoder statistics to MRF
+	mov (2)		mudMSGHDR_ENC_STATS(1,2)<2>		udDNDI_RESP(9,5)<2;2,1>    		{ NoDDChk }			// Move encoder statistics to MRF
+
+
+send (8)   null<1>:d    r24    0x5    0x40A8021:ud
+
+
+
+// FileName:    DI_Save_NV12_16x4.asm
+// Author:      Vivek Kumar
+// Description: Save two 16x4 blocks of DI output in NV12 format (Y block plus interleaved U/V block)
+
+
+// add (4) a0.4<1>:uw   r2.28<4;4,1>:ub   608:w               // Initial Y,U,V offset in YUV422 block; it starts at m20
+
+mov (8) r28.0<1>:ud     r0.0<8;8,1>:ud          // Start the message header template from r0
+mov (1) r28.0<1>:d      r7.0<0;1,0>:w            { NoDDClr }          // Horizontal block origin (r7.0 copied as-is, no doubling here)
+mov (1) r28.1<1>:d      r7.1<0;1,0>:w                 { NoDDClr, NoDDChk }       // Vertical block origin
+mov (1) r28.2<1>:ud     0x3000F:ud          { NoDDChk }          // Block width and height (16x4)
+
+//prepare the message headers
+mov (8) r18.0<1>:ud       r28<8;8,1>:ud
+mov (8) r23.0<1>:ud       r28<8;8,1>:ud
+
+//Bottom field Y
+mov (8)   mudMSGHDR_DI_OUT1(1)<1>		udDNDI_RESP(0,0)
+mov (8)   mudMSGHDR_DI_OUT1(2)<1>		udDNDI_RESP(0,8)
+// Top field Y
+mov (8)   mudMSGHDR_DI_OUT2(1)<1>	udDNDI_RESP(4,0)
+mov (8)   mudMSGHDR_DI_OUT2(2)<1>	udDNDI_RESP(4,8)
+
+//Change origin to U/V block
+asr (1) r28.1<1>:d		r28.1<0;1,0>:d    1:w			{ NoDDClr }    		// U/V block origin should be half of Y's
+mov (1) r28.2<1>:ud     0x1000F:ud		{ NoDDChk }    		// Block width and height (16x2)
+
+// Bottom field U/V
+mov (16)   r22.0<2>:ub       ubDNDI_RESP(2, 1)<32;8,2>	{ NoDDClr }
+mov (16)   r22.1<2>:ub       ubDNDI_RESP(2, 0)<32;8,2>	{ NoDDChk }
+
+// Top field U/V
+mov (16)   r27.0<2>:ub       ubDNDI_RESP(6, 1)<32;8,2>	{ NoDDClr }
+mov (16)   r27.1<2>:ub       ubDNDI_RESP(6, 0)<32;8,2>	{ NoDDChk }
+
+//copy the U/V message header (r28) to the U/V message registers r21 and r26
+mov (8) r21<1>:ud			r28<8;8,1>:ud
+mov (8) r26<1>:ud			r28<8;8,1>:ud
+
+//Send out Y component on previous frame to surface
+send (8)    null<1>:d    r18	0x5    0x60A801B:ud
+//Send out Y component on current frame to surface
+send (8)    null<1>:d    r23 	0x5    0x60A801E:ud
+//Send out U/V component on previous frame to surface
+send (8)    null<1>:d    r21	0x5    0x40A801C:ud
+//Send out U/V component on current frame to surface
+send (8)    null<1>:d    r26	0x5    0x40A801F:ud
+
+
+//End of Thread message
+
+mov (8) r127<1>:ud r0.0<8;8,1>:ud 
+ send (1) null<1>:d r127 0x27 0x02000010
+
+
+.end_code 
+.end_kernel
diff --git a/src/shaders/post_processing/gen7/NV12_DNDI_422CP.g4a b/src/shaders/post_processing/gen7/NV12_DNDI_422CP.g4a
new file mode 100644
index 0000000..0c0002f
--- /dev/null
+++ b/src/shaders/post_processing/gen7/NV12_DNDI_422CP.g4a
@@ -0,0 +1,557 @@
+/*
+ *  Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+//  116    // Total instruction count
+//    1    // Total kernel count
+
+.kernel NV12_DNDI_422CP
+.code
+
+
+
+// FileName:	DNDI_PL_Core.asm
+// Author:		Tatiya, Rupesh
+
+
+
+// FileName:	DNDI_Core.asm 
+// Author:		Vivek Kumar
+// Description:	Tasks for DN+DI case (16x4 block)
+
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16)
+.default_register_type  :ub
+
+.reg_count_total        128
+.reg_count_payload      7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+                                    //  e.g.            byte0   byte1  byte2
+                                    // YUYV               0       1      3
+                                    // YVYU               0       3      1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+                                    //  e.g.              byte0           byte1           byte2
+                                    // YUYV                 0               1               3
+                                    // YVYU                 0               3               1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+//                = 000 0001 (min message len 1 ) 0,0000 (resp len 0   -add later)
+//                  0000,0000,0000
+//                  0001(Spawn a root thread),0001 (Root thread spawn thread)
+//                = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+//                = 000 0110 (min message len 6 ) 0,0000 (resp len 0   -add later)
+//                  1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+//                  0000,0000 (Binding table index, added later)
+//                = 0x02000011
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+        // Message Descriptors
+                //                = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+                //                  1 (header present 1) 0 1010 (media block write) 000000
+                //                  00000000 (binding table index - set later)
+                //                = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+                             // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT  r43    // Dummy Message Register for EOT
+
+
+.declare    mubMSGPAYLOAD  Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare    muwMSGPAYLOAD  Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare    mudMSGPAYLOAD  Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare    mfMSGPAYLOAD   Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+
+.declare fROBUF		  Base=r9.0		ElementSize=4		SrcRegion=<8;8,1>		  DstRegion=<1>		Type=f
+
+.declare udROBUF		Base=r9.0		ElementSize=4		SrcRegion=<8;8,1>		  DstRegion=<1>		Type=ud
+
+.declare uwROBUF		Base=r9.0		ElementSize=2		SrcRegion=<16;16,1>		DstRegion=<1>		Type=uw
+
+.declare ubROBUF		Base=r9.0		ElementSize=1		SrcRegion=<16;16,1>		DstRegion=<1>		Type=ub
+
+.declare ub4ROBUF 	Base=r9.0		ElementSize=1		SrcRegion=<32;8,4>		DstRegion=<4>		Type=ub
+
+
+// End of common.inc
+
+
+// FileName:    DNDI.inc
+// Author:      Vivek Kumar
+// Description: Include file for DN, DI and DNDI
+// Inputs:      DI_ENABLE, DN_ENABLE, DN_PLANAR, DN_PACKED
+
+
+
+
+// End of common.inc
+
+
+//Interface:
+//Static Parameters:
+//r1
+
+
+//====================== Binding table (Explicit To DNDI)=========================================
+
+
+.declare mudMSGHDR_DNDI     Base=r18      ElementSize=4    Type=ud
+.declare mdMSGHDR_DNDI      Base=r18      ElementSize=4    Type=d
+.declare mwMSGHDR_DNDI      Base=r18      ElementSize=2    Type=w
+
+
+.declare mudMSGHDR_STMM     Base=r20      ElementSize=4    Type=ud
+
+
+.declare mudMSGHDR_HIST     Base=r22      ElementSize=4    Type=ud
+
+
+.declare mudMSGHDR_ENC_STATS Base=r24 ElementSize=4   Type=ud
+.declare muwMSGHDR_ENC_STATS Base=r24 ElementSize=2   Type=uw
+.declare mubMSGHDR_ENC_STATS Base=r24 ElementSize=1   Type=ub
+
+
+.declare mudMSGHDR_DN_OUT   Base=r31.0      ElementSize=4  Type=ud
+.declare mdMSGHDR_DN_OUT    Base=r31.0      ElementSize=4  Type=d
+.declare mubMSGHDR_DN_OUT   Base=r31.0      ElementSize=1  Type=ub
+
+
+.declare mudMSGHDR_UVCOPY   Base=r36      ElementSize=4  Type=ud
+.declare mdMSGHDR_UVCOPY    Base=r36      ElementSize=4  Type=d
+.declare mudMSGHDR_UCOPY    Base=r36       ElementSize=4  Type=ud
+.declare mudMSGHDR_VCOPY    Base=r38       ElementSize=4  Type=ud
+
+
+.declare mudMSGHDR_DI_OUT1  Base=r18.0      ElementSize=4     Type=ud
+.declare mubMSGHDR_DI_OUT1  Base=r18.0      ElementSize=1     Type=ub
+
+
+.declare mudMSGHDR_DI_OUT2  Base=r23.0      ElementSize=4     Type=ud
+.declare mubMSGHDR_DI_OUT2  Base=r23.0      ElementSize=1     Type=ub
+
+//r45
+//Use r45 as message header, so no need to "mov" the data.
+
+.declare mudDN_Y_OUT        Base=r45.0 ElementSize=4 SrcRegion=<8;8,1>   DstRegion=<1> Type=ud
+
+// Message response (Denoised & DI-ed pixels & statistics); Use buffer 5
+.declare udDNDI_RESP        Base=r46.0 ElementSize=4 SrcRegion=<8;8,1>   DstRegion=<1> Type=ud
+.declare uwDNDI_RESP        Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare ubDNDI_RESP        Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+// Message response (UV Copy); Use buffer 5
+.declare udDNDI_UV_RESP     Base=r58.0 ElementSize=4 SrcRegion=<8;8,1>  DstRegion=<1> Type=ud
+.declare ubDNDI_UV_RESP     Base=r58.0 ElementSize=1 SrcRegion=<16;16,1>    DstRegion=<1> Type=ub
+
+//Temp GRFs: For 42X to 422 Conversion
+.declare uwDNDI_UVCOPY_TEMP Base=r10.0 ElementSize=2 SrcRegion=<16;16,1>    DstRegion=<1> Type=uw       //8 GRFs
+.declare ubDNDI_UVCOPY_TEMP Base=r10.0 ElementSize=1 SrcRegion=<16;16,1>    DstRegion=<1> Type=ub       //8 GRFs
+//---------------------------------------------------------------------------
+// Message descriptors
+//---------------------------------------------------------------------------
+// Extended message descriptor
+    // Message descriptor   for sampler read
+    //                    = 000 0010 (message len 2) 00000 (resp len - set later, 12 or 5 or 11)
+    //                      1 (header present 1) 0 11 (SIMD32/64 mode)
+    //                      1000 (message type) 0000 (DI state index)
+    //                      00000000 (binding table index - set later)
+    //                    = 0x040b8000
+
+
+// Attention: The Message Length is The Number of GRFs with Data Only, without the Header
+
+
+//---------------------------------------------------------------------------
+// VDI Return Data format
+//---------------------------------------------------------------------------
+// Defines for DI enabled
+
+
+// Defines for DI disabled
+
+
+
+// FileName:	DNDI_Command.asm 
+// Author:		Vivek Kumar
+// Description:	Sends a message to the VDI to process one DN (16x8) or DNDI (16x4) block
+
+// Prepare the DNDI send command
+mov (8)		mudMSGHDR_DNDI(0)<1>			r0.0<8;8,1>:ud					// message header
+mov (1)		mwMSGHDR_DNDI(1,4)<1>			r7.0<0;1,0>:w		{ NoDDClr }		// horizontal origin	// Do we need to add offset here? -vK
+mov (1)		mwMSGHDR_DNDI(1,12)<1>			r7.1<0;1,0>:w		{ NoDDChk }		// vertical origin		// Can these 2 be combined? - vK
+
+send (8)	udDNDI_RESP(0)<1>	r18	0x2	0x4BE8003:ud
+
+// On Gen6, with the VDI walker, use the XY pair returned in the DNDI response rather than the one programmed above
+// VDI_RETURNED_XY is ordered X,Y when the walker is enabled and is the same as programmed when the walker is disabled
+mov (2) 	r7.0<1>:w     uwDNDI_RESP(9,14)<2;2,1>	// horizontal/vertical origin in W.14 and W.15
+
+
+
+// FileName:	DI_STMM_Save.asm 
+// Author:		Vivek Kumar
+// Description:	Saves DI STMM Data to statistics surface in case of DI enabled (for 16x4 block)
+
+// Write STMM to memory
+mov (8)     mudMSGHDR_STMM(0)<1>	r0.0<8;8,1>:ud               							// message header   
+mov (8)     mudMSGHDR_STMM(1)<1>   	udDNDI_RESP(8,0)         					// Move STMM to MRF 
+
+shr (1)     mudMSGHDR_STMM(0,0)<1>	r7.0<0;1,0>:w            1:w     { NoDDClr } 		// X origin / 2
+mov (1)     mudMSGHDR_STMM(0,1)<1>	r7.1<0;1,0>:w                    { NoDDClr, NoDDChk }  	// Y origin
+mov (1)     mudMSGHDR_STMM(0,2)<1>	0x30007:ud           { NoDDChk } 		// block width and height (8x4)
+
+send (8)	null<1>:d	r20	0x5		0x40A8021:ud      
+
+
+
+// FileName:	DN_Hist_Save.asm 
+// Author:		Vivek Kumar
+// Description:	Saves DN history data to statistics surface
+
+// Write denoise history to memory
+mov (8)    r27<1>:ud				r0.0<8;8,1>:ud                   			// message header   
+
+	mov (1)    mudMSGHDR_HIST(1)<1>		udDNDI_RESP(9,0)<0;1,0>		// Move denoise history to MRF (4x1)
+
+
+shr (2)    r27.0<1>:ud		r7.0<2;2,1>:w				2:w                                 	// X,Y origin / 4
+add (1)    r27.0<1>:ud		r27.0<0;1,0>:ud			r1.12<0;1,0>:uw		{ NoDDClr }  	// Add pitch to X origin
+mov (1)    r27.2<1>:ud		0x3:ud									{ NoDDChk }  	// block width and height
+
+mov (8)		mudMSGHDR_HIST(0)<1>		r27.0<8;8,1>:ud
+send (8)	null<1>:d	r22	0x5		0x40A8021:ud
+
+
+
+// FileName:	DNDI_Enc_Stats_Save.asm
+// Author:		Vivek Kumar
+// Description:	Saves Encoder Statistics data to statistics surface in case of DI enabled (for 16x4 block)
+
+// Write encoder statistics to memory
+//Currently enable this only on Gen6 validation
+mov (8)		mudMSGHDR_ENC_STATS(1)<1>	0x0:ud						// Init payload MRF
+mov (8)		mudMSGHDR_ENC_STATS(0)<1>	r0.0<8;8,1>:ud				// message header
+
+shr (1)		mudMSGHDR_ENC_STATS(0,0)<1>		r7.0<0;1,0>:w            	1:w  	{ NoDDClr }			//enable the flag after testing on si           			{ NoDDClr }	// X origin / 2
+mul (1)		acc0.1<1>:ud					r7.1<0;1,0>:w				3:w																							// Y origin * 3
+shr (1)		mudMSGHDR_ENC_STATS(0,1)<1>		acc0.1<0;1,0>:ud			2:w		{ NoDDClr, NoDDChk }		//enable the flag after testing on si			   { NoDDClr, NoDDChk } // Y origin * 3/4
+mov (1)		mudMSGHDR_ENC_STATS(0,2)<1>		0x20007:ud				{ NoDDChk }			//enable the flag after testing on si						{ NoDDChk } // block width and height (8x3)
+add (2)		mudMSGHDR_ENC_STATS(0,0)<1>		mudMSGHDR_ENC_STATS(0,0)<2;2,1>       	r1.12<2;2,1>:uw					// Add pitch to X,Y origin
+
+
+	//Data block for Encoder Statistics
+	//----------------------------------------------------
+	//|  0  |   1  |   2   |  3  |  4  |  5  |  6  |  7  | Bytes
+	//----------------------------------------------------
+	//| BNE | MCNT | FCNT | TCNT |  X  |  X  |  X  |  X  |
+	//----------------------------------------------------
+	//|   DcTpT    |     SVCM    |   DcBpT   |   DcTpB   |
+	//----------------------------------------------------
+	//|   SHCM     |     STAD    |   DcTcB   |   DcBpB   |
+	//----------------------------------------------------
+	mov (1)		mudMSGHDR_ENC_STATS(1,0)<1>		udDNDI_RESP(9,1)<0;1,0>    		{ NoDDClr }			// Move encoder statistics to MRF
+	mov (2)		mudMSGHDR_ENC_STATS(1,3)<2>		udDNDI_RESP(9,3)<2;2,1>    		{ NoDDClr, NoDDChk }		// Move encoder statistics to MRF
+	mov (2)		mudMSGHDR_ENC_STATS(1,2)<2>		udDNDI_RESP(9,5)<2;2,1>    		{ NoDDChk }			// Move encoder statistics to MRF
+
+
+send (8)   null<1>:d    r24    0x5    0x40A8021:ud
+
+
+
+// FileName:	DN_Load_UV_NV12_16x4.asm 
+// Author:		Vivek Kumar
+// Description:	Read UV for 16x4 block through DATAPORT 
+
+
+
+// FileName:	UVCopy_Load_16x4.asm 
+// Author:		Vivek Kumar
+// Description:	Read UV for 16x4 block through DATAPORT 
+
+
+	add (2)		r27.0<1>:d				r7.0<2;2,1>:w				r4.4<2;2,1>:w				// Source Y Block origin
+	asr (1)  	r27.1<1>:d     			r27.1<0;1,0>:d       	1:w   						{ NoDDClr }		// U/V block origin should be half of Y's
+	mov (1)		r27.2<1>:ud				0x1000F:ud  					{ NoDDChk }		// U/V block width and height (8x4)
+    mov (8)     mudMSGHDR_UVCOPY(0)<1>    	r27.0<8;8,1>:ud
+	send (8)	udDNDI_UV_RESP(0)<1>		r36	0x4	0x2190001:ud
+
+
+
+// FileName:	DN_Save_Y_16x4.asm 
+// Author:		Vivek Kumar
+// Description:	Save one 16x4 blocks of Y channel of DN output for reference
+
+
+ // check top/bottom field first
+cmp.e.f0.0 (1)  null<1>:w               r1.28<0;1,0>:ub     1:w
+
+mov (8)		mudMSGHDR_DN_OUT(0)<1>		r0<8;8,1>:ud            						// message header   
+mov (2)     mdMSGHDR_DN_OUT(0,0)<1>		r7.0<2;2,1>:w  				{ NoDDClr }        	// X origin * 2 (422 output)
+mov (1)     mudMSGHDR_DN_OUT(0,2)<1>	0x3000F:ud		{ NoDDChk }        	// block width and height (32x8)
+
+(f0.0) jmpi (1) TOP_FIELD_FIRST
+
+BOTTOM_FIELD_FIRST:
+
+	mov (4)     mudMSGHDR_DN_OUT(1,0)<1>    udDNDI_RESP(10,0)<4;4,1> 	{ NoDDClr }		// 2nd field luma from current frame (line 0,2)
+	mov (4)     mudMSGHDR_DN_OUT(1,4)<1>   	udDNDI_RESP(4,4)<4;4,1> 		{ NoDDChk }		// 1st field luma from current frame (line 1,3)
+	mov (4)     mudMSGHDR_DN_OUT(2,0)<1>    udDNDI_RESP(10,4)<4;4,1> 	{ NoDDClr }		// 2nd field luma from current frame (line 0,2)
+	mov (4)     mudMSGHDR_DN_OUT(2,4)<1>   udDNDI_RESP(5,4)<4;4,1> 		{ NoDDChk }		// 1st field luma from current frame (line 1,3)    
+
+	jmpi (1) SAVE_DN_CURR
+
+TOP_FIELD_FIRST:
+	mov (4)     mudMSGHDR_DN_OUT(1,0)<1>    udDNDI_RESP(4,0)<4;4,1> 		{ NoDDClr }		// 2nd field luma from current frame (line 0,2)
+	mov (4)     mudMSGHDR_DN_OUT(1,4)<1>   	udDNDI_RESP(10,0)<4;4,1> 	{ NoDDChk }		// 1st field luma from current frame (line 1,3)
+	mov (4)     mudMSGHDR_DN_OUT(2,0)<1>    udDNDI_RESP(5,0)<4;4,1> 		{ NoDDClr }		// 2nd field luma from current frame (line 0,2)
+	mov (4)     mudMSGHDR_DN_OUT(2,4)<1>   udDNDI_RESP(10,4)<4;4,1> 	{ NoDDChk }		// 1st field luma from current frame (line 1,3)
+
+SAVE_DN_CURR:
+//send out data through data port
+send (8)    null<1>:d    r31.0		0x5    0x60A8018:ud     
+
+
+
+// FileName:    DI_Save_422CP_16x4.asm
+// Author:      Vivek Kumar
+// Description: Save two 16x4 blocks of DI output in 422 format to Color Pipe (IECP)
+
+
+.declare mubMSGHDR_DI_OUT1_1  Base=r18.0      ElementSize=1  Type=ub
+
+
+.declare mubMSGHDR_DI_OUT1_2  Base=r21.0      ElementSize=1  Type=ub
+
+
+.declare mubMSGHDR_DI_OUT2_1  Base=r24.0      ElementSize=1  Type=ub
+
+
+.declare mubMSGHDR_DI_OUT2_2  Base=r27.0      ElementSize=1  Type=ub
+
+
+mov (8) r27.0<1>:ud     r0.0<8;8,1>:ud
+shl (1) r27.0<1>:ud     r7.0<0;1,0>:w            1:w  { NoDDClr }          // H. block origin need to be doubled
+mov (1) r27.1<1>:ud     r7.1<0;1,0>:w                 { NoDDClr, NoDDChk }       // Block origin
+mov (1) r27.2<1>:ud     0x3000F:ud        { NoDDClr, NoDDChk }       // Block width and height (16x8)
+
+//M0.3  - 0 - CP Enable, 1 - Area of Interest, 3:2 Message Format(TBD), 4:3 - Ignored, 31:5 CP state pointer
+//Compose area-of-interest bit + color pipe state pointer
+or (1)  r27.3<1>:ud     r2.4<0;1,0>:ud     r7.26<0;1,0>:b     { NoDDChk }
+
+//prepare the message headers
+mov (8) r18.0<1>:ud       r27<8;8,1>:ud
+mov (8) r24.0<1>:ud       r27<8;8,1>:ud
+
+
+// Pack 2nd field Y; First 8x4 block
+	mov (8)		mubMSGHDR_DI_OUT1_1(1)<2>			ubDNDI_RESP(0,0)<8;8,1>			{ NoDDClr } 	// copy line of Y directly to memory as optimization
+	mov (8)		mubMSGHDR_DI_OUT1_1(1,16)<2>		ubDNDI_RESP(0,16)<8;8,1>			{ NoDDClr, NoDDChk } 	// copy line of Y directly to memory as optimization
+	mov (8)		mubMSGHDR_DI_OUT1_1(2)<2>			ubDNDI_RESP(0,32)<8;8,1>			{ NoDDClr } 	// copy line of Y directly to memory as optimization
+	mov (8)		mubMSGHDR_DI_OUT1_1(2,16)<2>		ubDNDI_RESP(0,48)<8;8,1>			{ NoDDClr, NoDDChk } 	// copy line of Y directly to memory as optimization
+
+// Pack 2nd field U, V; First 8x4 block
+	mov (4)     mubMSGHDR_DI_OUT1_1(1,1)<4>   		ubDNDI_RESP(2,1)<8;4,2>		{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
+	mov (4)     mubMSGHDR_DI_OUT1_1(1,17)<4>   	ubDNDI_RESP(2,17)<8;4,2>		{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
+
+	mov (4)     mubMSGHDR_DI_OUT1_1(1,3)<4>	  	ubDNDI_RESP(2,0)<8;4,2>			{ NoDDChk }    	// copy line of V directly to memory as optimization
+	mov (4)     mubMSGHDR_DI_OUT1_1(1,19)<4>   	ubDNDI_RESP(2,16)<8;4,2>		{ NoDDChk }    	// copy line of V directly to memory as optimization
+	mov (4)     mubMSGHDR_DI_OUT1_1(2,1)<4>   		ubDNDI_RESP(2,33)<8;4,2>		{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
+	mov (4)     mubMSGHDR_DI_OUT1_1(2,17)<4>   	ubDNDI_RESP(2,49)<8;4,2>		{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
+
+	mov (4)     mubMSGHDR_DI_OUT1_1(2,3)<4>	  	ubDNDI_RESP(2,32)<8;4,2>			{ NoDDChk }    	// copy line of V directly to memory as optimization
+	mov (4)     mubMSGHDR_DI_OUT1_1(2,19)<4>   	ubDNDI_RESP(2,48)<8;4,2>		{ NoDDChk }    	// copy line of V directly to memory as optimization
+
+
+// Pack 2nd field Y; Second 8x4 block
+mov	(8)	r21.0<1>:ud		r18.0<8;8,1>:ud
+add	(1)	r21.0<1>:ud		r21.0<0;1,0>:w		0x10:w
+
+	mov (8)		mubMSGHDR_DI_OUT1_2(1)<2>			ubDNDI_RESP(0,8)<8;8,1>			{ NoDDClr } 	// copy line of Y directly to memory as optimization
+	mov (8)		mubMSGHDR_DI_OUT1_2(1,16)<2>		ubDNDI_RESP(0,24)<8;8,1>		{ NoDDClr, NoDDChk } 	// copy line of Y directly to memory as optimization
+	mov (8)		mubMSGHDR_DI_OUT1_2(2)<2>			ubDNDI_RESP(0,40)<8;8,1>			{ NoDDClr } 	// copy line of Y directly to memory as optimization
+	mov (8)		mubMSGHDR_DI_OUT1_2(2,16)<2>		ubDNDI_RESP(0,56)<8;8,1>		{ NoDDClr, NoDDChk } 	// copy line of Y directly to memory as optimization
+
+// Pack 2nd field U, V; Second 8x4 block
+	mov (4)     mubMSGHDR_DI_OUT1_2(1,1)<4>   		ubDNDI_RESP(2,9)<8;4,2>		{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
+	mov (4)     mubMSGHDR_DI_OUT1_2(1,17)<4>		ubDNDI_RESP(2,25)<8;4,2>	{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
+
+	mov (4)     mubMSGHDR_DI_OUT1_2(1,3)<4>   		ubDNDI_RESP(2,8)<8;4,2>		{ NoDDChk }    	// copy line of V directly to memory as optimization
+	mov (4)     mubMSGHDR_DI_OUT1_2(1,19)<4>		ubDNDI_RESP(2,24)<8;4,2>		{ NoDDChk }    	// copy line of V directly to memory as optimization
+	mov (4)     mubMSGHDR_DI_OUT1_2(2,1)<4>   		ubDNDI_RESP(2,41)<8;4,2>		{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
+	mov (4)     mubMSGHDR_DI_OUT1_2(2,17)<4>		ubDNDI_RESP(2,57)<8;4,2>	{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
+
+	mov (4)     mubMSGHDR_DI_OUT1_2(2,3)<4>   		ubDNDI_RESP(2,40)<8;4,2>		{ NoDDChk }    	// copy line of V directly to memory as optimization
+	mov (4)     mubMSGHDR_DI_OUT1_2(2,19)<4>		ubDNDI_RESP(2,56)<8;4,2>		{ NoDDChk }    	// copy line of V directly to memory as optimization
+
+send (8)    null<1>:d    r18.0   0x5     0x60A801B:ud
+send (8)    null<1>:d    r21.0   0x5     0x60A801B:ud
+
+// Pack 1st field Y; 1st 8x4 block
+	mov (8)		mubMSGHDR_DI_OUT2_1(1)<2>			ubDNDI_RESP(4,0)<8;8,1>			{ NoDDClr } 	// copy line of Y directly to memory as optimization
+	mov (8)		mubMSGHDR_DI_OUT2_1(1,16)<2>		ubDNDI_RESP(4,16)<8;8,1>			{ NoDDClr, NoDDChk } 	// copy line of Y directly to memory as optimization
+	mov (8)		mubMSGHDR_DI_OUT2_1(2)<2>			ubDNDI_RESP(4,32)<8;8,1>			{ NoDDClr } 	// copy line of Y directly to memory as optimization
+	mov (8)		mubMSGHDR_DI_OUT2_1(2,16)<2>		ubDNDI_RESP(4,48)<8;8,1>			{ NoDDClr, NoDDChk } 	// copy line of Y directly to memory as optimization
+
+// Pack 1st field U,V; 1st 8x4 block
+	mov (4)     mubMSGHDR_DI_OUT2_1(1,1)<4>   		ubDNDI_RESP(6,1)<8;4,2>		{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
+	mov (4)     mubMSGHDR_DI_OUT2_1(1,17)<4>   	ubDNDI_RESP(6,17)<8;4,2>		{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
+
+	mov (4)     mubMSGHDR_DI_OUT2_1(1,3)<4>	  	ubDNDI_RESP(6,0)<8;4,2>		    { NoDDChk }    	// copy line of V directly to memory as optimization
+	mov (4)     mubMSGHDR_DI_OUT2_1(1,19)<4>   	ubDNDI_RESP(6,16)<8;4,2>	    { NoDDChk }    	// copy line of V directly to memory as optimization
+	mov (4)     mubMSGHDR_DI_OUT2_1(2,1)<4>   		ubDNDI_RESP(6,33)<8;4,2>		{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
+	mov (4)     mubMSGHDR_DI_OUT2_1(2,17)<4>   	ubDNDI_RESP(6,49)<8;4,2>		{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
+
+	mov (4)     mubMSGHDR_DI_OUT2_1(2,3)<4>	  	ubDNDI_RESP(6,32)<8;4,2>		    { NoDDChk }    	// copy line of V directly to memory as optimization
+	mov (4)     mubMSGHDR_DI_OUT2_1(2,19)<4>   	ubDNDI_RESP(6,48)<8;4,2>	    { NoDDChk }    	// copy line of V directly to memory as optimization
+
+// Pack 1st field Y; 2nd 8x4 block
+mov	(8)	r27.0<1>:ud		r24.0<8;8,1>:ud
+add	(1)	r27.0<1>:ud		r27.0<0;1,0>:w		0x10:w
+
+	mov (8)		mubMSGHDR_DI_OUT2_2(1)<2>			ubDNDI_RESP(4,8)<8;8,1>			{ NoDDClr } 	// copy line of Y directly to memory as optimization
+	mov (8)		mubMSGHDR_DI_OUT2_2(1,16)<2>		ubDNDI_RESP(4,24)<8;8,1>		{ NoDDClr, NoDDChk } 	// copy line of Y directly to memory as optimization
+	mov (8)		mubMSGHDR_DI_OUT2_2(2)<2>			ubDNDI_RESP(4,40)<8;8,1>			{ NoDDClr } 	// copy line of Y directly to memory as optimization
+	mov (8)		mubMSGHDR_DI_OUT2_2(2,16)<2>		ubDNDI_RESP(4,56)<8;8,1>		{ NoDDClr, NoDDChk } 	// copy line of Y directly to memory as optimization
+
+// Pack 1st field U, V; 2nd 8x4 block
+	mov (4)     mubMSGHDR_DI_OUT2_2(1,1)<4>   		ubDNDI_RESP(6,9)<8;4,2>		{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
+	mov (4)     mubMSGHDR_DI_OUT2_2(1,17)<4>		ubDNDI_RESP(6,25)<8;4,2>	{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
+
+	mov (4)     mubMSGHDR_DI_OUT2_2(1,3)<4>   		ubDNDI_RESP(6,8)<8;4,2>		{ NoDDChk }    	// copy line of V directly to memory as optimization
+	mov (4)     mubMSGHDR_DI_OUT2_2(1,19)<4>		ubDNDI_RESP(6,24)<8;4,2>		{ NoDDChk }    	// copy line of V directly to memory as optimization
+	mov (4)     mubMSGHDR_DI_OUT2_2(2,1)<4>   		ubDNDI_RESP(6,41)<8;4,2>		{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
+	mov (4)     mubMSGHDR_DI_OUT2_2(2,17)<4>		ubDNDI_RESP(6,57)<8;4,2>	{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
+
+	mov (4)     mubMSGHDR_DI_OUT2_2(2,3)<4>   		ubDNDI_RESP(6,40)<8;4,2>		{ NoDDChk }    	// copy line of V directly to memory as optimization
+	mov (4)     mubMSGHDR_DI_OUT2_2(2,19)<4>		ubDNDI_RESP(6,56)<8;4,2>		{ NoDDChk }    	// copy line of V directly to memory as optimization
+
+send (8)    null<1>:d    r24.0     0x5     0x60A801E:ud
+send (8)    null<1>:d    r27.0     0x5     0x60A801E:ud
+
+
+
+// FileName:	DN_Save_UV_NV12_16x4.asm 
+// Author:		Vivek Kumar
+// Description:	Save UV for 16x4 block through DATAPORT 
+
+
+
+// FileName:	UVCopy_Save_16x4.asm 
+// Author:		Vivek Kumar
+// Description:	Save UV for 16x4 block through DATAPORT 
+
+
+//Reuse the header from Load component
+
+	mov (8)		mudMSGHDR_UVCOPY(1)<1>		udDNDI_UV_RESP(0)<8;8,1>
+	send (8)	null<1>:d    r36	0x5    0x40A8019:ud 
+
+
+
+//End of Thread message
+
+mov (8) r127<1>:ud r0.0<8;8,1>:ud 
+ send (1) null<1>:d r127 0x27 0x02000010
+
+
+.end_code 
+.end_kernel
diff --git a/src/shaders/post_processing/gen7/NV12_DNDI_PA.g4a b/src/shaders/post_processing/gen7/NV12_DNDI_PA.g4a
new file mode 100644
index 0000000..7fd55b0
--- /dev/null
+++ b/src/shaders/post_processing/gen7/NV12_DNDI_PA.g4a
@@ -0,0 +1,495 @@
+/*
+ *  Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+//   86    // Total instruction count
+//    1    // Total kernel count
+
+.kernel NV12_DNDI_PA
+.code
+
+
+
+// FileName:	DNDI_PL_Core.asm
+// Author:		Tatiya, Rupesh
+
+
+
+// FileName:	DNDI_Core.asm 
+// Author:		Vivek Kumar
+// Description:	Tasks for DN+DI case (16x4 block)
+
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16)
+.default_register_type  :ub
+
+.reg_count_total        128
+.reg_count_payload      7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+                                    //  e.g.            byte0   byte1  byte2
+                                    // YUYV               0       1      3
+                                    // YVYU               0       3      1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+                                    //  e.g.              byte0           byte1           byte2
+                                    // YUYV                 0               1               3
+                                    // YVYU                 0               3               1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+//                = 000 0001 (min message len 1 ) 0,0000 (resp len 0   -add later)
+//                  0000,0000,0000
+//                  0001(Spawn a root thread),0001 (Root thread spawn thread)
+//                = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+//                = 000 0110 (min message len 6 ) 0,0000 (resp len 0   -add later)
+//                  1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+//                  0000,0000 (Binding table index, added later)
+//                = 0x0C098700
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+        // Message Descriptors
+                //                = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+                //                  1 (header present 1) 0 1010 (media block write) 000000
+                //                  00000000 (binding table index - set later)
+                //                = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+                             // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT  r43    // Dummy Message Register for EOT
+
+
+.declare    mubMSGPAYLOAD  Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub    // byte view of r30 (common.inc; not referenced by any instruction in this kernel)
+.declare    muwMSGPAYLOAD  Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw    // word view of the same base register
+.declare    mudMSGPAYLOAD  Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud    // dword view of the same base register
+.declare    mfMSGPAYLOAD   Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f    // float view of the same base register
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation: five typed views (f/ud/uw/ub/strided ub) of r9; declared by common.inc, not referenced by any instruction in this kernel
+
+.declare fROBUF		  Base=r9.0		ElementSize=4		SrcRegion=<8;8,1>		  DstRegion=<1>		Type=f
+
+.declare udROBUF		Base=r9.0		ElementSize=4		SrcRegion=<8;8,1>		  DstRegion=<1>		Type=ud
+
+.declare uwROBUF		Base=r9.0		ElementSize=2		SrcRegion=<16;16,1>		DstRegion=<1>		Type=uw
+
+.declare ubROBUF		Base=r9.0		ElementSize=1		SrcRegion=<16;16,1>		DstRegion=<1>		Type=ub
+
+.declare ub4ROBUF 	Base=r9.0		ElementSize=1		SrcRegion=<32;8,4>		DstRegion=<4>		Type=ub
+
+
+// End of common.inc
+
+
+// FileName:    DNDI.inc
+// Author:      Vivek Kumar
+// Description: Include file for DN, DI and DNDI
+// Inputs:      DI_ENABLE, DN_ENABLE, DN_PLANAR, DN_PACKED
+
+
+
+
+// End of common.inc
+
+
+//Interface:
+//Static Parameters:
+//r1
+
+
+//====================== Binding table (Explicit To DNDI)=========================================
+
+
+.declare mudMSGHDR_DNDI     Base=r18      ElementSize=4    Type=ud    // DNDI request: header in r18, origin words written into r19
+.declare mdMSGHDR_DNDI      Base=r18      ElementSize=4    Type=d
+.declare mwMSGHDR_DNDI      Base=r18      ElementSize=2    Type=w
+
+
+.declare mudMSGHDR_STMM     Base=r20      ElementSize=4    Type=ud    // STMM write: header r20, data r21
+
+
+.declare mudMSGHDR_HIST     Base=r22      ElementSize=4    Type=ud    // denoise-history write: header r22, data r23
+
+
+.declare mudMSGHDR_ENC_STATS Base=r24 ElementSize=4   Type=ud    // encoder-statistics write: header r24, data r25
+.declare muwMSGHDR_ENC_STATS Base=r24 ElementSize=2   Type=uw
+.declare mubMSGHDR_ENC_STATS Base=r24 ElementSize=1   Type=ub
+
+
+.declare mudMSGHDR_DN_OUT   Base=r31.0      ElementSize=4  Type=ud    // DN luma write: header r31, data r32-r33
+.declare mdMSGHDR_DN_OUT    Base=r31.0      ElementSize=4  Type=d
+.declare mubMSGHDR_DN_OUT   Base=r31.0      ElementSize=1  Type=ub
+
+
+.declare mudMSGHDR_UVCOPY   Base=r36      ElementSize=4  Type=ud    // UV copy read/write: header r36, data r37
+.declare mdMSGHDR_UVCOPY    Base=r36      ElementSize=4  Type=d
+.declare mudMSGHDR_UCOPY    Base=r36       ElementSize=4  Type=ud    // not referenced by any instruction in this kernel
+.declare mudMSGHDR_VCOPY    Base=r38       ElementSize=4  Type=ud    // not referenced by any instruction in this kernel
+
+
+.declare mudMSGHDR_DI_OUT1  Base=r18.0      ElementSize=4     Type=ud    // DI output 1: same base register (r18) as mudMSGHDR_DNDI
+.declare mubMSGHDR_DI_OUT1  Base=r18.0      ElementSize=1     Type=ub
+
+
+.declare mudMSGHDR_DI_OUT2  Base=r23.0      ElementSize=4     Type=ud    // DI output 2: r23 and up, reusing registers of the HIST / ENC_STATS messages
+.declare mubMSGHDR_DI_OUT2  Base=r23.0      ElementSize=1     Type=ub
+
+//r45
+//Use r45 as message header, so no need to "mov" the data.
+
+.declare mudDN_Y_OUT        Base=r45.0 ElementSize=4 SrcRegion=<8;8,1>   DstRegion=<1> Type=ud
+
+// Message response (Denoised & DI-ed pixels & statistics); Use buffer 5
+.declare udDNDI_RESP        Base=r46.0 ElementSize=4 SrcRegion=<8;8,1>   DstRegion=<1> Type=ud
+.declare uwDNDI_RESP        Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare ubDNDI_RESP        Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+// Message response (UV Copy); Use buffer 5
+.declare udDNDI_UV_RESP     Base=r58.0 ElementSize=4 SrcRegion=<8;8,1>  DstRegion=<1> Type=ud
+.declare ubDNDI_UV_RESP     Base=r58.0 ElementSize=1 SrcRegion=<16;16,1>    DstRegion=<1> Type=ub
+
+//Temp GRFs: For 42X to 422 Conversion
+.declare uwDNDI_UVCOPY_TEMP Base=r10.0 ElementSize=2 SrcRegion=<16;16,1>    DstRegion=<1> Type=uw       //8 GRFs
+.declare ubDNDI_UVCOPY_TEMP Base=r10.0 ElementSize=1 SrcRegion=<16;16,1>    DstRegion=<1> Type=ub       //8 GRFs
+//---------------------------------------------------------------------------
+// Message descriptors
+//---------------------------------------------------------------------------
+// Extended message descriptor
+    // Message descriptor   for sampler read
+    //                    = 000 0010 (message len 2) 00000 (resp len - set later, 12 or 5 or 11)
+    //                      1 (header present 1) 0 11 (SIMD32/64 mode)
+    //                      1000 (message type) 0000 (DI state index)
+    //                      00000000 (binding table index - set later)
+    //                    = 0x040b8000
+
+
+// Attention: The Message Length is The Number of GRFs with Data Only, without the Header
+
+
+//---------------------------------------------------------------------------
+// VDI Return Data format
+//---------------------------------------------------------------------------
+// Defines for DI enabled
+
+
+// Defines for DI disabled
+
+
+
+// FileName:	DNDI_Command.asm 
+// Author:		Vivek Kumar
+// Description:	Sends a message to the VDI to process one DN (16x8) or DNDI (16x4) block
+
+// Prepare the DNDI send command
+mov (8)		mudMSGHDR_DNDI(0)<1>			r0.0<8;8,1>:ud					// message header (copy of r0)
+mov (1)		mwMSGHDR_DNDI(1,4)<1>			r7.0<0;1,0>:w		{ NoDDClr }		// horizontal origin	// Do we need to add offset here? -vK
+mov (1)		mwMSGHDR_DNDI(1,12)<1>			r7.1<0;1,0>:w		{ NoDDChk }		// vertical origin		// Can these 2 be combined? - vK
+
+send (8)	udDNDI_RESP(0)<1>	r18	0x2	0x4BE8003:ud
+
+// On Gen6, with VDI walker, use the XY pair returned rather than programmed above
+// VDI_RETURNED_XY is ordered XY when the walker is enabled, and is the same as programmed above when the walker is disabled
+mov (2) 	r7.0<1>:w     uwDNDI_RESP(9,14)<2;2,1>	// horizontal/vertical origin, returned in W.14 and W.15 of response register 9
+
+
+
+// FileName:	DI_STMM_Save.asm 
+// Author:		Vivek Kumar
+// Description:	Saves DI STMM Data to statistics surface in case of DI enabled (for 16x4 block)
+
+// Write STMM to memory
+mov (8)     mudMSGHDR_STMM(0)<1>	r0.0<8;8,1>:ud               							// message header (copy of r0)
+mov (8)     mudMSGHDR_STMM(1)<1>   	udDNDI_RESP(8,0)         					// Move STMM (response register 8) into the payload register r21
+
+shr (1)     mudMSGHDR_STMM(0,0)<1>	r7.0<0;1,0>:w            1:w     { NoDDClr } 		// X origin / 2
+mov (1)     mudMSGHDR_STMM(0,1)<1>	r7.1<0;1,0>:w                    { NoDDClr, NoDDChk }  	// Y origin
+mov (1)     mudMSGHDR_STMM(0,2)<1>	0x30007:ud           { NoDDChk } 		// block width and height (8x4)
+
+send (8)	null<1>:d	r20	0x5		0x40A8021:ud      
+
+
+
+// FileName:	DN_Hist_Save.asm 
+// Author:		Vivek Kumar
+// Description:	Saves DN history data to statistics surface
+
+// Write denoise history to memory
+mov (8)    r27<1>:ud				r0.0<8;8,1>:ud                   			// message header, assembled in temporary register r27
+
+	mov (1)    mudMSGHDR_HIST(1)<1>		udDNDI_RESP(9,0)<0;1,0>		// Move denoise history (4x1) into the payload register r23
+
+
+shr (2)    r27.0<1>:ud		r7.0<2;2,1>:w				2:w                                 	// X,Y origin / 4
+add (1)    r27.0<1>:ud		r27.0<0;1,0>:ud			r1.12<0;1,0>:uw		{ NoDDClr }  	// Add pitch to X origin
+mov (1)    r27.2<1>:ud		0x3:ud									{ NoDDChk }  	// block width and height (4x1)
+
+mov (8)		mudMSGHDR_HIST(0)<1>		r27.0<8;8,1>:ud					// copy the finished header from r27 into r22
+send (8)	null<1>:d	r22	0x5		0x40A8021:ud
+
+
+
+// FileName:	DNDI_Enc_Stats_Save.asm
+// Author:		Vivek Kumar
+// Description:	Saves Encoder Statistics data to statistics surface in case of DI enabled (for 16x4 block)
+
+// Write encoder statistics to memory
+//Currently enable this only on Gen6 validation
+mov (8)		mudMSGHDR_ENC_STATS(1)<1>	0x0:ud						// Zero the payload register (r25)
+mov (8)		mudMSGHDR_ENC_STATS(0)<1>	r0.0<8;8,1>:ud				// message header (copy of r0)
+
+shr (1)		mudMSGHDR_ENC_STATS(0,0)<1>		r7.0<0;1,0>:w            	1:w  	{ NoDDClr }			// X origin / 2
+mul (1)		acc0.1<1>:ud					r7.1<0;1,0>:w				3:w																							// Y origin * 3
+shr (1)		mudMSGHDR_ENC_STATS(0,1)<1>		acc0.1<0;1,0>:ud			2:w		{ NoDDClr, NoDDChk }		// Y origin * 3/4
+mov (1)		mudMSGHDR_ENC_STATS(0,2)<1>		0x20007:ud				{ NoDDChk }			// block width and height (8x3)
+add (2)		mudMSGHDR_ENC_STATS(0,0)<1>		mudMSGHDR_ENC_STATS(0,0)<2;2,1>       	r1.12<2;2,1>:uw					// Add pitch to X,Y origin
+
+
+	//Data block for Encoder Statistics
+	//----------------------------------------------------
+	//|  0  |   1  |   2   |  3  |  4  |  5  |  6  |  7  | Bytes
+	//----------------------------------------------------
+	//| BNE | MCNT | FCNT | TCNT |  X  |  X  |  X  |  X  |
+	//----------------------------------------------------
+	//|   DcTpT    |     SVCM    |   DcBpT   |   DcTpB   |
+	//----------------------------------------------------
+	//|   SHCM     |     STAD    |   DcTcB   |   DcBpB   |
+	//----------------------------------------------------
+	mov (1)		mudMSGHDR_ENC_STATS(1,0)<1>		udDNDI_RESP(9,1)<0;1,0>    		{ NoDDClr }			// Move encoder statistics into the payload: response dword 1 -> payload dword 0
+	mov (2)		mudMSGHDR_ENC_STATS(1,3)<2>		udDNDI_RESP(9,3)<2;2,1>    		{ NoDDClr, NoDDChk }		// response dwords 3,4 -> payload dwords 3,5 (destination stride 2)
+	mov (2)		mudMSGHDR_ENC_STATS(1,2)<2>		udDNDI_RESP(9,5)<2;2,1>    		{ NoDDChk }			// response dwords 5,6 -> payload dwords 2,4 (destination stride 2)
+
+
+send (8)   null<1>:d    r24    0x5    0x40A8021:ud
+
+
+
+// FileName:	DN_Load_UV_NV12_16x4.asm 
+// Author:		Vivek Kumar
+// Description:	Read UV for 16x4 block through DATAPORT 
+
+
+
+// FileName:	UVCopy_Load_16x4.asm 
+// Author:		Vivek Kumar
+// Description:	Read UV for 16x4 block through DATAPORT 
+
+
+	add (2)		r27.0<1>:d				r7.0<2;2,1>:w				r4.4<2;2,1>:w				// Source Y Block origin
+	asr (1)  	r27.1<1>:d     			r27.1<0;1,0>:d       	1:w   						{ NoDDClr }		// U/V block origin should be half of Y's
+	mov (1)		r27.2<1>:ud				0x1000F:ud  					{ NoDDChk }		// U/V block width and height (16 bytes x 2 rows)
+    mov (8)     mudMSGHDR_UVCOPY(0)<1>    	r27.0<8;8,1>:ud					// copy the header from temporary r27 into r36
+	send (8)	udDNDI_UV_RESP(0)<1>		r36	0x4	0x2190001:ud
+
+
+
+// FileName:	DN_Save_Y_16x4.asm 
+// Author:		Vivek Kumar
+// Description:	Save one 16x4 blocks of Y channel of DN output for reference
+
+
+ // check top/bottom field first: f0.0 is set when byte r1.28 equals 1
+cmp.e.f0.0 (1)  null<1>:w               r1.28<0;1,0>:ub     1:w
+
+mov (8)		mudMSGHDR_DN_OUT(0)<1>		r0<8;8,1>:ud            						// message header (copy of r0)
+mov (2)     mdMSGHDR_DN_OUT(0,0)<1>		r7.0<2;2,1>:w  				{ NoDDClr }        	// X,Y origin, copied unscaled
+mov (1)     mudMSGHDR_DN_OUT(0,2)<1>	0x3000F:ud		{ NoDDChk }        	// block width and height (16x4)
+
+(f0.0) jmpi (1) TOP_FIELD_FIRST
+
+BOTTOM_FIELD_FIRST:
+
+	mov (4)     mudMSGHDR_DN_OUT(1,0)<1>    udDNDI_RESP(10,0)<4;4,1> 	{ NoDDClr }		// 2nd field luma from current frame (line 0,2)
+	mov (4)     mudMSGHDR_DN_OUT(1,4)<1>   	udDNDI_RESP(4,4)<4;4,1> 		{ NoDDChk }		// 1st field luma from current frame (line 1,3)
+	mov (4)     mudMSGHDR_DN_OUT(2,0)<1>    udDNDI_RESP(10,4)<4;4,1> 	{ NoDDClr }		// 2nd field luma from current frame (line 0,2)
+	mov (4)     mudMSGHDR_DN_OUT(2,4)<1>   udDNDI_RESP(5,4)<4;4,1> 		{ NoDDChk }		// 1st field luma from current frame (line 1,3)    
+
+	jmpi (1) SAVE_DN_CURR
+
+TOP_FIELD_FIRST:
+	mov (4)     mudMSGHDR_DN_OUT(1,0)<1>    udDNDI_RESP(4,0)<4;4,1> 		{ NoDDClr }		// 2nd field luma from current frame (line 0,2) -- NOTE(review): same labels as the BOTTOM_FIELD_FIRST branch although the source registers are swapped; confirm field naming
+	mov (4)     mudMSGHDR_DN_OUT(1,4)<1>   	udDNDI_RESP(10,0)<4;4,1> 	{ NoDDChk }		// 1st field luma from current frame (line 1,3)
+	mov (4)     mudMSGHDR_DN_OUT(2,0)<1>    udDNDI_RESP(5,0)<4;4,1> 		{ NoDDClr }		// 2nd field luma from current frame (line 0,2)
+	mov (4)     mudMSGHDR_DN_OUT(2,4)<1>   udDNDI_RESP(10,4)<4;4,1> 	{ NoDDChk }		// 1st field luma from current frame (line 1,3)
+
+SAVE_DN_CURR:
+//send out data through data port (header r31 followed by the two payload registers filled above)
+send (8)    null<1>:d    r31.0		0x5    0x60A8018:ud     
+
+
+
+// FileName:    DI_Save_PA_16x4.asm
+// Author:      Vivek Kumar
+// Description: Save two 16x4 blocks of DI output in Packed format
+
+
+add (4) a0.4<1>:uw   r2.28<4;4,1>:ub   608:w               // Initial Y,U,V byte offsets in the YUV422 block; base 608 = 19*32, i.e. the payload starts at r19 (right after the r18 header)
+
+mov (8) r27.0<1>:ud     r0.0<8;8,1>:ud
+shl (1) r27.0<1>:d      r7.0<0;1,0>:w            1:w  { NoDDClr }          // H. block origin need to be doubled
+mov (1) r27.1<1>:d      r7.1<0;1,0>:w                 { NoDDClr, NoDDChk }       // Block origin
+mov (1) r27.2<1>:ud     0x3001F:ud          { NoDDChk }          // Block width and height (32x4)
+
+//prepare the message headers (both start as copies of the header built in r27)
+mov (8) r18.0<1>:ud       r27<8;8,1>:ud
+mov (8) r23.0<1>:ud       r27<8;8,1>:ud
+
+// Pack 2nd field Y (destination stride 2: one Y byte every other byte)
+    mov (16)    r[a0.4, 0]<2>      ubDNDI_RESP(0,0)               { NoDDClr }
+    mov (16)    r[a0.4, 32]<2>      ubDNDI_RESP(0,16)               { NoDDClr }
+    mov (16)    r[a0.4, 64]<2>      ubDNDI_RESP(0,32)               { NoDDClr }
+    mov (16)    r[a0.4, 96]<2>      ubDNDI_RESP(0,48)               { NoDDClr }
+// Pack 2nd field U (destination stride 4; source takes the odd bytes of response register 2)
+    mov (8)     r[a0.5, 0]<4>      ubDNDI_RESP(2,1)<16;8,2>   { NoDDClr, NoDDChk }  //U pixels
+    mov (8)     r[a0.5, 32]<4>      ubDNDI_RESP(2,17)<16;8,2>   { NoDDClr, NoDDChk }  //U pixels
+    mov (8)     r[a0.5, 64]<4>      ubDNDI_RESP(2,33)<16;8,2>   { NoDDClr, NoDDChk }  //U pixels
+    mov (8)     r[a0.5, 96]<4>      ubDNDI_RESP(2,49)<16;8,2>   { NoDDClr, NoDDChk }  //U pixels
+// Pack 2nd field V (destination stride 4; source takes the even bytes of response register 2)
+    mov (8)     r[a0.6, 0]<4>      ubDNDI_RESP(2,0)<16;8,2>     { NoDDChk }     //V pixels
+    mov (8)     r[a0.6, 32]<4>      ubDNDI_RESP(2,16)<16;8,2>     { NoDDChk }     //V pixels
+    mov (8)     r[a0.6, 64]<4>      ubDNDI_RESP(2,32)<16;8,2>     { NoDDChk }     //V pixels
+    mov (8)     r[a0.6, 96]<4>      ubDNDI_RESP(2,48)<16;8,2>     { NoDDChk }     //V pixels
+
+// Pack 1st field Y (offsets +160 bytes = 5 registers further, i.e. the payload that follows the r23 header)
+    mov (16)    r[a0.4, 160]<2>    ubDNDI_RESP(4,0)               { NoDDClr }
+    mov (16)    r[a0.4, 192]<2>    ubDNDI_RESP(4,16)               { NoDDClr }
+    mov (16)    r[a0.4, 224]<2>    ubDNDI_RESP(4,32)               { NoDDClr }
+    mov (16)    r[a0.4, 256]<2>    ubDNDI_RESP(4,48)               { NoDDClr }
+// Pack 1st field U (odd bytes of response register 6)
+    mov (8)     r[a0.5, 160]<4>    ubDNDI_RESP(6,1)<16;8,2>   { NoDDClr, NoDDChk }  //U pixels
+    mov (8)     r[a0.5, 192]<4>    ubDNDI_RESP(6,17)<16;8,2>   { NoDDClr, NoDDChk }  //U pixels
+    mov (8)     r[a0.5, 224]<4>    ubDNDI_RESP(6,33)<16;8,2>   { NoDDClr, NoDDChk }  //U pixels
+    mov (8)     r[a0.5, 256]<4>    ubDNDI_RESP(6,49)<16;8,2>   { NoDDClr, NoDDChk }  //U pixels
+// Pack 1st field V (even bytes of response register 6)
+    mov (8)     r[a0.6, 160]<4>    ubDNDI_RESP(6,0)<16;8,2>     { NoDDChk }     //V pixels
+    mov (8)     r[a0.6, 192]<4>    ubDNDI_RESP(6,16)<16;8,2>     { NoDDChk }     //V pixels
+    mov (8)     r[a0.6, 224]<4>    ubDNDI_RESP(6,32)<16;8,2>     { NoDDChk }     //V pixels
+    mov (8)     r[a0.6, 256]<4>    ubDNDI_RESP(6,48)<16;8,2>     { NoDDChk }     //V pixels
+
+//save the previous frame
+send (8)    null<1>:d    r18.0     0x5     0xA0A801B:ud
+
+//save the current frame
+send (8)    null<1>:d    r23.0     0x5     0xA0A801E:ud
+
+
+
+// FileName:	DN_Save_UV_NV12_16x4.asm 
+// Author:		Vivek Kumar
+// Description:	Save UV for 16x4 block through DATAPORT 
+
+
+
+// FileName:	UVCopy_Save_16x4.asm 
+// Author:		Vivek Kumar
+// Description:	Save UV for 16x4 block through DATAPORT 
+
+
+//Reuse the header from Load component (r36 still holds the header written before the UV read)
+
+	mov (8)		mudMSGHDR_UVCOPY(1)<1>		udDNDI_UV_RESP(0)<8;8,1>		// UV data returned by the read becomes the write payload (r58 -> r37)
+	send (8)	null<1>:d    r36	0x5    0x40A8019:ud 
+
+
+
+//End of Thread message: r127 receives a copy of r0 and is sent as the final message of the kernel
+
+mov (8) r127<1>:ud r0.0<8;8,1>:ud 
+ send (1) null<1>:d r127 0x27 0x02000010
+
+
+.end_code 
+.end_kernel
diff --git a/src/shaders/post_processing/gen7/NV12_DNUV_NV12.g4a b/src/shaders/post_processing/gen7/NV12_DNUV_NV12.g4a
new file mode 100644
index 0000000..f560ef8
--- /dev/null
+++ b/src/shaders/post_processing/gen7/NV12_DNUV_NV12.g4a
@@ -0,0 +1,2491 @@
+/*
+ *  Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+// 1153    // Total instruction count
+//    1    // Total kernel count
+
+
+.kernel NV12_DNUV_NV12
+.code
+
+
+
+//Module		: DN_UV_Setup
+//Author		: Tatiya, Rupesh
+//Description	: Initial Set-up for DN_UV
+
+
+
+
+// Module name	: ChromaDenoise.inc
+// Author		: Tatiya, Rupesh
+
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16)
+.default_register_type  :ub
+
+.reg_count_total        128
+.reg_count_payload      7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+                                    //  e.g.            byte0   byte1  byte2
+                                    // YUYV               0       1      3
+                                    // YVYU               0       3      1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+                                    //  e.g.              byte0           byte1           byte2
+                                    // YUYV                 0               1               3
+                                    // YVYU                 0               3               1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+//                = 000 0001 (min message len 1 ) 0,0000 (resp len 0   -add later)
+//                  0000,0000,0000
+//                  0001(Spawn a root thread),0001 (Root thread spawn thread)
+//                = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+//                = 000 0110 (min message len 6 ) 0,0000 (resp len 0   -add later)
+//                  1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+//                  0000,0000 (Binding table index, added later)
+//                = 0x02000011
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+        // Message Descriptors
+                //                = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+                //                  1 (header present 1) 0 1010 (media block write) 000000
+                //                  00000000 (binding table index - set later)
+                //                = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+                             // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT  r43    // Dummy Message Register for EOT
+
+
+.declare    mubMSGPAYLOAD  Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub    // byte view of the payload area based at r30
+.declare    muwMSGPAYLOAD  Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw    // word view of the same r30 base
+.declare    mudMSGPAYLOAD  Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud    // dword view of the same r30 base
+.declare    mfMSGPAYLOAD   Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f    // float view of the same r30 base
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+
+.declare fROBUF		  Base=r9.0		ElementSize=4		SrcRegion=<8;8,1>		  DstRegion=<1>		Type=f	// float view of the rotation temp space at r9
+
+.declare udROBUF		Base=r9.0		ElementSize=4		SrcRegion=<8;8,1>		  DstRegion=<1>		Type=ud	// dword view, same r9 base
+
+.declare uwROBUF		Base=r9.0		ElementSize=2		SrcRegion=<16;16,1>		DstRegion=<1>		Type=uw	// word view, same r9 base
+
+.declare ubROBUF		Base=r9.0		ElementSize=1		SrcRegion=<16;16,1>		DstRegion=<1>		Type=ub	// byte view, same r9 base
+
+.declare ub4ROBUF 	Base=r9.0		ElementSize=1		SrcRegion=<32;8,4>		DstRegion=<4>		Type=ub	// byte view with stride 4 on both source and destination (every 4th byte)
+
+
+// End of common.inc
+
+
+//Interface:
+//Static Parameters:
+//r1
+
+
+//======================================================
+//Interface for serpent mode Chroma Denoise, added by Le
+//======================================================
+//r1
+
+
+//noise history thresholds (low and high)
+
+
+//temporal difference thresholds (high and low)
+
+
+//noise history thresholds (low and high)
+//#define ubNoiseHistMaxHigh  r1.22
+//#define ubNoiseHistMaxLow  r1.23
+//#define ubNoiseHistDeltaHigh  r1.24
+//#define ubNoiseHistDeltaLow  r1.25
+
+//Gaussian thresholds
+
+
+//temporal difference thresholds (default)
+
+
+//r2
+//history thresholds (default)
+
+
+//denoise factor  (0-63)
+
+
+//====================== Binding table (Explicit To DNUV)=========================================
+//Used by DN_UV kernels
+
+
+	//Pointer to Current Frame UV
+
+
+//r1-r6
+	//CURBE GRFs used as TEMP : Used for max computation and storing max temporarily. : r1-r6
+
+
+	.declare	ubCURBE_TEMP	Base=r1.0	ElementSize=1	Type=ub	// byte view of the CURBE registers reused as scratch (base r1)
+	.declare	uwCURBE_TEMP	Base=r1.0	ElementSize=2	Type=uw	// word view, same r1 base; the noise-reduction code keeps weight-history values in (0)..(3)
+	.declare	wCURBE_TEMP		Base=r1.0	ElementSize=2	Type=w	// signed word view, same r1 base; (4)..(6) receive the last three temporal-diff rows
+	.declare	fCURBE_TEMP		Base=r1.0	ElementSize=4	Type=f	// float view, same r1 base
+	.declare	udCURBE_TEMP		Base=r1.0	ElementSize=4	Type=ud	// dword view, same r1 base
+	.declare	uwMAX_ABS_DIFF	Base=r5.0	ElementSize=2	Type=uw	// based at r5, i.e. inside the r1-r6 scratch range above
+
+	//r1
+
+
+	//r3
+
+
+    //r4
+
+//r7
+	//All of the following has to defined in Same GRF for optimal performance.
+
+
+//r8-24
+    //Previous Frame UV
+
+	.declare	udPREV_UV		Base=r8.0	ElementSize=4	Type=ud
+	.declare	ubPREV_UV		Base=r8.0	ElementSize=1	Type=ub
+
+
+//r25-48
+	//TEMP Space for any Usage.
+
+
+//=========================================================================
+//Definitions and declarations for serpent mode Chroma Denoise, added by Le
+//========================================================================= 	  
+
+
+	.declare	udGNE_UV		Base=r24.0	ElementSize=4	Type=ud
+  .declare	fGNE_UV		Base=r24.0	ElementSize=4	Type=f
+  .declare	ubGNE_UV		Base=r24.0	ElementSize=1	Type=ub
+
+  .declare	udMSGHDR_BNE_SERP	Base=r25.0	ElementSize=4	Type=ud
+  .declare	udMSGSRC_BNE_SERP	Base=r26.0	ElementSize=4	Type=ud
+
+
+  .declare	ubDN_UV_Thresholds Base=r26.0	ElementSize=1	Type=ub
+  .declare	ubDN_UV_Thresholds_Temp  Base=r27.0	ElementSize=1	Type=ub
+  .declare	udDN_UV_Thresholds Base=r26.0	ElementSize=4	Type=ud
+  .declare	udDN_UV_Thresholds_Temp Base=r27.0	ElementSize=4	Type=ud
+  .declare	fDN_UV_Thresholds Base=r26.0	ElementSize=4	Type=f
+  .declare	fDN_UV_Thresholds_Temp Base=r27.0	ElementSize=4	Type=f 	
+
+
+//====================================================================================
+
+
+	//TEMP23: To hold V data for PL3 surfaces (based at r25, the same base as the median/diff views below)
+	.declare	udCURR_V_TEMP	Base=r25.0	ElementSize=4	Type=ud
+	.declare	ubCURR_V_TEMP	Base=r25.0	ElementSize=1	Type=ub
+
+	//GRFs to calculate Median: r25-r42
+	.declare	ubMEDIAN_TEMP	Base=r25.0	ElementSize=1	Type=ub
+
+	//18 GRFs to hold difference : r25-r42 (signed and unsigned word views of the same registers)
+	.declare	wDIFF			Base=r25.0	ElementSize=2	Type=w
+	.declare	uwDIFF			Base=r25.0	ElementSize=2	Type=uw
+
+	//Temporal Diff: same r25 base as wDIFF. NOTE(review): the noise-reduction code indexes this up to (28), which is past r42 - presumably the overlapped registers are dead by then; confirm.
+	.declare	wDIFF_TEMPORAL			Base=r25.0	ElementSize=2	Type=w
+	.declare	ubDIFF_TEMPORAL			Base=r25.0	ElementSize=1	Type=ub
+
+	//4 GRFs to hold Sobel Value : r43-46
+	.declare	wSOBEL_X	Base=r43.0	ElementSize=2	Type=w
+	.declare	uwSOBEL		Base=r43.0	ElementSize=2	Type=uw
+
+
+	//2 GRFs to hold SOAD temporarily: r47-48
+	.declare	uwSOAD			Base=r47.0	ElementSize=2	Type=uw
+
+	//Temp GRFs to hold extra YUYV pixels: r43-r48 (same registers as the Sobel and SOAD views above)
+	.declare	ubTEMP5			Base=r43.0	ElementSize=1	Type=ub
+
+	//Temp GRFs in Median Calculation: r47-r48 (same registers as uwSOAD)
+	.declare	ubTEMP1			Base=r47.0	ElementSize=1	Type=ub
+
+	.declare	uwTEMP0			Base=r48.0	ElementSize=2	Type=uw	// word view of r48, the second GRF of the r47-r48 pair
+	.declare	ubTEMP0			Base=r48.0	ElementSize=1	Type=ub	// byte view of r48
+
+	//Temp Space to store Median : r49-50
+
+	.declare	ubMEDIAN	Base=r49.0	ElementSize=1	Type=ub
+
+//r49
+
+
+//r50
+    //Message Source
+
+
+//r51
+	//DN_UV History Surface
+
+	.declare	udHIST_UV		Base=r51.0	ElementSize=4	Type=ud
+	.declare	ubHIST_UV		Base=r51.0	ElementSize=1	Type=ub
+
+//r52 - r91
+	//r52
+	//Current Frame UV
+
+
+	.declare	udCURR_UV		Base=r52.0	ElementSize=4	Type=ud
+	.declare	ubCURR_UV		Base=r52.0	ElementSize=1	Type=ub
+
+	//r54
+	//CURBE COPY
+
+
+	//r55
+
+
+	.declare 	uwSOAD_MIN_8x4		Base=r56.0	ElementSize=2	Type=uw
+
+	//r61
+
+
+	//r62
+
+
+	//History Surface Temp Origin
+
+
+    //r63
+    //Current Frame Y Temp Origin
+
+
+	//BNE Surface Origin
+
+
+    //r70
+
+	.declare	uwDIFF_TEMPORAL_SUM4x4	Base=r70.0	ElementSize=2	Type=uw  //4 GRFs
+
+	//r74-91 : For Saving Dest UV (PL2/PL3)
+
+
+	.declare	ubMSGPAYLOAD_UV0	Base=r75.0	ElementSize=1	Type=ub
+
+
+	.declare	ubMSGPAYLOAD_U		Base=r75.0	ElementSize=1	Type=ub
+
+
+	.declare	ubMSGPAYLOAD_UV1	Base=r84.0	ElementSize=1	Type=ub
+
+
+	.declare	ubMSGPAYLOAD_V		Base=r84.0	ElementSize=1	Type=ub
+
+	//r90
+
+	.declare	uwDIFF_TEMPORAL_SUM4x4_FINAL	Base=r90.0	ElementSize=2	Type=uw  //2 GRFs
+
+//r92-127
+	//Current Frame Y: four groups of (1 header GRF + Y data), based at r92, r101, r110 and r119
+
+
+	//r92: also used as the message header of the CURR_Y0 load
+    .declare	uwDIFF_TEMPORAL_SUM4x4_TEMP_0		Base=r92	ElementSize=2	Type=uw
+	//r101: also used as the message header of the CURR_Y1 load
+    .declare	uwDIFF_TEMPORAL_SUM4x4_TEMP_1		Base=r101	ElementSize=2	Type=uw
+	//r110: also used as the message header of the CURR_Y2 load
+    .declare	uwDIFF_TEMPORAL_SUM4x4_TEMP_2		Base=r110	ElementSize=2	Type=uw
+	//r119: also used as the message header of the CURR_Y3 load
+    .declare	uwDIFF_TEMPORAL_SUM4x4_TEMP_3		Base=r119	ElementSize=2	Type=uw
+
+	.declare	udCURR_Y0		Base=r93.0	ElementSize=4	Type=ud	// Y block 0, starts one GRF after its header r92
+    .declare	ubCURR_Y0		Base=r93.0	ElementSize=1	Type=ub
+    .declare	udCURR_Y1		Base=r102.0	ElementSize=4	Type=ud	// Y block 1, starts one GRF after its header r101
+	.declare	ubCURR_Y1		Base=r102.0	ElementSize=1	Type=ub
+	.declare	udCURR_Y2		Base=r111.0	ElementSize=4	Type=ud	// Y block 2, starts one GRF after its header r110
+	.declare	ubCURR_Y2		Base=r111.0	ElementSize=1	Type=ub
+	.declare	udCURR_Y3		Base=r120.0	ElementSize=4	Type=ud	// Y block 3, starts one GRF after its header r119
+	.declare	ubCURR_Y3		Base=r120.0	ElementSize=1	Type=ub
+
+	//r92: To hold U data for PL3 surfaces (current frame; same base as the first Y group)
+	.declare	udCURR_U_TEMP		Base=r92.0	ElementSize=4	Type=ud
+    .declare	ubCURR_U_TEMP		Base=r92.0	ElementSize=1	Type=ub
+
+    //r112: To hold U data for PL3 surfaces (previous frame)
+	.declare	udPREV_U_TEMP		Base=r112.0	ElementSize=4	Type=ud
+	.declare	ubPREV_U_TEMP		Base=r112.0	ElementSize=1	Type=ub
+
+	//r120: To hold V data for PL3 surfaces (previous frame; same base as CURR_Y3)
+	.declare	udPREV_V_TEMP		Base=r120.0	ElementSize=4	Type=ud
+	.declare	ubPREV_V_TEMP		Base=r120.0	ElementSize=1	Type=ub
+
+
+	// Initialize message source with r0: copy all 8 dwords of r0 into every GRF later used as a send header.
+	mov (8)   r50.0<1>:ud		r0.0<8;8,1>:ud		// r50: header for the current-UV, previous-UV and history loads
+	mov (8)   r92.0<1>:ud		r0.0<8;8,1>:ud		// r92: header for the CURR_Y0 load (rewritten from acc0 before that load)
+	mov (8)   r101.0<1>:ud		r0.0<8;8,1>:ud		// r101: header for the CURR_Y1 load
+	mov (8)   r110.0<1>:ud		r0.0<8;8,1>:ud		// r110: header for the CURR_Y2 load
+	mov (8)   r119.0<1>:ud		r0.0<8;8,1>:ud		// r119: header for the CURR_Y3 load
+
+
+
+//Module Name 	: 	DN_UV_PL2_Load_Curr_Frame_UV
+//Author		:	Tatiya, Rupesh
+//Description	:	Loads Current Frame UV data for PL2 input.
+
+
+
+//Module name 	:  DN_UV_Load_Curr_Frame_UV
+//Author		:  Tatiya, Rupesh
+//Description	:  Loads Current Frame (UV only).
+//				   We need 4 extra rows (2 per field) and 2 extra pixel (1 each side) for both U and V each.
+//				   The processing size is 16x16 U and V each. So we need : U size - 18x20, V size - 18x20, UV size - 36x20, YUYV size - 72x20.
+
+
+
+
+//36x20 interleaved UV block is partitioned as follows:
+//				<------ 18 --------> <--------18 ------->
+//				-----------------------------------------
+//				|		20x8   A 	!|    20x8     D    !
+//				|      (overlapped) !|   (overlapped) 	!
+//				|-------------------!|------------------!
+//				|       20x8   B    !|      20x8   E    !
+//				|      (overlapped) !|     (overlapped) !
+//				|-------------------!|------------------!
+//				|		20x4   C    !|		20x4   F    !
+//				|     (overlapped)	!|    (overlapped)	!
+//				-----------------------------------------
+//
+// Coordinates: (x-2, y-2), (x+14, y-2), (x-2, y+6), (x+14, y+6), (x-2, y+14), (x+14, y+14)
+
+	//UV surface origin: (ORIX, ORIY/2)
+	add  (2)	r7.4<1>:w		r7.0<2;2,1>:w	 	r4.4<2;2,1>:w	 { AccWrEn } // Source Block origin: r7.4/r7.5 = r7.0/r7.1 + r4.4/r4.5; the sums are also kept in the accumulator
+	shr  (1)	r7.5<1>:w		acc0.5<0;1,0>:w		1:w	// r7.5 = Y sum >> 1 (the ORIY/2 above), read back from the accumulator
+	mov  (2)	acc0.0<1>:d							r7.4<2;2,1>:w	// acc0.0/acc0.1 = UV origin (x, y) widened to dwords
+
+	//A
+	add  (2)   	r50.0<1>:d	acc0.0<2;2,1>:d		-2:d					{ AccWrEn }	// header (x, y) = (x-2, y-2); acc0.0/acc0.1 keep that value for the blocks below
+ 	mov  (1)   	r50.2<1>:ud	0x70013:ud			// block size field, same encoding as the 0xF000F (16x16) case used for the previous frame: 0x13 / 0x7 -> 20 wide x 8 rows
+ 	send (8)	udCURR_UV(0)<1>			r50		0x4	0x2890004:ud	// block A -> udCURR_UV(0..7)
+
+	//B
+ 	add  (1)    r50.1<1>:d	acc0.1<0;1,0>:d			8:d	// y = (y-2) + 8 = y+6; x and block size unchanged from A
+ 	send (8)	udCURR_UV(8)<1>			r50		0x4	0x2890004:ud	// block B -> udCURR_UV(8..15)
+
+	//C
+	add  (1)    r50.1<1>:d	acc0.1<0;1,0>:d			16:d   				// y = (y-2) + 16 = y+14
+	mov  (1)   	r50.2<1>:ud	0x30013:ud			// 20 wide x 4 rows
+ 	send (8)	udCURR_UV(16)<1>		r50		0x4	0x2490004:ud	// block C -> udCURR_UV(16..19); descriptor differs from A/B only in the field that shrinks with the 4-row block
+
+	//D
+	add  (1)    r50.0<1>:d	acc0.0<0;1,0>:d			16:d				{ AccWrEn }	// x = (x-2) + 16 = x+14
+	mov  (1)    r50.1<1>:d	acc0.1<0;1,0>:d								// y back to y-2 (acc0.1 was not changed by B or C)
+	mov  (1)   	r50.2<1>:ud	0x70013:ud			// 20 wide x 8 rows again
+ 	send (8)	udCURR_UV(20)<1>		r50		0x4	0x2890004:ud	// block D -> udCURR_UV(20..27)
+
+	//E
+ 	add  (1)    r50.1<1>:d	acc0.1<0;1,0>:d			8:d	// y = y+6, x stays x+14
+ 	send (8)	udCURR_UV(28)<1>		r50		0x4	0x2890004:ud	// block E -> udCURR_UV(28..35)
+
+	//F
+ 	add  (1)    r50.1<1>:d	acc0.1<0;1,0>:d			16:d   					// y = y+14
+	mov  (1)   	r50.2<1>:ud	0x30013:ud				// 20 wide x 4 rows
+ 	send (8)	udCURR_UV(36)<1>		r50		0x4	0x2490004:ud	// block F -> udCURR_UV(36..39)
+
+ 	//History Origin, Current Y origin and BNE surface origin - all are in inline GRF. Use , . -rT.
+
+ 	//Calculate Origin For History Surface: (ORIX/4, ORIY/8)
+	mov  (16)   acc0.0<1>:w						r7.0<0;2,1>:w	// replicate the (r7.0, r7.1) pair across acc0: even words = r7.0 (X), odd words = r7.1 (Y)
+	shr  (1)	r7.2<1>:w		acc0.2<0;1,0>:w	2:w			// r7.2 = X >> 2
+	shr  (1)	r7.3<1>:w		acc0.3<0;1,0>:w	3:w			// r7.3 = Y >> 3
+
+	//Calculate Origin For BNE Surface: (ORIX/8, ORIY/16)
+	shr  (1)	r7.6<1>:w		acc0.6<0;1,0>:w		3:w		// r7.6 = X >> 3
+	shr  (1)	r7.7<1>:w		acc0.7<0;1,0>:w		4:w		// r7.7 = Y >> 4
+
+
+
+//Module Name 	: 	DN_UV_PL2_Load_Prev_Frame_UV
+//Author		:	Tatiya, Rupesh
+//Description	:	Loads Previous Frame UV data for PL2 input.
+
+
+
+//Module Name 	: 	DN_UV_Load_Prev_Frame_UV
+//Author		:	Tatiya, Rupesh
+//Description	:   Loads Prev Frame (UV only). U size - 16x16, V size - 16x16, UV size - 32x16, YUYV size - 64x16.
+
+
+
+
+	mov  (2)	r50.0<1>:d		r7.4<2;2,1>:w			{ AccWrEn } 	// Source block origin: header (x, y) = UV origin from r7.4/r7.5, also kept in acc0
+	mov  (1)	r50.2<1>:ud		0xF000F:ud  						// U/V block width and height (16x16)
+	send (8)	udPREV_UV(0)<1>		r50		0x4	0x2890001:ud	// first 16x16 block -> udPREV_UV(0..7)
+
+	add  (1)	r50.0<1>:ud 		acc0.0<0;1,0>:d		16:w										// Add 16 to X origin
+	send (8)	udPREV_UV(8)<1>		r50		0x4	0x2890001:ud	// second 16x16 block (x+16) -> udPREV_UV(8..15)
+
+
+	//TODO - See if History loading can be combined with Prev Frame Load. - rT
+
+
+//Module name 	:  DN_UV_Load_Hist_UV
+//Author		:  Tatiya, Rupesh
+//Description	:  Load DN History for UV denoise. 4x4 for each U & V.
+
+
+
+
+	mov  (2)	r50.0<1>:d	r7.2<2;2,1>:w			// header (x, y) = history origin (ORIX/4, ORIY/8) computed earlier into r7.2/r7.3
+	mov  (1)	r50.2<1>:ud	0x30007:ud  		// block size field, same encoding as 0xF000F (16x16): 0x7 / 0x3 -> 8 wide x 4 rows
+	send (8)	udHIST_UV(0)<1>			r50		0x4	0x2190022:ud	// history block -> udHIST_UV(0) (r51)
+
+
+
+//Module Name 	: DN_UV_420_Load_Curr_Frame_Y
+//Author		: Tatiya, Rupesh
+//Description	: Load Curr Frame Y data for 420 Input
+
+
+
+//Module Name 	: DN_UV_Load_Curr_Frame_Y
+//Author		: Tatiya, Rupesh
+//Description	: Loads Y of Current frame.
+
+
+
+
+	//For 16x16 U and 16x16 V for 420, we need to read 32x32 Y. It is fetched as four 16x16 blocks.
+
+	mov (8)		acc0.0<1>:ud		r0.0<8;8,1>:ud	// build the header template in the accumulator, starting from r0
+	mov (1)		acc0.2<1>:ud		0xF000F:ud	// block size 16x16
+	add (2)		acc0.0<1>:ud		r7.0<2;2,1>:w		r4.4<2;2,1>:w	// (x, y) = r7.0/r7.1 + r4.4/r4.5
+
+	mov (8)     r92.0<1>:ud	acc0.0<8;8,1>:ud	// header for block 0: (x, y)
+
+	mov (8)     r101.0<1>:ud	acc0.0<8;8,1>:ud	// header for block 1, y adjusted below
+	mov (8)     r110.0<1>:ud	acc0.0<8;8,1>:ud	// header for block 2, x adjusted below
+	mov (8)     r119.0<1>:ud	acc0.0<8;8,1>:ud	// header for block 3, x and y adjusted below
+
+	add (1)		r101.1<1>:d 	acc0.1<0;1,0>:d   		16:d	// block 1: (x, y+16)
+
+	add (1)		r110.0<1>:d 	acc0.0<0;1,0>:d   		16:d	// block 2: (x+16, y)
+
+	add (2)		r119.0<1>:d 	acc0.0<2;2,1>:d   		16:d	// block 3: (x+16, y+16)
+
+	send (8)	udCURR_Y0(0)<1>		r92		0x4	0x2890003:ud	// block 0 -> udCURR_Y0 (from r93)
+	send (8)	udCURR_Y1(0)<1>		r101		0x4	0x2890003:ud	// block 1 -> udCURR_Y1 (from r102)
+	send (8)	udCURR_Y2(0)<1>		r110		0x4	0x2890003:ud	// block 2 -> udCURR_Y2 (from r111)
+	send (8)	udCURR_Y3(0)<1>		r119		0x4	0x2890003:ud	// block 3 -> udCURR_Y3 (from r120)
+
+
+
+//Module Name : DN_UV_Noise_Detection_UV
+//Author	  : Tatiya, Rupesh
+//Description : Performs noise detection on 16x16 U and 16x16 V each.
+
+
+
+//Module Name 	: DN_UV_Move_CURBE_Inline_UV.asm
+//Author		: Tatiya, Rupesh
+
+
+
+
+	//Move CURBE data to another space - so that it can be used as Temp Space --> r1 - r6
+	mov (4)	r54.28<1>:ub		r2.28<4;4,1>:ub		//Dest. YUY2 offset: r2 bytes 28..31 -> r54 bytes 28..31
+	mov (2) r54.5<1>:ud		r4.0<4;2,2>:ud		//Src YUY2 offset and Origin offset: r4 dwords 0 and 2 -> r54 dwords 5 and 6
+	mov (4)	r55.28<1>:ub		r1.0<4;4,1>:ub		// r1 bytes 0..3 -> r55 bytes 28..31
+
+	mov (8) r61.20<1>:ub		r1.4<8;8,1>:ub		// r1 bytes 4..11 -> r61 bytes 20..27 (read back as byte pairs by the noise-reduction compares)
+	mov (4) r61.28<1>:ub		r1.12<4;4,1>:ub		// r1 bytes 12..15 -> r61 bytes 28..31
+
+	//Move Inline Data to another space - so that it can be used as Temp Space --> r7
+	mov (4) r62.10<1>:w				r7.0<4;4,1>:w	// r7 words 0..3 -> r62 words 10..13
+	mov (4) r63.10<1>:w		r7.4<4;4,1>:w	// r7 words 4..7 -> r63 words 10..13
+
+
+
+
+
+//Module Name	: DN_UV_Noise_Detection_Set_Top_Region_N
+//Author		: Tatiya, Rupesh
+//Description	: Sets sub-region N from Top region, then calls the shared noise-detection routine.
+
+
+	mov (1) a0.0:uw				1664:uw				// byte offset 1664 = 52*32: with 32-byte GRFs this is r52, i.e. udCURR_UV(0)
+	mov (1)	a0.1:uw	1816:uw 	// byte offset 1816 = 56*32 + 24: byte 24 of r56 (r56 is the base of uwSOAD_MIN_8x4); its use by the routine is not visible here
+
+
+
+
+add (1) r7.7<1>:d ip:ud 32:d { NoCompact }
+ jmpi (1) DN_UV_NOISE_DETECTION_UV { NoCompact }
+
+
+
+
+
+//Module Name	: DN_UV_Noise_Detection_Set_Top_Region_N
+//Author		: Tatiya, Rupesh
+//Description	: Sets sub-region N from Top region, then calls the shared noise-detection routine.
+
+
+	//TODO - remove one instruction here using arithmetic. -rT
+	mov (1) a0.0:uw				1792:uw			// byte offset 1792 = 56*32: r56, i.e. udCURR_UV(4) with 32-byte GRFs
+	mov (1)	a0.1:uw	1820:uw 	// byte offset 1820 = 56*32 + 28: byte 28 of r56
+
+
+
+
+add (1) r7.7<1>:d ip:ud 32:d { NoCompact }
+ jmpi (1) DN_UV_NOISE_DETECTION_UV { NoCompact }
+
+
+
+
+
+//Module Name	: DN_UV_Noise_Detection_Set_Top_Region_N
+//Author		: Tatiya, Rupesh
+//Description	: Sets sub-region N from Top region, then calls the shared noise-detection routine.
+
+
+	//TODO - remove one instruction here using arithmetic. -rT
+	mov (1) a0.0:uw				1920:uw			// byte offset 1920 = 60*32: r60, i.e. udCURR_UV(8), where block B was loaded
+	mov (1)	a0.1:uw	1848:uw 	// byte offset 1848 = 57*32 + 24: byte 24 of r57
+
+
+
+
+add (1) r7.7<1>:d ip:ud 32:d { NoCompact }
+ jmpi (1) DN_UV_NOISE_DETECTION_UV { NoCompact }
+
+
+
+
+
+//Module Name	: DN_UV_Noise_Detection_Set_Top_Region_N
+//Author		: Tatiya, Rupesh
+//Description	: Sets sub-region N from Top region, then calls the shared noise-detection routine.
+
+
+	//TODO - remove one instruction here using arithmetic. -rT
+	mov (1) a0.0:uw				2048:uw			// byte offset 2048 = 64*32: r64, i.e. udCURR_UV(12)
+	mov (1)	a0.1:uw	1852:uw 	// byte offset 1852 = 57*32 + 28: byte 28 of r57
+
+
+
+
+add (1) r7.7<1>:d ip:ud 32:d { NoCompact }
+ jmpi (1) DN_UV_NOISE_DETECTION_UV { NoCompact }
+
+
+
+
+
+//Module Name	: DN_UV_Noise_Detection_Set_Top_Region_N
+//Author		: Tatiya, Rupesh
+//Description	: Sets sub-region N from Top region, then calls the shared noise-detection routine.
+
+
+	mov (1) a0.0:uw				2304:uw			// byte offset 2304 = 72*32: r72, i.e. udCURR_UV(20), where block D was loaded
+	mov (1)	a0.1:uw	1880:uw 	// byte offset 1880 = 58*32 + 24: byte 24 of r58
+
+
+
+
+add (1) r7.7<1>:d ip:ud 32:d { NoCompact }
+ jmpi (1) DN_UV_NOISE_DETECTION_UV { NoCompact }
+
+
+
+
+
+//Module Name	: DN_UV_Noise_Detection_Set_Top_Region_N
+//Author		: Tatiya, Rupesh
+//Description	: Sets sub-region N from Top region, then calls the shared noise-detection routine.
+
+
+	//TODO - remove one instruction here using arithmetic. -rT
+	mov (1) a0.0:uw				2432:uw			// byte offset 2432 = 76*32: r76, i.e. udCURR_UV(24)
+	mov (1)	a0.1:uw	1884:uw 	// byte offset 1884 = 58*32 + 28: byte 28 of r58
+
+
+
+
+add (1) r7.7<1>:d ip:ud 32:d { NoCompact }
+ jmpi (1) DN_UV_NOISE_DETECTION_UV { NoCompact }
+
+
+
+
+
+//Module Name	: DN_UV_Noise_Detection_Set_Top_Region_N
+//Author		: Tatiya, Rupesh
+//Description	: Sets sub-region N from Top region, then calls the shared noise-detection routine.
+
+
+	//TODO - remove one instruction here using arithmetic. -rT
+	mov (1) a0.0:uw				2560:uw			// byte offset 2560 = 80*32: r80, i.e. udCURR_UV(28), where block E was loaded
+	mov (1)	a0.1:uw	1912:uw 	// byte offset 1912 = 59*32 + 24: byte 24 of r59
+
+
+
+
+add (1) r7.7<1>:d ip:ud 32:d { NoCompact }
+ jmpi (1) DN_UV_NOISE_DETECTION_UV { NoCompact }
+
+
+
+
+
+//Module Name	: DN_UV_Noise_Detection_Set_Top_Region_N
+//Author		: Tatiya, Rupesh
+//Description	: Sets sub-region N from Top region, then calls the shared noise-detection routine.
+
+
+	//TODO - remove one instruction here using arithmetic. -rT
+	mov (1) a0.0:uw				2688:uw			// byte offset 2688 = 84*32: r84, i.e. udCURR_UV(32)
+	mov (1)	a0.1:uw	1916:uw 	// byte offset 1916 = 59*32 + 28: byte 28 of r59
+
+
+
+
+add (1) r7.7<1>:d ip:ud 32:d { NoCompact }
+ jmpi (1) DN_UV_NOISE_DETECTION_UV { NoCompact }
+
+
+
+//Module 		: DN_UV_Noise_Reduction_UV
+//Author		: Tatiya, Rupesh
+//Description	: Performs Noise Reduction on 16x16 U and 16x16 V.
+//Tasks			: 1. Update weight history
+//				  2. Find if the block is a motion block
+//				  3. Compute Denoised Pixel.
+
+
+
+
+//History is 1+1 byte every 4x4 U and 4x4 V.
+
+	cmp.l.f0.0 (16) null<1>:w		ubHIST_UV(0,0)<16;16,1>		r61.20<0;2,1>:ub
+	cmp.l.f1.0 (16) null<1>:w		ubHIST_UV(0,0)<16;16,1>		r61.22<0;2,1>:ub
+
+	mov (16)	uwCURBE_TEMP(0)<1>	0:w
+	mov (16)	uwCURBE_TEMP(1)<1>	0:w
+
+	//Compute diff betn curr and prev. - First 16 lines
+	// 8 lines here
+    add (16)	wDIFF_TEMPORAL(0)<1>			ubCURR_UV(2,2)<16;16,1>		-ubPREV_UV(0,0)<16;16,1>		//Diff UV interleaved
+    add (16)	wDIFF_TEMPORAL(1)<1>			ubCURR_UV(3,2)<16;16,1>		-ubPREV_UV(0,16)<16;16,1>		//Diff UV interleaved
+    add (16)	wDIFF_TEMPORAL(2)<1>			ubCURR_UV(4,2)<16;16,1>		-ubPREV_UV(0,32)<16;16,1>		//Diff UV interleaved
+    add (16)	wDIFF_TEMPORAL(3)<1>			ubCURR_UV(5,2)<16;16,1>		-ubPREV_UV(0,48)<16;16,1>		//Diff UV interleaved
+    add (16)	wDIFF_TEMPORAL(4)<1>			ubCURR_UV(6,2)<16;16,1>		-ubPREV_UV(0,64)<16;16,1>		//Diff UV interleaved
+    add (16)	wDIFF_TEMPORAL(5)<1>			ubCURR_UV(7,2)<16;16,1>		-ubPREV_UV(0,80)<16;16,1>		//Diff UV interleaved
+    add (16)	wDIFF_TEMPORAL(6)<1>			ubCURR_UV(8,2)<16;16,1>		-ubPREV_UV(0,96)<16;16,1>		//Diff UV interleaved
+    add (16)	wDIFF_TEMPORAL(7)<1>			ubCURR_UV(9,2)<16;16,1>		-ubPREV_UV(0,112)<16;16,1>		//Diff UV interleaved
+
+	//Update WT HIST
+	(-f0.0) shr 	(16) uwCURBE_TEMP(0)<1>		ubHIST_UV(0,0)<16;16,1>		1:w
+	(f1.0)  add 	(16) uwCURBE_TEMP(2)<1>		ubHIST_UV(0,0)<16;16,1>		r61.24<0;2,1>:ub
+	(f0.0)  mov 	(16) uwCURBE_TEMP(2)<1>		r61.20<0;2,1>:ub
+	(-f0.0.anyv) mov 	(16) uwCURBE_TEMP(2)<1>		ubHIST_UV(0,0)<16;16,1>
+
+	cmp.l.f0.0 (16) null<1>:w		ubHIST_UV(0,16)<16;16,1>	r61.20<0;2,1>:ub
+	cmp.l.f1.0 (16) null<1>:w		ubHIST_UV(0,16)<16;16,1>	r61.22<0;2,1>:ub
+
+	//Compute diff betn curr and prev. - First 16 lines
+	// 8 more lines here
+    add (16)	wDIFF_TEMPORAL(8)<1>			ubCURR_UV(10,2)<16;16,1>		-ubPREV_UV(0,128)<16;16,1>		//Diff UV interleaved
+    add (16)	wDIFF_TEMPORAL(9)<1>			ubCURR_UV(11,2)<16;16,1>		-ubPREV_UV(0,144)<16;16,1>		//Diff UV interleaved
+    add (16)	wDIFF_TEMPORAL(10)<1>			ubCURR_UV(12,2)<16;16,1>		-ubPREV_UV(0,160)<16;16,1>		//Diff UV interleaved
+    add (16)	wDIFF_TEMPORAL(11)<1>			ubCURR_UV(13,2)<16;16,1>		-ubPREV_UV(0,176)<16;16,1>		//Diff UV interleaved
+    add (16)	wDIFF_TEMPORAL(12)<1>			ubCURR_UV(14,2)<16;16,1>		-ubPREV_UV(0,192)<16;16,1>		//Diff UV interleaved
+    add (16)	wDIFF_TEMPORAL(13)<1>			ubCURR_UV(15,2)<16;16,1>		-ubPREV_UV(0,208)<16;16,1>		//Diff UV interleaved
+    add (16)	wDIFF_TEMPORAL(14)<1>			ubCURR_UV(16,2)<16;16,1>		-ubPREV_UV(0,224)<16;16,1>		//Diff UV interleaved
+    add (16)	wDIFF_TEMPORAL(15)<1>			ubCURR_UV(17,2)<16;16,1>		-ubPREV_UV(0,240)<16;16,1>		//Diff UV interleaved
+
+	(-f0.0) shr 	(16) uwCURBE_TEMP(1)<1>		ubHIST_UV(0,16)<16;16,1>	1:w
+	(f1.0)  add 	(16) uwCURBE_TEMP(3)<1>		ubHIST_UV(0,16)<16;16,1>	r61.24<0;2,1>:ub
+	(f0.0)  mov 	(16) uwCURBE_TEMP(3)<1>		r61.20<0;2,1>:ub
+	(-f0.0.anyv) mov(16) uwCURBE_TEMP(3)<1>		ubHIST_UV(0,16)<16;16,1>
+
+	//16x16 to 16x4 - First 16 lines
+	add (16)	acc0.0<1>:uw					(abs)wDIFF_TEMPORAL(0)<16;16,1>	(abs)wDIFF_TEMPORAL(1)<16;16,1>
+	add (16)	acc0.0<1>:uw					acc0.0<16;16,1>:uw					(abs)wDIFF_TEMPORAL(2)<16;16,1>
+	add (16)	uwDIFF_TEMPORAL_SUM4x4(0)<1>	acc0.0<16;16,1>:uw					(abs)wDIFF_TEMPORAL(3)<16;16,1>
+	//16x16 to 16x4 - First 16 lines
+	add (16)	acc0.0<1>:uw					(abs)wDIFF_TEMPORAL(4)<16;16,1>	(abs)wDIFF_TEMPORAL(5)<16;16,1>
+	add (16)	acc0.0<1>:uw					acc0.0<16;16,1>:uw					(abs)wDIFF_TEMPORAL(6)<16;16,1>
+	add (16)	uwDIFF_TEMPORAL_SUM4x4(1)<1>	acc0.0<16;16,1>:uw					(abs)wDIFF_TEMPORAL(7)<16;16,1>
+	//16x16 to 16x4 - First 16 lines
+	add (16)	acc0.0<1>:uw					(abs)wDIFF_TEMPORAL(8)<16;16,1>	(abs)wDIFF_TEMPORAL(9)<16;16,1>
+	add (16)	acc0.0<1>:uw					acc0.0<16;16,1>:uw					(abs)wDIFF_TEMPORAL(10)<16;16,1>
+	add (16)	uwDIFF_TEMPORAL_SUM4x4(2)<1>	acc0.0<16;16,1>:uw					(abs)wDIFF_TEMPORAL(11)<16;16,1>
+	//16x16 to 16x4 - First 16 lines
+	add (16)	acc0.0<1>:uw					(abs)wDIFF_TEMPORAL(12)<16;16,1>	(abs)wDIFF_TEMPORAL(13)<16;16,1>
+	add (16)	acc0.0<1>:uw					acc0.0<16;16,1>:uw					(abs)wDIFF_TEMPORAL(14)<16;16,1>
+	add (16)	uwDIFF_TEMPORAL_SUM4x4(3)<1>	acc0.0<16;16,1>:uw					(abs)wDIFF_TEMPORAL(15)<16;16,1>
+
+//Compute diff betn curr and prev. - Second 16 lines
+//13 lines.
+    add (16)	wDIFF_TEMPORAL(16)<1>		ubCURR_UV(22,2)<16;16,1>		-ubPREV_UV(8,0)<16;16,1>		//Diff UV interleaved
+    add (16)	wDIFF_TEMPORAL(17)<1>		ubCURR_UV(23,2)<16;16,1>		-ubPREV_UV(8,16)<16;16,1>		//Diff UV interleaved
+    add (16)	wDIFF_TEMPORAL(18)<1>		ubCURR_UV(24,2)<16;16,1>		-ubPREV_UV(8,32)<16;16,1>		//Diff UV interleaved
+    add (16)	wDIFF_TEMPORAL(19)<1>		ubCURR_UV(25,2)<16;16,1>		-ubPREV_UV(8,48)<16;16,1>		//Diff UV interleaved
+    add (16)	wDIFF_TEMPORAL(20)<1>		ubCURR_UV(26,2)<16;16,1>		-ubPREV_UV(8,64)<16;16,1>		//Diff UV interleaved
+    add (16)	wDIFF_TEMPORAL(21)<1>		ubCURR_UV(27,2)<16;16,1>		-ubPREV_UV(8,80)<16;16,1>		//Diff UV interleaved
+    add (16)	wDIFF_TEMPORAL(22)<1>		ubCURR_UV(28,2)<16;16,1>		-ubPREV_UV(8,96)<16;16,1>		//Diff UV interleaved
+    add (16)	wDIFF_TEMPORAL(23)<1>		ubCURR_UV(29,2)<16;16,1>		-ubPREV_UV(8,112)<16;16,1>		//Diff UV interleaved
+    add (16)	wDIFF_TEMPORAL(24)<1>		ubCURR_UV(30,2)<16;16,1>		-ubPREV_UV(8,128)<16;16,1>		//Diff UV interleaved
+    add (16)	wDIFF_TEMPORAL(25)<1>		ubCURR_UV(31,2)<16;16,1>		-ubPREV_UV(8,144)<16;16,1>		//Diff UV interleaved
+    add (16)	wDIFF_TEMPORAL(26)<1>		ubCURR_UV(32,2)<16;16,1>		-ubPREV_UV(8,160)<16;16,1>		//Diff UV interleaved
+    add (16)	wDIFF_TEMPORAL(27)<1>		ubCURR_UV(33,2)<16;16,1>		-ubPREV_UV(8,176)<16;16,1>		//Diff UV interleaved
+    add (16)	wDIFF_TEMPORAL(28)<1>		ubCURR_UV(34,2)<16;16,1>		-ubPREV_UV(8,192)<16;16,1>		//Diff UV interleaved
+
+//3 more lines
+    add (16)	wCURBE_TEMP(4)<1>		ubCURR_UV(35,2)<16;16,1>		-ubPREV_UV(8,208)<16;16,1>		//Diff UV interleaved
+    add (16)	wCURBE_TEMP(5)<1>		ubCURR_UV(36,2)<16;16,1>		-ubPREV_UV(8,224)<16;16,1>		//Diff UV interleaved
+    add (16)	wCURBE_TEMP(6)<1>		ubCURR_UV(37,2)<16;16,1>		-ubPREV_UV(8,240)<16;16,1>		//Diff UV interleaved
+
+	//16x4 to 8x4 - First 16 lines
+	add (16)	uwDIFF_TEMPORAL_SUM4x4(0)<1>		uwDIFF_TEMPORAL_SUM4x4(0,0)<4;2,1>		uwDIFF_TEMPORAL_SUM4x4(0,2)<4;2,1>
+	add (16)	uwDIFF_TEMPORAL_SUM4x4(1)<1>		uwDIFF_TEMPORAL_SUM4x4(2,0)<4;2,1>		uwDIFF_TEMPORAL_SUM4x4(2,2)<4;2,1>
+
+	//8x4 to 4x4 - First 16 lines
+	add (16)	uwDIFF_TEMPORAL_SUM4x4_FINAL(0)<1>	uwDIFF_TEMPORAL_SUM4x4(0,0)<4;2,1>		uwDIFF_TEMPORAL_SUM4x4(0,2)<4;2,1>		{ AccWrEn }
+
+	//16x16 to 16x4 - Second 16 lines
+	add (16)	acc0.0<1>:uw					(abs)wDIFF_TEMPORAL(16)<16;16,1>	(abs)wDIFF_TEMPORAL(17)<16;16,1>
+	add (16)	acc0.0<1>:uw					acc0.0<16;16,1>:uw					(abs)wDIFF_TEMPORAL(18)<16;16,1>
+	add (16)	uwDIFF_TEMPORAL_SUM4x4(0)<1>	acc0.0<16;16,1>:uw					(abs)wDIFF_TEMPORAL(19)<16;16,1>
+	//16x16 to 16x4 - Second 16 lines
+	add (16)	acc0.0<1>:uw					(abs)wDIFF_TEMPORAL(20)<16;16,1>	(abs)wDIFF_TEMPORAL(21)<16;16,1>
+	add (16)	acc0.0<1>:uw					acc0.0<16;16,1>:uw					(abs)wDIFF_TEMPORAL(22)<16;16,1>
+	add (16)	uwDIFF_TEMPORAL_SUM4x4(1)<1>	acc0.0<16;16,1>:uw					(abs)wDIFF_TEMPORAL(23)<16;16,1>
+	//16x16 to 16x4 - Second 16 lines
+	add (16)	acc0.0<1>:uw					(abs)wDIFF_TEMPORAL(24)<16;16,1>	(abs)wDIFF_TEMPORAL(25)<16;16,1>
+	add (16)	acc0.0<1>:uw					acc0.0<16;16,1>:uw					(abs)wDIFF_TEMPORAL(26)<16;16,1>
+	add (16)	uwDIFF_TEMPORAL_SUM4x4(2)<1>	acc0.0<16;16,1>:uw					(abs)wDIFF_TEMPORAL(27)<16;16,1>
+
+	//16x16 to 16x4 - Second 16 lines
+	add (16)	acc0.0<1>:uw					(abs)wDIFF_TEMPORAL(28)<16;16,1>	(abs)wCURBE_TEMP(4)<16;16,1>
+	add (16)	acc0.0<1>:uw					acc0.0<16;16,1>:uw					(abs)wCURBE_TEMP(5)<16;16,1>
+	add (16)	uwDIFF_TEMPORAL_SUM4x4(3)<1>	acc0.0<16;16,1>:uw					(abs)wCURBE_TEMP(6)<16;16,1>
+
+	//Find if block is motion block - First 16 lines
+	cmp.g.f0.0  (16) null<1>:w				uwDIFF_TEMPORAL_SUM4x4_FINAL(0)<16;16,1> 		r61.26<0;2,1>:ub
+
+	//Move TEMPORAL_SUM4x4 for SIMD16 use later.
+	mov (8)     uwDIFF_TEMPORAL_SUM4x4_TEMP_0(0,0)<1>                 uwDIFF_TEMPORAL_SUM4x4_FINAL(0,0)<0;2,1>		
+	mov (8)     uwDIFF_TEMPORAL_SUM4x4_TEMP_0(0,8)<1>                 uwDIFF_TEMPORAL_SUM4x4_FINAL(0,2)<0;2,1>		
+	mov (8)     uwDIFF_TEMPORAL_SUM4x4_TEMP_1(0,0)<1>                 uwDIFF_TEMPORAL_SUM4x4_FINAL(0,4)<0;2,1>		
+	mov (8)     uwDIFF_TEMPORAL_SUM4x4_TEMP_1(0,8)<1>                 uwDIFF_TEMPORAL_SUM4x4_FINAL(0,6)<0;2,1>      
+	mov (8)     uwDIFF_TEMPORAL_SUM4x4_TEMP_2(0,0)<1>                 uwDIFF_TEMPORAL_SUM4x4_FINAL(0,8)<0;2,1>		
+	mov (8)     uwDIFF_TEMPORAL_SUM4x4_TEMP_2(0,8)<1>                 uwDIFF_TEMPORAL_SUM4x4_FINAL(0,10)<0;2,1>     
+	mov (8)     uwDIFF_TEMPORAL_SUM4x4_TEMP_3(0,0)<1>                 uwDIFF_TEMPORAL_SUM4x4_FINAL(0,12)<0;2,1>		
+	mov (8)     uwDIFF_TEMPORAL_SUM4x4_TEMP_3(0,8)<1>                 uwDIFF_TEMPORAL_SUM4x4_FINAL(0,14)<0;2,1>     
+
+	//Pick Appropriate Weight History Based on motion. - First 16 lines
+	(-f0.0) mov (16) uwCURBE_TEMP(0)<1>		uwCURBE_TEMP(2)<16;16,1>
+
+	//Actual DN - First 16 lines
+	cmp.l.f0.0 (16)	null<1>:w						(abs)wDIFF_TEMPORAL(0)<16;16,1>			r61.28<0;2,1>:ub
+	cmp.l.f1.0 (16) null<1>:w						(abs)wDIFF_TEMPORAL(0)<16;16,1>			r61.30<0;2,1>:ub
+	mul (8)	acc0.0<1>:w								ubCURR_UV(2,2)<8;8,1>					-uwCURBE_TEMP(0,0)<0;2,1>
+	mul (8)	acc0.8<1>:w								ubCURR_UV(2,10)<8;8,1>					-uwCURBE_TEMP(0,2)<0;2,1>
+	mac (16)	acc0<1>:w							ubCURR_UV(2,2)<16;16,1>					256:w
+	mac (8)	acc0.0<1>:w								ubPREV_UV(0,0)<8;8,1>					uwCURBE_TEMP(0,0)<0;2,1>
+	mac (8)	acc0.8<1>:w								ubPREV_UV(0,8)<8;8,1>					uwCURBE_TEMP(0,2)<0;2,1>
+	add (16)	acc0<1>:w							acc0<16;16,1>:w								128:w
+ 	(f0.0) shr (16) wDIFF_TEMPORAL(0)<1> 			acc0<16;16,1>:w								8:w
+	(-f0.0) mov (16) wDIFF_TEMPORAL(0)<1> 			ubCURR_UV(2,2)<16;16,1>
+	cmp.le.f0.0 (16) null<1>:w						uwDIFF_TEMPORAL_SUM4x4_TEMP_0(0)<16;16,1> 	r61.26<0;2,1>:ub
+	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(0)<1>	wDIFF_TEMPORAL(0)<16;16,1>					ubCURR_UV(2,2)<16;16,1>
+
+	cmp.l.f0.0 (16)	null<1>:w						(abs)wDIFF_TEMPORAL(1)<16;16,1>			r61.28<0;2,1>:ub
+	cmp.l.f1.0 (16) null<1>:w						(abs)wDIFF_TEMPORAL(1)<16;16,1>			r61.30<0;2,1>:ub
+	mul (8)	acc0.0<1>:w								ubCURR_UV(3,2)<8;8,1>					-uwCURBE_TEMP(0,0)<0;2,1>
+	mul (8)	acc0.8<1>:w								ubCURR_UV(3,10)<8;8,1>					-uwCURBE_TEMP(0,2)<0;2,1>
+	mac (16)	acc0<1>:w							ubCURR_UV(3,2)<16;16,1>					256:w
+	mac (8)	acc0.0<1>:w								ubPREV_UV(0,16)<8;8,1>				uwCURBE_TEMP(0,0)<0;2,1>
+	mac (8)	acc0.8<1>:w								ubPREV_UV(0,24)<8;8,1>				uwCURBE_TEMP(0,2)<0;2,1>
+	add (16)	acc0<1>:w							acc0<16;16,1>:w								128:w
+ 	(f0.0) shr  (16) wDIFF_TEMPORAL(1)<1>		acc0<16;16,1>:w								8:w
+	(-f0.0) mov (16) wDIFF_TEMPORAL(1)<1>		ubCURR_UV(3,2)<16;16,1>
+	cmp.le.f0.0 (16) null<1>:w						uwDIFF_TEMPORAL_SUM4x4_TEMP_0(0)<16;16,1> 	r61.26<0;2,1>:ub
+	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(1)<1>	wDIFF_TEMPORAL(1)<16;16,1>				ubCURR_UV(3,2)<16;16,1>
+
+	cmp.l.f0.0 (16)	null<1>:w						(abs)wDIFF_TEMPORAL(2)<16;16,1>			r61.28<0;2,1>:ub
+	cmp.l.f1.0 (16) null<1>:w						(abs)wDIFF_TEMPORAL(2)<16;16,1>			r61.30<0;2,1>:ub
+	mul (8)	acc0.0<1>:w								ubCURR_UV(4,2)<8;8,1>					-uwCURBE_TEMP(0,0)<0;2,1>
+	mul (8)	acc0.8<1>:w								ubCURR_UV(4,10)<8;8,1>					-uwCURBE_TEMP(0,2)<0;2,1>
+	mac (16)	acc0<1>:w							ubCURR_UV(4,2)<16;16,1>					256:w
+	mac (8)	acc0.0<1>:w								ubPREV_UV(0,32)<8;8,1>				uwCURBE_TEMP(0,0)<0;2,1>
+	mac (8)	acc0.8<1>:w								ubPREV_UV(0,40)<8;8,1>				uwCURBE_TEMP(0,2)<0;2,1>
+	add (16)	acc0<1>:w							acc0<16;16,1>:w								128:w
+ 	(f0.0) shr  (16) wDIFF_TEMPORAL(2)<1> 		acc0<16;16,1>:w								8:w
+	(-f0.0) mov (16) wDIFF_TEMPORAL(2)<1>		ubCURR_UV(4,2)<16;16,1>
+	cmp.le.f0.0 (16) null<1>:w						uwDIFF_TEMPORAL_SUM4x4_TEMP_0(0)<16;16,1> 	r61.26<0;2,1>:ub
+	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(2)<1>	wDIFF_TEMPORAL(2)<16;16,1>				ubCURR_UV(4,2)<16;16,1>
+
+	cmp.l.f0.0 (16)	null<1>:w						(abs)wDIFF_TEMPORAL(3)<16;16,1>			r61.28<0;2,1>:ub
+	cmp.l.f1.0 (16) null<1>:w						(abs)wDIFF_TEMPORAL(3)<16;16,1>			r61.30<0;2,1>:ub
+	mul (8)	acc0.0<1>:w								ubCURR_UV(5,2)<8;8,1>					-uwCURBE_TEMP(0,0)<0;2,1>
+	mul (8)	acc0.8<1>:w								ubCURR_UV(5,10)<8;8,1>					-uwCURBE_TEMP(0,2)<0;2,1>
+	mac (16)	acc0<1>:w							ubCURR_UV(5,2)<16;16,1>					256:w
+	mac (8)	acc0.0<1>:w								ubPREV_UV(0,48)<8;8,1>				uwCURBE_TEMP(0,0)<0;2,1>
+	mac (8)	acc0.8<1>:w								ubPREV_UV(0,56)<8;8,1>				uwCURBE_TEMP(0,2)<0;2,1>
+	add (16)	acc0<1>:w							acc0<16;16,1>:w								128:w
+ 	(f0.0) shr  (16) wDIFF_TEMPORAL(3)<1>		acc0<16;16,1>:w								8:w
+	(-f0.0) mov (16) wDIFF_TEMPORAL(3)<1>		ubCURR_UV(5,2)<16;16,1>
+	cmp.le.f0.0 (16) null<1>:w						uwDIFF_TEMPORAL_SUM4x4_TEMP_0(0)<16;16,1> 	r61.26<0;2,1>:ub
+	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(3)<1>	wDIFF_TEMPORAL(3)<16;16,1>				ubCURR_UV(5,2)<16;16,1>
+
+	cmp.l.f0.0 (16)	null<1>:w						(abs)wDIFF_TEMPORAL(4)<16;16,1>			r61.28<0;2,1>:ub
+	cmp.l.f1.0 (16) null<1>:w						(abs)wDIFF_TEMPORAL(4)<16;16,1>			r61.30<0;2,1>:ub
+	mul (8)	acc0.0<1>:w								ubCURR_UV(6,2)<8;8,1>					-uwCURBE_TEMP(0,4)<0;2,1>
+	mul (8)	acc0.8<1>:w								ubCURR_UV(6,10)<8;8,1>					-uwCURBE_TEMP(0,6)<0;2,1>
+	mac (16)	acc0<1>:w							ubCURR_UV(6,2)<16;16,1>					256:w
+	mac (8)	acc0.0<1>:w								ubPREV_UV(0,64)<8;8,1>					uwCURBE_TEMP(0,4)<0;2,1>
+	mac (8)	acc0.8<1>:w								ubPREV_UV(0,72)<8;8,1>					uwCURBE_TEMP(0,6)<0;2,1>
+	add (16)	acc0<1>:w							acc0<16;16,1>:w								128:w
+ 	(f0.0) shr (16) wDIFF_TEMPORAL(4)<1> 			acc0<16;16,1>:w								8:w
+	(-f0.0) mov (16) wDIFF_TEMPORAL(4)<1> 			ubCURR_UV(6,2)<16;16,1>
+	cmp.le.f0.0 (16) null<1>:w						uwDIFF_TEMPORAL_SUM4x4_TEMP_1(0)<16;16,1> 	r61.26<0;2,1>:ub
+	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(4)<1>	wDIFF_TEMPORAL(4)<16;16,1>					ubCURR_UV(6,2)<16;16,1>
+
+	cmp.l.f0.0 (16)	null<1>:w						(abs)wDIFF_TEMPORAL(5)<16;16,1>			r61.28<0;2,1>:ub
+	cmp.l.f1.0 (16) null<1>:w						(abs)wDIFF_TEMPORAL(5)<16;16,1>			r61.30<0;2,1>:ub
+	mul (8)	acc0.0<1>:w								ubCURR_UV(7,2)<8;8,1>					-uwCURBE_TEMP(0,4)<0;2,1>
+	mul (8)	acc0.8<1>:w								ubCURR_UV(7,10)<8;8,1>					-uwCURBE_TEMP(0,6)<0;2,1>
+	mac (16)	acc0<1>:w							ubCURR_UV(7,2)<16;16,1>					256:w
+	mac (8)	acc0.0<1>:w								ubPREV_UV(0,80)<8;8,1>				uwCURBE_TEMP(0,4)<0;2,1>
+	mac (8)	acc0.8<1>:w								ubPREV_UV(0,88)<8;8,1>				uwCURBE_TEMP(0,6)<0;2,1>
+	add (16)	acc0<1>:w							acc0<16;16,1>:w								128:w
+ 	(f0.0) shr  (16) wDIFF_TEMPORAL(5)<1>		acc0<16;16,1>:w								8:w
+	(-f0.0) mov (16) wDIFF_TEMPORAL(5)<1>		ubCURR_UV(7,2)<16;16,1>
+	cmp.le.f0.0 (16) null<1>:w						uwDIFF_TEMPORAL_SUM4x4_TEMP_1(0)<16;16,1> 	r61.26<0;2,1>:ub
+	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(5)<1>	wDIFF_TEMPORAL(5)<16;16,1>				ubCURR_UV(7,2)<16;16,1>
+
+	cmp.l.f0.0 (16)	null<1>:w						(abs)wDIFF_TEMPORAL(6)<16;16,1>			r61.28<0;2,1>:ub
+	cmp.l.f1.0 (16) null<1>:w						(abs)wDIFF_TEMPORAL(6)<16;16,1>			r61.30<0;2,1>:ub
+	mul (8)	acc0.0<1>:w								ubCURR_UV(8,2)<8;8,1>					-uwCURBE_TEMP(0,4)<0;2,1>
+	mul (8)	acc0.8<1>:w								ubCURR_UV(8,10)<8;8,1>					-uwCURBE_TEMP(0,6)<0;2,1>
+	mac (16)	acc0<1>:w							ubCURR_UV(8,2)<16;16,1>					256:w
+	mac (8)	acc0.0<1>:w								ubPREV_UV(0,96)<8;8,1>				uwCURBE_TEMP(0,4)<0;2,1>
+	mac (8)	acc0.8<1>:w								ubPREV_UV(0,104)<8;8,1>				uwCURBE_TEMP(0,6)<0;2,1>
+	add (16)	acc0<1>:w							acc0<16;16,1>:w								128:w
+ 	(f0.0) shr  (16) wDIFF_TEMPORAL(6)<1> 		acc0<16;16,1>:w								8:w
+	(-f0.0) mov (16) wDIFF_TEMPORAL(6)<1>		ubCURR_UV(8,2)<16;16,1>
+	cmp.le.f0.0 (16) null<1>:w						uwDIFF_TEMPORAL_SUM4x4_TEMP_1(0)<16;16,1> 	r61.26<0;2,1>:ub
+	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(6)<1>	wDIFF_TEMPORAL(6)<16;16,1>				ubCURR_UV(8,2)<16;16,1>
+
+	cmp.l.f0.0 (16)	null<1>:w						(abs)wDIFF_TEMPORAL(7)<16;16,1>			r61.28<0;2,1>:ub
+	cmp.l.f1.0 (16) null<1>:w						(abs)wDIFF_TEMPORAL(7)<16;16,1>			r61.30<0;2,1>:ub
+	mul (8)	acc0.0<1>:w								ubCURR_UV(9,2)<8;8,1>					-uwCURBE_TEMP(0,4)<0;2,1>
+	mul (8)	acc0.8<1>:w								ubCURR_UV(9,10)<8;8,1>					-uwCURBE_TEMP(0,6)<0;2,1>
+	mac (16)	acc0<1>:w							ubCURR_UV(9,2)<16;16,1>					256:w
+	mac (8)	acc0.0<1>:w								ubPREV_UV(0,112)<8;8,1>				uwCURBE_TEMP(0,4)<0;2,1>
+	mac (8)	acc0.8<1>:w								ubPREV_UV(0,120)<8;8,1>				uwCURBE_TEMP(0,6)<0;2,1>
+	add (16)	acc0<1>:w							acc0<16;16,1>:w								128:w
+ 	(f0.0) shr  (16) wDIFF_TEMPORAL(7)<1>		acc0<16;16,1>:w								8:w
+	(-f0.0) mov (16) wDIFF_TEMPORAL(7)<1>		ubCURR_UV(9,2)<16;16,1>
+	cmp.le.f0.0 (16) null<1>:w						uwDIFF_TEMPORAL_SUM4x4_TEMP_1(0)<16;16,1> 	r61.26<0;2,1>:ub
+	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(7)<1>	wDIFF_TEMPORAL(7)<16;16,1>				ubCURR_UV(9,2)<16;16,1>
+
+	cmp.l.f0.0 (16)	null<1>:w						(abs)wDIFF_TEMPORAL(8)<16;16,1>			r61.28<0;2,1>:ub
+	cmp.l.f1.0 (16) null<1>:w						(abs)wDIFF_TEMPORAL(8)<16;16,1>			r61.30<0;2,1>:ub
+	mul (8)	acc0.0<1>:w								ubCURR_UV(10,2)<8;8,1>					-uwCURBE_TEMP(0,8)<0;2,1>
+	mul (8)	acc0.8<1>:w								ubCURR_UV(10,10)<8;8,1>					-uwCURBE_TEMP(0,10)<0;2,1>
+	mac (16)	acc0<1>:w							ubCURR_UV(10,2)<16;16,1>					256:w
+	mac (8)	acc0.0<1>:w								ubPREV_UV(0,128)<8;8,1>					uwCURBE_TEMP(0,8)<0;2,1>
+	mac (8)	acc0.8<1>:w								ubPREV_UV(0,136)<8;8,1>					uwCURBE_TEMP(0,10)<0;2,1>
+	add (16)	acc0<1>:w							acc0<16;16,1>:w								128:w
+ 	(f0.0) shr (16) wDIFF_TEMPORAL(8)<1> 			acc0<16;16,1>:w								8:w
+	(-f0.0) mov (16) wDIFF_TEMPORAL(8)<1> 			ubCURR_UV(10,2)<16;16,1>
+	cmp.le.f0.0 (16) null<1>:w						uwDIFF_TEMPORAL_SUM4x4_TEMP_2(0)<16;16,1> 	r61.26<0;2,1>:ub
+	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(8)<1>	wDIFF_TEMPORAL(8)<16;16,1>					ubCURR_UV(10,2)<16;16,1>
+
+	cmp.l.f0.0 (16)	null<1>:w						(abs)wDIFF_TEMPORAL(9)<16;16,1>			r61.28<0;2,1>:ub
+	cmp.l.f1.0 (16) null<1>:w						(abs)wDIFF_TEMPORAL(9)<16;16,1>			r61.30<0;2,1>:ub
+	mul (8)	acc0.0<1>:w								ubCURR_UV(11,2)<8;8,1>					-uwCURBE_TEMP(0,8)<0;2,1>
+	mul (8)	acc0.8<1>:w								ubCURR_UV(11,10)<8;8,1>					-uwCURBE_TEMP(0,10)<0;2,1>
+	mac (16)	acc0<1>:w							ubCURR_UV(11,2)<16;16,1>					256:w
+	mac (8)	acc0.0<1>:w								ubPREV_UV(0,144)<8;8,1>				uwCURBE_TEMP(0,8)<0;2,1>
+	mac (8)	acc0.8<1>:w								ubPREV_UV(0,152)<8;8,1>				uwCURBE_TEMP(0,10)<0;2,1>
+	add (16)	acc0<1>:w							acc0<16;16,1>:w								128:w
+ 	(f0.0) shr  (16) wDIFF_TEMPORAL(9)<1>		acc0<16;16,1>:w								8:w
+	(-f0.0) mov (16) wDIFF_TEMPORAL(9)<1>		ubCURR_UV(11,2)<16;16,1>
+	cmp.le.f0.0 (16) null<1>:w						uwDIFF_TEMPORAL_SUM4x4_TEMP_2(0)<16;16,1> 	r61.26<0;2,1>:ub
+	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(9)<1>	wDIFF_TEMPORAL(9)<16;16,1>				ubCURR_UV(11,2)<16;16,1>
+
+	cmp.l.f0.0 (16)	null<1>:w						(abs)wDIFF_TEMPORAL(10)<16;16,1>			r61.28<0;2,1>:ub
+	cmp.l.f1.0 (16) null<1>:w						(abs)wDIFF_TEMPORAL(10)<16;16,1>			r61.30<0;2,1>:ub
+	mul (8)	acc0.0<1>:w								ubCURR_UV(12,2)<8;8,1>					-uwCURBE_TEMP(0,8)<0;2,1>
+	mul (8)	acc0.8<1>:w								ubCURR_UV(12,10)<8;8,1>					-uwCURBE_TEMP(0,10)<0;2,1>
+	mac (16)	acc0<1>:w							ubCURR_UV(12,2)<16;16,1>					256:w
+	mac (8)	acc0.0<1>:w								ubPREV_UV(0,160)<8;8,1>				uwCURBE_TEMP(0,8)<0;2,1>
+	mac (8)	acc0.8<1>:w								ubPREV_UV(0,168)<8;8,1>				uwCURBE_TEMP(0,10)<0;2,1>
+	add (16)	acc0<1>:w							acc0<16;16,1>:w								128:w
+ 	(f0.0) shr  (16) wDIFF_TEMPORAL(10)<1> 		acc0<16;16,1>:w								8:w
+	(-f0.0) mov (16) wDIFF_TEMPORAL(10)<1>		ubCURR_UV(12,2)<16;16,1>
+	cmp.le.f0.0 (16) null<1>:w						uwDIFF_TEMPORAL_SUM4x4_TEMP_2(0)<16;16,1> 	r61.26<0;2,1>:ub
+	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(10)<1>	wDIFF_TEMPORAL(10)<16;16,1>				ubCURR_UV(12,2)<16;16,1>
+
+	cmp.l.f0.0 (16)	null<1>:w						(abs)wDIFF_TEMPORAL(11)<16;16,1>			r61.28<0;2,1>:ub
+	cmp.l.f1.0 (16) null<1>:w						(abs)wDIFF_TEMPORAL(11)<16;16,1>			r61.30<0;2,1>:ub
+	mul (8)	acc0.0<1>:w								ubCURR_UV(13,2)<8;8,1>					-uwCURBE_TEMP(0,8)<0;2,1>
+	mul (8)	acc0.8<1>:w								ubCURR_UV(13,10)<8;8,1>					-uwCURBE_TEMP(0,10)<0;2,1>
+	mac (16)	acc0<1>:w							ubCURR_UV(13,2)<16;16,1>					256:w
+	mac (8)	acc0.0<1>:w								ubPREV_UV(0,176)<8;8,1>				uwCURBE_TEMP(0,8)<0;2,1>
+	mac (8)	acc0.8<1>:w								ubPREV_UV(0,184)<8;8,1>				uwCURBE_TEMP(0,10)<0;2,1>
+	add (16)	acc0<1>:w							acc0<16;16,1>:w								128:w
+ 	(f0.0) shr  (16) wDIFF_TEMPORAL(11)<1>		acc0<16;16,1>:w								8:w
+	(-f0.0) mov (16) wDIFF_TEMPORAL(11)<1>		ubCURR_UV(13,2)<16;16,1>
+	cmp.le.f0.0 (16) null<1>:w						uwDIFF_TEMPORAL_SUM4x4_TEMP_2(0)<16;16,1> 	r61.26<0;2,1>:ub
+	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(11)<1>	wDIFF_TEMPORAL(11)<16;16,1>				ubCURR_UV(13,2)<16;16,1>
+
+	cmp.l.f0.0 (16)	null<1>:w						(abs)wDIFF_TEMPORAL(12)<16;16,1>			r61.28<0;2,1>:ub
+	cmp.l.f1.0 (16) null<1>:w						(abs)wDIFF_TEMPORAL(12)<16;16,1>			r61.30<0;2,1>:ub
+	mul (8)	acc0.0<1>:w								ubCURR_UV(14,2)<8;8,1>					-uwCURBE_TEMP(0,12)<0;2,1>
+	mul (8)	acc0.8<1>:w								ubCURR_UV(14,10)<8;8,1>					-uwCURBE_TEMP(0,14)<0;2,1>
+	mac (16)	acc0<1>:w							ubCURR_UV(14,2)<16;16,1>					256:w
+	mac (8)	acc0.0<1>:w								ubPREV_UV(0,192)<8;8,1>					uwCURBE_TEMP(0,12)<0;2,1>
+	mac (8)	acc0.8<1>:w								ubPREV_UV(0,200)<8;8,1>					uwCURBE_TEMP(0,14)<0;2,1>
+	add (16)	acc0<1>:w							acc0<16;16,1>:w								128:w
+ 	(f0.0) shr (16) wDIFF_TEMPORAL(12)<1> 			acc0<16;16,1>:w								8:w
+	(-f0.0) mov (16) wDIFF_TEMPORAL(12)<1> 			ubCURR_UV(14,2)<16;16,1>
+	cmp.le.f0.0 (16) null<1>:w						uwDIFF_TEMPORAL_SUM4x4_TEMP_3(0)<16;16,1> 	r61.26<0;2,1>:ub
+	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(12)<1>	wDIFF_TEMPORAL(12)<16;16,1>					ubCURR_UV(14,2)<16;16,1>
+
+	cmp.l.f0.0 (16)	null<1>:w						(abs)wDIFF_TEMPORAL(13)<16;16,1>			r61.28<0;2,1>:ub
+	cmp.l.f1.0 (16) null<1>:w						(abs)wDIFF_TEMPORAL(13)<16;16,1>			r61.30<0;2,1>:ub
+	mul (8)	acc0.0<1>:w								ubCURR_UV(15,2)<8;8,1>					-uwCURBE_TEMP(0,12)<0;2,1>
+	mul (8)	acc0.8<1>:w								ubCURR_UV(15,10)<8;8,1>					-uwCURBE_TEMP(0,14)<0;2,1>
+	mac (16)	acc0<1>:w							ubCURR_UV(15,2)<16;16,1>					256:w
+	mac (8)	acc0.0<1>:w								ubPREV_UV(0,208)<8;8,1>				uwCURBE_TEMP(0,12)<0;2,1>
+	mac (8)	acc0.8<1>:w								ubPREV_UV(0,216)<8;8,1>				uwCURBE_TEMP(0,14)<0;2,1>
+	add (16)	acc0<1>:w							acc0<16;16,1>:w								128:w
+ 	(f0.0) shr  (16) wDIFF_TEMPORAL(13)<1>		acc0<16;16,1>:w								8:w
+	(-f0.0) mov (16) wDIFF_TEMPORAL(13)<1>		ubCURR_UV(15,2)<16;16,1>
+	cmp.le.f0.0 (16) null<1>:w						uwDIFF_TEMPORAL_SUM4x4_TEMP_3(0)<16;16,1> 	r61.26<0;2,1>:ub
+	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(13)<1>	wDIFF_TEMPORAL(13)<16;16,1>				ubCURR_UV(15,2)<16;16,1>
+
+	cmp.l.f0.0 (16)	null<1>:w						(abs)wDIFF_TEMPORAL(14)<16;16,1>			r61.28<0;2,1>:ub
+	cmp.l.f1.0 (16) null<1>:w						(abs)wDIFF_TEMPORAL(14)<16;16,1>			r61.30<0;2,1>:ub
+	mul (8)	acc0.0<1>:w								ubCURR_UV(16,2)<8;8,1>					-uwCURBE_TEMP(0,12)<0;2,1>
+	mul (8)	acc0.8<1>:w								ubCURR_UV(16,10)<8;8,1>					-uwCURBE_TEMP(0,14)<0;2,1>
+	mac (16)	acc0<1>:w							ubCURR_UV(16,2)<16;16,1>					256:w
+	mac (8)	acc0.0<1>:w								ubPREV_UV(0,224)<8;8,1>				uwCURBE_TEMP(0,12)<0;2,1>
+	mac (8)	acc0.8<1>:w								ubPREV_UV(0,232)<8;8,1>				uwCURBE_TEMP(0,14)<0;2,1>
+	add (16)	acc0<1>:w							acc0<16;16,1>:w								128:w
+ 	(f0.0) shr  (16) wDIFF_TEMPORAL(14)<1> 		acc0<16;16,1>:w								8:w
+	(-f0.0) mov (16) wDIFF_TEMPORAL(14)<1>		ubCURR_UV(16,2)<16;16,1>
+	cmp.le.f0.0 (16) null<1>:w						uwDIFF_TEMPORAL_SUM4x4_TEMP_3(0)<16;16,1> 	r61.26<0;2,1>:ub
+	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(14)<1>	wDIFF_TEMPORAL(14)<16;16,1>				ubCURR_UV(16,2)<16;16,1>
+
+	cmp.l.f0.0 (16)	null<1>:w						(abs)wDIFF_TEMPORAL(15)<16;16,1>			r61.28<0;2,1>:ub
+	cmp.l.f1.0 (16) null<1>:w						(abs)wDIFF_TEMPORAL(15)<16;16,1>			r61.30<0;2,1>:ub
+	mul (8)	acc0.0<1>:w								ubCURR_UV(17,2)<8;8,1>					-uwCURBE_TEMP(0,12)<0;2,1>
+	mul (8)	acc0.8<1>:w								ubCURR_UV(17,10)<8;8,1>					-uwCURBE_TEMP(0,14)<0;2,1>
+	mac (16)	acc0<1>:w							ubCURR_UV(17,2)<16;16,1>					256:w
+	mac (8)	acc0.0<1>:w								ubPREV_UV(0,240)<8;8,1>				uwCURBE_TEMP(0,12)<0;2,1>
+	mac (8)	acc0.8<1>:w								ubPREV_UV(0,248)<8;8,1>				uwCURBE_TEMP(0,14)<0;2,1>
+	add (16)	acc0<1>:w							acc0<16;16,1>:w								128:w
+ 	(f0.0) shr  (16) wDIFF_TEMPORAL(15)<1>		acc0<16;16,1>:w								8:w
+	(-f0.0) mov (16) wDIFF_TEMPORAL(15)<1>		ubCURR_UV(17,2)<16;16,1>
+	cmp.le.f0.0 (16) null<1>:w						uwDIFF_TEMPORAL_SUM4x4_TEMP_3(0)<16;16,1> 	r61.26<0;2,1>:ub
+	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(15)<1>	wDIFF_TEMPORAL(15)<16;16,1>				ubCURR_UV(17,2)<16;16,1>
+
+
+	//16x4 to 8x4 - Second 16 lines: each word pair is added to the pair two words further on (<4;2,1> regions at word offsets 0 and 2).
+	add (16)	uwDIFF_TEMPORAL_SUM4x4(0)<1>	uwDIFF_TEMPORAL_SUM4x4(0,0)<4;2,1>		uwDIFF_TEMPORAL_SUM4x4(0,2)<4;2,1>
+	add (16)	uwDIFF_TEMPORAL_SUM4x4(1)<1>	uwDIFF_TEMPORAL_SUM4x4(2,0)<4;2,1>		uwDIFF_TEMPORAL_SUM4x4(2,2)<4;2,1>
+
+	//8x4 to 4x4 - Second 16 lines: same pairwise add over the two rows just written; the 16 sums land in SUM4x4_FINAL(1).
+	add (16)	uwDIFF_TEMPORAL_SUM4x4_FINAL(1)<1>	uwDIFF_TEMPORAL_SUM4x4(0,0)<4;2,1>		uwDIFF_TEMPORAL_SUM4x4(0,2)<4;2,1>     { AccWrEn }
+
+	//Find if block is motion block - Second 16 lines: f1.0 is set on lanes whose sum is greater than the byte pair at r61.26.
+	cmp.g.f1.0  (16) null<1>:w				uwDIFF_TEMPORAL_SUM4x4_FINAL(1)<16;16,1> 		r61.26<0;2,1>:ub
+
+	//Move TEMPORAL_SUM4x4 for SIMD16 use later: each mov broadcasts one word pair (<0;2,1>) of FINAL(1) across half of a TEMP_n row.
+	mov (8)     uwDIFF_TEMPORAL_SUM4x4_TEMP_0(0,0)<1>                 uwDIFF_TEMPORAL_SUM4x4_FINAL(1,0)<0;2,1>		
+	mov (8)     uwDIFF_TEMPORAL_SUM4x4_TEMP_0(0,8)<1>                 uwDIFF_TEMPORAL_SUM4x4_FINAL(1,2)<0;2,1>		
+	mov (8)     uwDIFF_TEMPORAL_SUM4x4_TEMP_1(0,0)<1>                 uwDIFF_TEMPORAL_SUM4x4_FINAL(1,4)<0;2,1>		
+	mov (8)     uwDIFF_TEMPORAL_SUM4x4_TEMP_1(0,8)<1>                 uwDIFF_TEMPORAL_SUM4x4_FINAL(1,6)<0;2,1>      
+	mov (8)     uwDIFF_TEMPORAL_SUM4x4_TEMP_2(0,0)<1>                 uwDIFF_TEMPORAL_SUM4x4_FINAL(1,8)<0;2,1>		
+	mov (8)     uwDIFF_TEMPORAL_SUM4x4_TEMP_2(0,8)<1>                 uwDIFF_TEMPORAL_SUM4x4_FINAL(1,10)<0;2,1>     
+	mov (8)     uwDIFF_TEMPORAL_SUM4x4_TEMP_3(0,0)<1>                 uwDIFF_TEMPORAL_SUM4x4_FINAL(1,12)<0;2,1>		
+	mov (8)     uwDIFF_TEMPORAL_SUM4x4_TEMP_3(0,8)<1>                 uwDIFF_TEMPORAL_SUM4x4_FINAL(1,14)<0;2,1>     
+
+	//Pick Appropriate Weight History Based on motion. - Second 16 lines: lanes with f1.0 clear take CURBE_TEMP(3); lanes with f1.0 set keep CURBE_TEMP(1).
+	(-f1.0) mov (16) uwCURBE_TEMP(1)<1>		uwCURBE_TEMP(3)<16;16,1>		// NOTE(review): contents of CURBE_TEMP(1)/(3) are set up outside this chunk - confirm
+
+	//Actual DN - Second 16 lines. Per row: out = (CURR_UV*(256-w) + PREV_UV*w + 128) >> 8 on f0.0 lanes, CURR_UV elsewhere; w = weight words of CURBE_TEMP(1).
+	cmp.l.f0.0 (16)	null<1>:w							(abs)wDIFF_TEMPORAL(16)<16;16,1>			r61.28<0;2,1>:ub		// f0.0: |temporal diff| < byte pair at r61.28
+	cmp.l.f1.0 (16) null<1>:w							(abs)wDIFF_TEMPORAL(16)<16;16,1>			r61.30<0;2,1>:ub		// f1.0: |temporal diff| < byte pair at r61.30
+	mul (8)	acc0.0<1>:w									ubCURR_UV(22,2)<8;8,1>					-uwCURBE_TEMP(1,0)<0;2,1>		// lanes 0-7: acc = -CURR * w (weight pair broadcast by <0;2,1>)
+	mul (8)	acc0.8<1>:w									ubCURR_UV(22,10)<8;8,1>					-uwCURBE_TEMP(1,2)<0;2,1>		// lanes 8-15: same with the next weight pair
+	mac (16)	acc0<1>:w								ubCURR_UV(22,2)<16;16,1>					256:w		// acc += CURR * 256, giving CURR * (256 - w)
+	mac (8)	acc0.0<1>:w									ubPREV_UV(8,0)<8;8,1>					uwCURBE_TEMP(1,0)<0;2,1>		// acc += PREV * w, lanes 0-7
+	mac (8)	acc0.8<1>:w									ubPREV_UV(8,8)<8;8,1>					uwCURBE_TEMP(1,2)<0;2,1>		// acc += PREV * w, lanes 8-15
+	add (16)	acc0<1>:w								acc0<16;16,1>:w								128:w		// add half of 256 so the >>8 below rounds
+ 	(f0.0) shr  (16) wDIFF_TEMPORAL(16)<1> 			acc0<16;16,1>:w								8:w		// f0.0 lanes: store the blended value over the diff row
+	(-f0.0) mov (16) wDIFF_TEMPORAL(16)<1>			ubCURR_UV(22,2)<16;16,1>		// remaining lanes: store CURR unchanged
+	cmp.le.f0.0 (16) null<1>:w							uwDIFF_TEMPORAL_SUM4x4_TEMP_0(0)<16;16,1> 	r61.26<0;2,1>:ub		// f0.0 reused: 4x4 sum <= byte pair at r61.26
+	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(16)<1>	wDIFF_TEMPORAL(16)<16;16,1>			ubCURR_UV(22,2)<16;16,1>		// predicated average of result with CURR; NOTE(review): confirm which flags .allv combines
+
+	cmp.l.f0.0 (16)	null<1>:w							(abs)wDIFF_TEMPORAL(17)<16;16,1>			r61.28<0;2,1>:ub		// row 17: same sequence, same weights, next CURR_UV/PREV_UV row
+	cmp.l.f1.0 (16) null<1>:w							(abs)wDIFF_TEMPORAL(17)<16;16,1>			r61.30<0;2,1>:ub
+	mul (8)	acc0.0<1>:w									ubCURR_UV(23,2)<8;8,1>					-uwCURBE_TEMP(1,0)<0;2,1>
+	mul (8)	acc0.8<1>:w									ubCURR_UV(23,10)<8;8,1>					-uwCURBE_TEMP(1,2)<0;2,1>
+	mac (16)	acc0<1>:w								ubCURR_UV(23,2)<16;16,1>					256:w
+	mac (8)	acc0.0<1>:w									ubPREV_UV(8,16)<8;8,1>				uwCURBE_TEMP(1,0)<0;2,1>
+	mac (8)	acc0.8<1>:w									ubPREV_UV(8,24)<8;8,1>				uwCURBE_TEMP(1,2)<0;2,1>
+	add (16)	acc0<1>:w								acc0<16;16,1>:w								128:w
+ 	(f0.0) shr  (16) wDIFF_TEMPORAL(17)<1>			acc0<16;16,1>:w								8:w
+	(-f0.0) mov (16) wDIFF_TEMPORAL(17)<1>			ubCURR_UV(23,2)<16;16,1>
+	cmp.le.f0.0 (16) null<1>:w							uwDIFF_TEMPORAL_SUM4x4_TEMP_0(0)<16;16,1> 	r61.26<0;2,1>:ub
+	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(17)<1>	wDIFF_TEMPORAL(17)<16;16,1>			ubCURR_UV(23,2)<16;16,1>
+
+	cmp.l.f0.0 (16)	null<1>:w							(abs)wDIFF_TEMPORAL(18)<16;16,1>			r61.28<0;2,1>:ub		// row 18
+	cmp.l.f1.0 (16) null<1>:w							(abs)wDIFF_TEMPORAL(18)<16;16,1>			r61.30<0;2,1>:ub
+	mul (8)	acc0.0<1>:w									ubCURR_UV(24,2)<8;8,1>					-uwCURBE_TEMP(1,0)<0;2,1>
+	mul (8)	acc0.8<1>:w									ubCURR_UV(24,10)<8;8,1>					-uwCURBE_TEMP(1,2)<0;2,1>
+	mac (16)	acc0<1>:w								ubCURR_UV(24,2)<16;16,1>					256:w
+	mac (8)	acc0.0<1>:w									ubPREV_UV(8,32)<8;8,1>				uwCURBE_TEMP(1,0)<0;2,1>
+	mac (8)	acc0.8<1>:w									ubPREV_UV(8,40)<8;8,1>				uwCURBE_TEMP(1,2)<0;2,1>
+	add (16)	acc0<1>:w								acc0<16;16,1>:w								128:w
+ 	(f0.0) shr  (16) wDIFF_TEMPORAL(18)<1> 			acc0<16;16,1>:w								8:w
+	(-f0.0) mov (16) wDIFF_TEMPORAL(18)<1>			ubCURR_UV(24,2)<16;16,1>
+	cmp.le.f0.0 (16) null<1>:w							uwDIFF_TEMPORAL_SUM4x4_TEMP_0(0)<16;16,1> 	r61.26<0;2,1>:ub
+	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(18)<1>	wDIFF_TEMPORAL(18)<16;16,1>			ubCURR_UV(24,2)<16;16,1>
+
+	cmp.l.f0.0 (16)	null<1>:w							(abs)wDIFF_TEMPORAL(19)<16;16,1>			r61.28<0;2,1>:ub		// row 19
+	cmp.l.f1.0 (16) null<1>:w							(abs)wDIFF_TEMPORAL(19)<16;16,1>			r61.30<0;2,1>:ub
+	mul (8)	acc0.0<1>:w									ubCURR_UV(25,2)<8;8,1>					-uwCURBE_TEMP(1,0)<0;2,1>
+	mul (8)	acc0.8<1>:w									ubCURR_UV(25,10)<8;8,1>					-uwCURBE_TEMP(1,2)<0;2,1>
+	mac (16)	acc0<1>:w								ubCURR_UV(25,2)<16;16,1>					256:w
+	mac (8)	acc0.0<1>:w									ubPREV_UV(8,48)<8;8,1>				uwCURBE_TEMP(1,0)<0;2,1>
+	mac (8)	acc0.8<1>:w									ubPREV_UV(8,56)<8;8,1>				uwCURBE_TEMP(1,2)<0;2,1>
+	add (16)	acc0<1>:w								acc0<16;16,1>:w								128:w
+ 	(f0.0) shr  (16) wDIFF_TEMPORAL(19)<1>			acc0<16;16,1>:w								8:w
+	(-f0.0) mov (16) wDIFF_TEMPORAL(19)<1>			ubCURR_UV(25,2)<16;16,1>
+	cmp.le.f0.0 (16) null<1>:w							uwDIFF_TEMPORAL_SUM4x4_TEMP_0(0)<16;16,1> 	r61.26<0;2,1>:ub
+	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(19)<1>	wDIFF_TEMPORAL(19)<16;16,1>			ubCURR_UV(25,2)<16;16,1>
+	cmp.l.f0.0 (16)	null<1>:w							(abs)wDIFF_TEMPORAL(20)<16;16,1>			r61.28<0;2,1>:ub		// row 20: f0.0 = |temporal diff| < byte pair at r61.28
+	cmp.l.f1.0 (16) null<1>:w							(abs)wDIFF_TEMPORAL(20)<16;16,1>			r61.30<0;2,1>:ub		// f1.0 = |temporal diff| < byte pair at r61.30
+	mul (8)	acc0.0<1>:w									ubCURR_UV(26,2)<8;8,1>					-uwCURBE_TEMP(1,4)<0;2,1>		// lanes 0-7: acc = -CURR * w; rows 20-23 use weight words 4..7 of CURBE_TEMP(1)
+	mul (8)	acc0.8<1>:w									ubCURR_UV(26,10)<8;8,1>					-uwCURBE_TEMP(1,6)<0;2,1>		// lanes 8-15: next weight pair
+	mac (16)	acc0<1>:w								ubCURR_UV(26,2)<16;16,1>					256:w		// acc += CURR * 256, giving CURR * (256 - w)
+	mac (8)	acc0.0<1>:w									ubPREV_UV(8,64)<8;8,1>					uwCURBE_TEMP(1,4)<0;2,1>		// acc += PREV * w, lanes 0-7
+	mac (8)	acc0.8<1>:w									ubPREV_UV(8,72)<8;8,1>					uwCURBE_TEMP(1,6)<0;2,1>		// acc += PREV * w, lanes 8-15
+	add (16)	acc0<1>:w								acc0<16;16,1>:w								128:w		// add half of 256 so the >>8 below rounds
+ 	(f0.0) shr  (16) wDIFF_TEMPORAL(20)<1> 			acc0<16;16,1>:w								8:w		// f0.0 lanes: store the blended value
+	(-f0.0) mov (16) wDIFF_TEMPORAL(20)<1>			ubCURR_UV(26,2)<16;16,1>		// remaining lanes: store CURR unchanged
+	cmp.le.f0.0 (16) null<1>:w							uwDIFF_TEMPORAL_SUM4x4_TEMP_1(0)<16;16,1> 	r61.26<0;2,1>:ub		// f0.0 reused: 4x4 sum (TEMP_1) <= byte pair at r61.26
+	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(20)<1>	wDIFF_TEMPORAL(20)<16;16,1>			ubCURR_UV(26,2)<16;16,1>		// predicated average of result with CURR; NOTE(review): confirm which flags .allv combines
+
+	cmp.l.f0.0 (16)	null<1>:w							(abs)wDIFF_TEMPORAL(21)<16;16,1>			r61.28<0;2,1>:ub		// row 21: same sequence as row 20
+	cmp.l.f1.0 (16) null<1>:w							(abs)wDIFF_TEMPORAL(21)<16;16,1>			r61.30<0;2,1>:ub
+	mul (8)	acc0.0<1>:w									ubCURR_UV(27,2)<8;8,1>					-uwCURBE_TEMP(1,4)<0;2,1>
+	mul (8)	acc0.8<1>:w									ubCURR_UV(27,10)<8;8,1>					-uwCURBE_TEMP(1,6)<0;2,1>
+	mac (16)	acc0<1>:w								ubCURR_UV(27,2)<16;16,1>					256:w
+	mac (8)	acc0.0<1>:w									ubPREV_UV(8,80)<8;8,1>				uwCURBE_TEMP(1,4)<0;2,1>
+	mac (8)	acc0.8<1>:w									ubPREV_UV(8,88)<8;8,1>				uwCURBE_TEMP(1,6)<0;2,1>
+	add (16)	acc0<1>:w								acc0<16;16,1>:w								128:w
+ 	(f0.0) shr  (16) wDIFF_TEMPORAL(21)<1>			acc0<16;16,1>:w								8:w
+	(-f0.0) mov (16) wDIFF_TEMPORAL(21)<1>			ubCURR_UV(27,2)<16;16,1>
+	cmp.le.f0.0 (16) null<1>:w							uwDIFF_TEMPORAL_SUM4x4_TEMP_1(0)<16;16,1> 	r61.26<0;2,1>:ub
+	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(21)<1>	wDIFF_TEMPORAL(21)<16;16,1>			ubCURR_UV(27,2)<16;16,1>
+
+	cmp.l.f0.0 (16)	null<1>:w							(abs)wDIFF_TEMPORAL(22)<16;16,1>			r61.28<0;2,1>:ub		// row 22
+	cmp.l.f1.0 (16) null<1>:w							(abs)wDIFF_TEMPORAL(22)<16;16,1>			r61.30<0;2,1>:ub
+	mul (8)	acc0.0<1>:w									ubCURR_UV(28,2)<8;8,1>					-uwCURBE_TEMP(1,4)<0;2,1>
+	mul (8)	acc0.8<1>:w									ubCURR_UV(28,10)<8;8,1>					-uwCURBE_TEMP(1,6)<0;2,1>
+	mac (16)	acc0<1>:w								ubCURR_UV(28,2)<16;16,1>					256:w
+	mac (8)	acc0.0<1>:w									ubPREV_UV(8,96)<8;8,1>				uwCURBE_TEMP(1,4)<0;2,1>
+	mac (8)	acc0.8<1>:w									ubPREV_UV(8,104)<8;8,1>				uwCURBE_TEMP(1,6)<0;2,1>
+	add (16)	acc0<1>:w								acc0<16;16,1>:w								128:w
+ 	(f0.0) shr  (16) wDIFF_TEMPORAL(22)<1> 			acc0<16;16,1>:w								8:w
+	(-f0.0) mov (16) wDIFF_TEMPORAL(22)<1>			ubCURR_UV(28,2)<16;16,1>
+	cmp.le.f0.0 (16) null<1>:w							uwDIFF_TEMPORAL_SUM4x4_TEMP_1(0)<16;16,1> 	r61.26<0;2,1>:ub
+	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(22)<1>	wDIFF_TEMPORAL(22)<16;16,1>			ubCURR_UV(28,2)<16;16,1>
+
+	cmp.l.f0.0 (16)	null<1>:w							(abs)wDIFF_TEMPORAL(23)<16;16,1>			r61.28<0;2,1>:ub		// row 23
+	cmp.l.f1.0 (16) null<1>:w							(abs)wDIFF_TEMPORAL(23)<16;16,1>			r61.30<0;2,1>:ub
+	mul (8)	acc0.0<1>:w									ubCURR_UV(29,2)<8;8,1>					-uwCURBE_TEMP(1,4)<0;2,1>
+	mul (8)	acc0.8<1>:w									ubCURR_UV(29,10)<8;8,1>					-uwCURBE_TEMP(1,6)<0;2,1>
+	mac (16)	acc0<1>:w								ubCURR_UV(29,2)<16;16,1>					256:w
+	mac (8)	acc0.0<1>:w									ubPREV_UV(8,112)<8;8,1>				uwCURBE_TEMP(1,4)<0;2,1>
+	mac (8)	acc0.8<1>:w									ubPREV_UV(8,120)<8;8,1>				uwCURBE_TEMP(1,6)<0;2,1>
+	add (16)	acc0<1>:w								acc0<16;16,1>:w								128:w
+ 	(f0.0) shr  (16) wDIFF_TEMPORAL(23)<1>			acc0<16;16,1>:w								8:w
+	(-f0.0) mov (16) wDIFF_TEMPORAL(23)<1>			ubCURR_UV(29,2)<16;16,1>
+	cmp.le.f0.0 (16) null<1>:w							uwDIFF_TEMPORAL_SUM4x4_TEMP_1(0)<16;16,1> 	r61.26<0;2,1>:ub
+	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(23)<1>	wDIFF_TEMPORAL(23)<16;16,1>			ubCURR_UV(29,2)<16;16,1>
+	cmp.l.f0.0 (16)	null<1>:w							(abs)wDIFF_TEMPORAL(24)<16;16,1>			r61.28<0;2,1>:ub		// row 24: f0.0 = |temporal diff| < byte pair at r61.28
+	cmp.l.f1.0 (16) null<1>:w							(abs)wDIFF_TEMPORAL(24)<16;16,1>			r61.30<0;2,1>:ub		// f1.0 = |temporal diff| < byte pair at r61.30
+	mul (8)	acc0.0<1>:w									ubCURR_UV(30,2)<8;8,1>					-uwCURBE_TEMP(1,8)<0;2,1>		// lanes 0-7: acc = -CURR * w; rows 24-27 use weight words 8..11 of CURBE_TEMP(1)
+	mul (8)	acc0.8<1>:w									ubCURR_UV(30,10)<8;8,1>					-uwCURBE_TEMP(1,10)<0;2,1>		// lanes 8-15: next weight pair
+	mac (16)	acc0<1>:w								ubCURR_UV(30,2)<16;16,1>					256:w		// acc += CURR * 256, giving CURR * (256 - w)
+	mac (8)	acc0.0<1>:w									ubPREV_UV(8,128)<8;8,1>					uwCURBE_TEMP(1,8)<0;2,1>		// acc += PREV * w, lanes 0-7
+	mac (8)	acc0.8<1>:w									ubPREV_UV(8,136)<8;8,1>					uwCURBE_TEMP(1,10)<0;2,1>		// acc += PREV * w, lanes 8-15
+	add (16)	acc0<1>:w								acc0<16;16,1>:w								128:w		// add half of 256 so the >>8 below rounds
+ 	(f0.0) shr  (16) wDIFF_TEMPORAL(24)<1> 			acc0<16;16,1>:w								8:w		// f0.0 lanes: store the blended value
+	(-f0.0) mov (16) wDIFF_TEMPORAL(24)<1>			ubCURR_UV(30,2)<16;16,1>		// remaining lanes: store CURR unchanged
+	cmp.le.f0.0 (16) null<1>:w							uwDIFF_TEMPORAL_SUM4x4_TEMP_2(0)<16;16,1> 	r61.26<0;2,1>:ub		// f0.0 reused: 4x4 sum (TEMP_2) <= byte pair at r61.26
+	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(24)<1>	wDIFF_TEMPORAL(24)<16;16,1>			ubCURR_UV(30,2)<16;16,1>		// predicated average of result with CURR; NOTE(review): confirm which flags .allv combines
+
+	cmp.l.f0.0 (16)	null<1>:w							(abs)wDIFF_TEMPORAL(25)<16;16,1>			r61.28<0;2,1>:ub		// row 25: same sequence as row 24
+	cmp.l.f1.0 (16) null<1>:w							(abs)wDIFF_TEMPORAL(25)<16;16,1>			r61.30<0;2,1>:ub
+	mul (8)	acc0.0<1>:w									ubCURR_UV(31,2)<8;8,1>					-uwCURBE_TEMP(1,8)<0;2,1>
+	mul (8)	acc0.8<1>:w									ubCURR_UV(31,10)<8;8,1>					-uwCURBE_TEMP(1,10)<0;2,1>
+	mac (16)	acc0<1>:w								ubCURR_UV(31,2)<16;16,1>					256:w
+	mac (8)	acc0.0<1>:w									ubPREV_UV(8,144)<8;8,1>				uwCURBE_TEMP(1,8)<0;2,1>
+	mac (8)	acc0.8<1>:w									ubPREV_UV(8,152)<8;8,1>				uwCURBE_TEMP(1,10)<0;2,1>
+	add (16)	acc0<1>:w								acc0<16;16,1>:w								128:w
+ 	(f0.0) shr  (16) wDIFF_TEMPORAL(25)<1>			acc0<16;16,1>:w								8:w
+	(-f0.0) mov (16) wDIFF_TEMPORAL(25)<1>			ubCURR_UV(31,2)<16;16,1>
+	cmp.le.f0.0 (16) null<1>:w							uwDIFF_TEMPORAL_SUM4x4_TEMP_2(0)<16;16,1> 	r61.26<0;2,1>:ub
+	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(25)<1>	wDIFF_TEMPORAL(25)<16;16,1>			ubCURR_UV(31,2)<16;16,1>
+
+	cmp.l.f0.0 (16)	null<1>:w							(abs)wDIFF_TEMPORAL(26)<16;16,1>			r61.28<0;2,1>:ub		// row 26
+	cmp.l.f1.0 (16) null<1>:w							(abs)wDIFF_TEMPORAL(26)<16;16,1>			r61.30<0;2,1>:ub
+	mul (8)	acc0.0<1>:w									ubCURR_UV(32,2)<8;8,1>					-uwCURBE_TEMP(1,8)<0;2,1>
+	mul (8)	acc0.8<1>:w									ubCURR_UV(32,10)<8;8,1>					-uwCURBE_TEMP(1,10)<0;2,1>
+	mac (16)	acc0<1>:w								ubCURR_UV(32,2)<16;16,1>					256:w
+	mac (8)	acc0.0<1>:w									ubPREV_UV(8,160)<8;8,1>				uwCURBE_TEMP(1,8)<0;2,1>
+	mac (8)	acc0.8<1>:w									ubPREV_UV(8,168)<8;8,1>				uwCURBE_TEMP(1,10)<0;2,1>
+	add (16)	acc0<1>:w								acc0<16;16,1>:w								128:w
+ 	(f0.0) shr  (16) wDIFF_TEMPORAL(26)<1> 			acc0<16;16,1>:w								8:w
+	(-f0.0) mov (16) wDIFF_TEMPORAL(26)<1>			ubCURR_UV(32,2)<16;16,1>
+	cmp.le.f0.0 (16) null<1>:w							uwDIFF_TEMPORAL_SUM4x4_TEMP_2(0)<16;16,1> 	r61.26<0;2,1>:ub
+	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(26)<1>	wDIFF_TEMPORAL(26)<16;16,1>			ubCURR_UV(32,2)<16;16,1>
+
+	cmp.l.f0.0 (16)	null<1>:w							(abs)wDIFF_TEMPORAL(27)<16;16,1>			r61.28<0;2,1>:ub		// row 27
+	cmp.l.f1.0 (16) null<1>:w							(abs)wDIFF_TEMPORAL(27)<16;16,1>			r61.30<0;2,1>:ub
+	mul (8)	acc0.0<1>:w									ubCURR_UV(33,2)<8;8,1>					-uwCURBE_TEMP(1,8)<0;2,1>
+	mul (8)	acc0.8<1>:w									ubCURR_UV(33,10)<8;8,1>					-uwCURBE_TEMP(1,10)<0;2,1>
+	mac (16)	acc0<1>:w								ubCURR_UV(33,2)<16;16,1>					256:w
+	mac (8)	acc0.0<1>:w									ubPREV_UV(8,176)<8;8,1>				uwCURBE_TEMP(1,8)<0;2,1>
+	mac (8)	acc0.8<1>:w									ubPREV_UV(8,184)<8;8,1>				uwCURBE_TEMP(1,10)<0;2,1>
+	add (16)	acc0<1>:w								acc0<16;16,1>:w								128:w
+ 	(f0.0) shr  (16) wDIFF_TEMPORAL(27)<1>			acc0<16;16,1>:w								8:w
+	(-f0.0) mov (16) wDIFF_TEMPORAL(27)<1>			ubCURR_UV(33,2)<16;16,1>
+	cmp.le.f0.0 (16) null<1>:w							uwDIFF_TEMPORAL_SUM4x4_TEMP_2(0)<16;16,1> 	r61.26<0;2,1>:ub
+	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(27)<1>	wDIFF_TEMPORAL(27)<16;16,1>			ubCURR_UV(33,2)<16;16,1>
+
+	cmp.l.f0.0 (16)	null<1>:w						(abs)wDIFF_TEMPORAL(28)<16;16,1>			r61.28<0;2,1>:ub		// row 28: f0.0 = |temporal diff| < byte pair at r61.28
+	cmp.l.f1.0 (16) null<1>:w						(abs)wDIFF_TEMPORAL(28)<16;16,1>			r61.30<0;2,1>:ub		// f1.0 = |temporal diff| < byte pair at r61.30
+	mul (8)	acc0.0<1>:w								ubCURR_UV(34,2)<8;8,1>					-uwCURBE_TEMP(1,12)<0;2,1>		// lanes 0-7: acc = -CURR * w; last 4 rows use weight words 12..15 of CURBE_TEMP(1)
+	mul (8)	acc0.8<1>:w								ubCURR_UV(34,10)<8;8,1>					-uwCURBE_TEMP(1,14)<0;2,1>		// lanes 8-15: next weight pair
+	mac (16)	acc0<1>:w							ubCURR_UV(34,2)<16;16,1>					256:w		// acc += CURR * 256, giving CURR * (256 - w)
+	mac (8)	acc0.0<1>:w								ubPREV_UV(8,192)<8;8,1>					uwCURBE_TEMP(1,12)<0;2,1>		// acc += PREV * w, lanes 0-7
+	mac (8)	acc0.8<1>:w								ubPREV_UV(8,200)<8;8,1>					uwCURBE_TEMP(1,14)<0;2,1>		// acc += PREV * w, lanes 8-15
+	add (16)	acc0<1>:w							acc0<16;16,1>:w								128:w		// add half of 256 so the >>8 below rounds
+ 	(f0.0) shr  (16) wDIFF_TEMPORAL(28)<1> 			acc0<16;16,1>:w								8:w		// f0.0 lanes: store the blended value
+	(-f0.0) mov (16) wDIFF_TEMPORAL(28)<1>			ubCURR_UV(34,2)<16;16,1>		// remaining lanes: store CURR unchanged
+	cmp.le.f0.0 (16) null<1>:w						uwDIFF_TEMPORAL_SUM4x4_TEMP_3(0)<16;16,1> 	r61.26<0;2,1>:ub		// f0.0 reused: 4x4 sum (TEMP_3) <= byte pair at r61.26
+	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(28)<1>	wDIFF_TEMPORAL(28)<16;16,1>				ubCURR_UV(34,2)<16;16,1>		// predicated average of result with CURR; NOTE(review): confirm which flags .allv combines
+
+	cmp.l.f0.0 (16)	null<1>:w						(abs)wCURBE_TEMP(4)<16;16,1>				r61.28<0;2,1>:ub		// next row: same sequence, but the diff/result row is wCURBE_TEMP(4) instead of a DIFF_TEMPORAL row
+	cmp.l.f1.0 (16) null<1>:w						(abs)wCURBE_TEMP(4)<16;16,1>				r61.30<0;2,1>:ub
+	mul (8)	acc0.0<1>:w								ubCURR_UV(35,2)<8;8,1>					-uwCURBE_TEMP(1,12)<0;2,1>
+	mul (8)	acc0.8<1>:w								ubCURR_UV(35,10)<8;8,1>					-uwCURBE_TEMP(1,14)<0;2,1>
+	mac (16)	acc0<1>:w							ubCURR_UV(35,2)<16;16,1>					256:w
+	mac (8)	acc0.0<1>:w								ubPREV_UV(8,208)<8;8,1>				uwCURBE_TEMP(1,12)<0;2,1>
+	mac (8)	acc0.8<1>:w								ubPREV_UV(8,216)<8;8,1>				uwCURBE_TEMP(1,14)<0;2,1>
+	add (16)	acc0<1>:w							acc0<16;16,1>:w								128:w
+ 	(f0.0) shr  (16) wCURBE_TEMP(4)<1>				acc0<16;16,1>:w								8:w
+	(-f0.0) mov (16) wCURBE_TEMP(4)<1>				ubCURR_UV(35,2)<16;16,1>
+	cmp.le.f0.0 (16) null<1>:w						uwDIFF_TEMPORAL_SUM4x4_TEMP_3(0)<16;16,1> 	r61.26<0;2,1>:ub
+	(-f0.0.allv) avg (16)	wCURBE_TEMP(4)<1>		wCURBE_TEMP(4)<16;16,1>				ubCURR_UV(35,2)<16;16,1>
+
+	cmp.l.f0.0 (16)	null<1>:w						(abs)wCURBE_TEMP(5)<16;16,1>				r61.28<0;2,1>:ub		// next row, held in wCURBE_TEMP(5)
+	cmp.l.f1.0 (16) null<1>:w						(abs)wCURBE_TEMP(5)<16;16,1>				r61.30<0;2,1>:ub
+	mul (8)	acc0.0<1>:w								ubCURR_UV(36,2)<8;8,1>					-uwCURBE_TEMP(1,12)<0;2,1>
+	mul (8)	acc0.8<1>:w								ubCURR_UV(36,10)<8;8,1>					-uwCURBE_TEMP(1,14)<0;2,1>
+	mac (16)	acc0<1>:w							ubCURR_UV(36,2)<16;16,1>					256:w
+	mac (8)	acc0.0<1>:w								ubPREV_UV(8,224)<8;8,1>				uwCURBE_TEMP(1,12)<0;2,1>
+	mac (8)	acc0.8<1>:w								ubPREV_UV(8,232)<8;8,1>				uwCURBE_TEMP(1,14)<0;2,1>
+	add (16)	acc0<1>:w							acc0<16;16,1>:w								128:w
+ 	(f0.0) shr  (16) wCURBE_TEMP(5)<1> 				acc0<16;16,1>:w								8:w
+	(-f0.0) mov (16) wCURBE_TEMP(5)<1>				ubCURR_UV(36,2)<16;16,1>
+	cmp.le.f0.0 (16) null<1>:w						uwDIFF_TEMPORAL_SUM4x4_TEMP_3(0)<16;16,1> 	r61.26<0;2,1>:ub
+	(-f0.0.allv) avg (16)	wCURBE_TEMP(5)<1>		wCURBE_TEMP(5)<16;16,1>				ubCURR_UV(36,2)<16;16,1>
+
+	cmp.l.f0.0 (16)	null<1>:w						(abs)wCURBE_TEMP(6)<16;16,1>				r61.28<0;2,1>:ub		// last row, held in wCURBE_TEMP(6)
+	cmp.l.f1.0 (16) null<1>:w						(abs)wCURBE_TEMP(6)<16;16,1>				r61.30<0;2,1>:ub
+	mul (8)	acc0.0<1>:w								ubCURR_UV(37,2)<8;8,1>					-uwCURBE_TEMP(1,12)<0;2,1>
+	mul (8)	acc0.8<1>:w								ubCURR_UV(37,10)<8;8,1>					-uwCURBE_TEMP(1,14)<0;2,1>
+	mac (16)	acc0<1>:w							ubCURR_UV(37,2)<16;16,1>					256:w
+	mac (8)	acc0.0<1>:w								ubPREV_UV(8,240)<8;8,1>				uwCURBE_TEMP(1,12)<0;2,1>
+	mac (8)	acc0.8<1>:w								ubPREV_UV(8,248)<8;8,1>				uwCURBE_TEMP(1,14)<0;2,1>
+	add (16)	acc0<1>:w							acc0<16;16,1>:w								128:w
+ 	(f0.0) shr  (16) wCURBE_TEMP(6)<1>				acc0<16;16,1>:w								8:w
+	(-f0.0) mov (16) wCURBE_TEMP(6)<1>				ubCURR_UV(37,2)<16;16,1>
+	cmp.le.f0.0 (16) null<1>:w						uwDIFF_TEMPORAL_SUM4x4_TEMP_3(0)<16;16,1> 	r61.26<0;2,1>:ub
+	(-f0.0.allv) avg (16)	wCURBE_TEMP(6)<1>		wCURBE_TEMP(6)<16;16,1>				ubCURR_UV(37,2)<16;16,1>
+
+	//Pack Weight History WORD -> BYTE: every second byte (<32;16,2>) of CURBE_TEMP(0) and CURBE_TEMP(1) is gathered into the two 16-byte halves of CURBE_TEMP(3).
+	mov (16) ubCURBE_TEMP(3,0)<1>		ubCURBE_TEMP(0)<32;16,2>		// bytes 0..15 of CURBE_TEMP(3) <- even bytes of CURBE_TEMP(0)
+	mov (16) ubCURBE_TEMP(3,16)<1>		ubCURBE_TEMP(1)<32;16,2>		// bytes 16..31 of CURBE_TEMP(3) <- even bytes of CURBE_TEMP(1)
+
+
+
+//Module Name 	: DN_UV_Compute_BNE_UV
+//Author		: Tatiya, Rupesh
+//Description	: Computes minimum SOAD for each 16x4 block: lane-wise minimum of two SOAD_MIN_8x4 word groups, then packed to bytes in CURBE_TEMP(1).
+
+	cmp.l.f0.0  (8) null:w     				uwSOAD_MIN_8x4(0,12)<16;4,1> 	uwSOAD_MIN_8x4(2,12)<16;4,1>		// f0.0: words 12..15 of rows 0,1 < words 12..15 of rows 2,3
+	(f0.0)sel	(8) uwCURBE_TEMP(1,0)<1>	uwSOAD_MIN_8x4(0,12)<16;4,1> 	uwSOAD_MIN_8x4(2,12)<16;4,1>		// take src0 where it is smaller, else src1 -> per-lane minimum
+
+    mov  (8)	ubCURBE_TEMP(1)<1>			ubCURBE_TEMP(1)<16;8,2>		// keep every second byte of the 8 result words, packed into bytes 0..7
+
+
+
+//Module Name 	: DN_UV_PL2_Pack_Denoised_UV
+//Name			: Tatiya, Rupesh
+//Description	: Pack UV denoised data based on PL2 input.
+
+
+
+//Module Name 	: DN_UV_Pack_Denoised_UV
+//Name			: Tatiya, Rupesh
+//Description	: Pack UV denoised data based on PL2/PL3/PA. Each word row of results is narrowed to 16 bytes (<32;16,2> picks every second byte); two rows fill one MSGPAYLOAD register.
+
+
+//First 16 lines.
+	mov  (16)	ubMSGPAYLOAD_UV0(0,0)<1>		ubDIFF_TEMPORAL(0)<32;16,2>		// row 0 -> bytes 0..15 of payload register 0
+	mov  (16)	ubMSGPAYLOAD_UV0(0,16)<1>		ubDIFF_TEMPORAL(1)<32;16,2>		// row 1 -> bytes 16..31 of payload register 0
+	mov  (16)	ubMSGPAYLOAD_UV0(1,0)<1>		ubDIFF_TEMPORAL(2)<32;16,2>		
+	mov  (16)	ubMSGPAYLOAD_UV0(1,16)<1>		ubDIFF_TEMPORAL(3)<32;16,2>		
+	mov  (16)	ubMSGPAYLOAD_UV0(2,0)<1>		ubDIFF_TEMPORAL(4)<32;16,2>		
+	mov  (16)	ubMSGPAYLOAD_UV0(2,16)<1>		ubDIFF_TEMPORAL(5)<32;16,2>		
+	mov  (16)	ubMSGPAYLOAD_UV0(3,0)<1>		ubDIFF_TEMPORAL(6)<32;16,2>		
+	mov  (16)	ubMSGPAYLOAD_UV0(3,16)<1>		ubDIFF_TEMPORAL(7)<32;16,2>		
+	mov  (16)	ubMSGPAYLOAD_UV0(4,0)<1>		ubDIFF_TEMPORAL(8)<32;16,2>		
+	mov  (16)	ubMSGPAYLOAD_UV0(4,16)<1>		ubDIFF_TEMPORAL(9)<32;16,2>		
+	mov  (16)	ubMSGPAYLOAD_UV0(5,0)<1>		ubDIFF_TEMPORAL(10)<32;16,2>		
+	mov  (16)	ubMSGPAYLOAD_UV0(5,16)<1>		ubDIFF_TEMPORAL(11)<32;16,2>		
+	mov  (16)	ubMSGPAYLOAD_UV0(6,0)<1>		ubDIFF_TEMPORAL(12)<32;16,2>		
+	mov  (16)	ubMSGPAYLOAD_UV0(6,16)<1>		ubDIFF_TEMPORAL(13)<32;16,2>		
+	mov  (16)	ubMSGPAYLOAD_UV0(7,0)<1>		ubDIFF_TEMPORAL(14)<32;16,2>		
+	mov  (16)	ubMSGPAYLOAD_UV0(7,16)<1>		ubDIFF_TEMPORAL(15)<32;16,2>		
+
+//Second 16 lines.
+//12 lines first (DIFF_TEMPORAL rows 16..27)
+	mov  (16)	ubMSGPAYLOAD_UV1(0,0)<1>		ubDIFF_TEMPORAL(16)<32;16,2>		
+	mov  (16)	ubMSGPAYLOAD_UV1(0,16)<1>		ubDIFF_TEMPORAL(17)<32;16,2>		
+	mov  (16)	ubMSGPAYLOAD_UV1(1,0)<1>		ubDIFF_TEMPORAL(18)<32;16,2>		
+	mov  (16)	ubMSGPAYLOAD_UV1(1,16)<1>		ubDIFF_TEMPORAL(19)<32;16,2>		
+	mov  (16)	ubMSGPAYLOAD_UV1(2,0)<1>		ubDIFF_TEMPORAL(20)<32;16,2>		
+	mov  (16)	ubMSGPAYLOAD_UV1(2,16)<1>		ubDIFF_TEMPORAL(21)<32;16,2>		
+	mov  (16)	ubMSGPAYLOAD_UV1(3,0)<1>		ubDIFF_TEMPORAL(22)<32;16,2>		
+	mov  (16)	ubMSGPAYLOAD_UV1(3,16)<1>		ubDIFF_TEMPORAL(23)<32;16,2>		
+	mov  (16)	ubMSGPAYLOAD_UV1(4,0)<1>		ubDIFF_TEMPORAL(24)<32;16,2>		
+	mov  (16)	ubMSGPAYLOAD_UV1(4,16)<1>		ubDIFF_TEMPORAL(25)<32;16,2>		
+	mov  (16)	ubMSGPAYLOAD_UV1(5,0)<1>		ubDIFF_TEMPORAL(26)<32;16,2>		
+	mov  (16)	ubMSGPAYLOAD_UV1(5,16)<1>		ubDIFF_TEMPORAL(27)<32;16,2>		
+
+	//Remaining 4 lines: DIFF_TEMPORAL row 28, then the 3 rows that were computed in CURBE_TEMP(4..6)
+	mov  (16)	ubMSGPAYLOAD_UV1(6,0)<1>		ubDIFF_TEMPORAL(28)<32;16,2>		
+	mov  (16)	ubMSGPAYLOAD_UV1(6,16)<1>		ubCURBE_TEMP(4)<32;16,2>			
+	mov  (16)	ubMSGPAYLOAD_UV1(7,0)<1>		ubCURBE_TEMP(5)<32;16,2>			
+	mov  (16)	ubMSGPAYLOAD_UV1(7,16)<1>		ubCURBE_TEMP(6)<32;16,2>			
+
+
+
+//Module Name 	: DN_UV_420_Save_Curr_Frame_Y
+//Author		: Tatiya, Rupesh
+//Description	: Save Curr Frame Y data for 420 Input
+
+
+
+//Module Name 	: DN_UV_Load_Curr_Frame_Y
+//Author		: Tatiya, Rupesh
+//Description	: Saves Y or YUY2 of Current frame.
+// NOTE(review): module name says "Load" but the description says "Saves" and all four sends use a null destination - confirm naming.
+// Builds four message headers (r92, r101, r110, r119; 9 registers apart) and issues one send per header.
+// NOTE(review): presumably header dword0/dword1 are the block X/Y origin and dword2 = 0xF000F encodes a 16x16 block (width-1 / height-1) - confirm.
+// Template header is assembled in acc0: copy of r0, then dword0/dword1 replaced by words r62.10/r62.11 (word -> dword).
+	mov (8)		acc0.0<1>:ud		r0.0<8;8,1>:ud
+	mov (2)		acc0.0<1>:d			r62.10<2;2,1>:w
+// Template dword2 = 0xF000F (identical in all four headers).
+	mov (1)		acc0.2<1>:d			0xF000F:ud
+// r92 receives the template unchanged.
+	mov (8)     r92.0<1>:ud	acc0.0<8;8,1>:ud
+// The remaining three headers start from the same template and are offset by the adds below.
+	mov (8)     r101.0<1>:ud	acc0.0<8;8,1>:ud
+	mov (8)     r110.0<1>:ud	acc0.0<8;8,1>:ud
+	mov (8)     r119.0<1>:ud	acc0.0<8;8,1>:ud
+// r101: dword1 = template dword1 + 16
+	add (1)		r101.1<1>:d 	acc0.1<0;1,0>:d   		16:d
+// r110: dword0 = template dword0 + 16
+	add (1)		r110.0<1>:d 	acc0.0<0;1,0>:d   		16:d
+// r119: dword0 and dword1 = template dword0/dword1 + 16
+	add (2)		r119.0<1>:d 	acc0.0<2;2,1>:d   		16:d
+// Four sends to target 0x5 with the same descriptor 0x120A8018, one per header register.
+	send (8)	null<1>:d	r92		0x5		0x120A8018:ud
+	send (8)	null<1>:d	r101		0x5		0x120A8018:ud
+	send (8)	null<1>:d	r110		0x5		0x120A8018:ud
+	send (8)	null<1>:d	r119		0x5		0x120A8018:ud
+
+
+	//TODO - See if History saving can be combined with Curr Frame Save. - rT
+
+
+//Module Name 	: DN_UV_Save_Hist_UV
+//Author		: Tatiya, Rupesh
+//Description	: Saves DN history for UV data.
+// Header r3 = copy of r0 with dword0/dword1 replaced by words r62.12/r62.13 (word -> dword) and dword2 = 0x30007.
+	mov (8)  r3.0<1>:ud	r0.0<8;8,1>:ud
+	mov (2)	 r3.0<1>:d	r62.12<2;2,1>:w				
+	mov (1)	 r3.2<1>:d	0x30007:ud		
+// Single send, null destination, target 0x5. NOTE(review): 0x30007 presumably encodes an 8x4 block (width-1 / height-1) - confirm.
+	send (8) null<1>:d	r3		0x5		0x40A8021:ud
+
+
+
+//Module Name	: DN_UV_Save_BNE_UV
+//Author		: Tatiya, Rupesh
+//Description	: Saves BNE values for 16x16 U and 16x16 V.
+// Header r1 = copy of r0 with dword0/dword1 replaced by words r63.12/r63.13 (word -> dword) and dword2 = 0x10003.
+	mov (8)  r1.0<1>:ud	r0.0<8;8,1>:ud
+	mov (2)	 r1.0<1>:d		r63.12<2;2,1>:w					
+	mov (1)	 r1.2<1>:d		0x10003:ud		
+// Single send, null destination, target 0x5. NOTE(review): 0x10003 presumably encodes a 4x2 block (width-1 / height-1) - confirm.
+	send (8) null<1>:d	r1		0x5		0x40A8023:ud
+
+
+
+//Module Name 	: DN_UV_PL2_Save_Curr_Frame_UV
+//Author		: Tatiya, Rupesh
+
+
+
+//Module name 	:  DN_UV_Save_Curr_Frame_UV
+//Author		:  Tatiya, Rupesh
+//Description	:  Saves Current Frame (UV only).
+// Two headers (r74, r83) start as copies of r0 and are each sent with descriptor 0x120A8019.
+// dword0 comes from word r62.10 (r83 adds 16), dword1 is word r62.11 shifted right by 1,
+// and dword2 is 0xF000F in both headers.
+// NOTE(review): the >>1 on r62.11 presumably maps a luma row to the half-height chroma plane - confirm.
+	mov  (8) 		r74<1>:ud		r0.0<8;8,1>:ud
+	mov  (8) 		r83<1>:ud		r0.0<8;8,1>:ud
+// First header (r74): dword0 taken as-is from r62.10.
+	mov (1)	 r74.0<1>:d		r62.10<0;1,0>:w						
+	shr (1)  r74.1<1>:d		r62.11<0;1,0>:w			1:w			
+	mov (1)	 r74.2<1>:d		0xF000F:ud		
+// Second header (r83): same as r74 except dword0 = r62.10 + 16.
+	add (1)	 r83.0<1>:d		r62.10<0;1,0>:w			16:d		
+	shr (1)	 r83.1<1>:d		r62.11<0;1,0>:w			 1:w		
+	mov (1)	 r83.2<1>:d		0xF000F:ud		
+// Two sends, null destination, target 0x5, one per header register.
+	send (8) null<1>:d	r74		0x5		0x120A8019:ud
+	send (8) null<1>:d	r83		0x5		0x120A8019:ud
+
+
+
+//End of Thread message
+// r127 = copy of r0, then a single-lane send (null destination) to target 0x27 with descriptor 0x02000010.
+mov (8) r127<1>:ud r0.0<8;8,1>:ud 
+ send (1) null<1>:d r127 0x27 0x02000010
+
+
+	//All sub-routines here
+
+
+// Module Name  : Noise_Detection
+// Author		: Tatiya, Rupesh
+// Description	: Performs noise detection on 32 pixels of U (8x4) and 32 pixels of V (8x4).
+
+DN_UV_NOISE_DETECTION_UV:
+
+// Find Field Block Median
+//
+// Purpose   : Find the median value of the nine pixels in the same field
+//             which are centered at current pixel.
+//
+//             Works on 9 pixels centered at the current pixel
+//                NOTE: pixels are within same field.
+//                      v4 - current pixel
+//
+//                  v2 v1 v0
+//                   *  *  *     <--- Different field - not used
+//                  v5 v4 v3
+//                   *  *  *     <--- Different field - not used
+//                  v8 v7 v6
+
+// Algorithm to find median modifies the data.
+// Copy the data needed to calculate median so the original source data stays intact.
+//
+
+//TODO - Change Interleaved implementation to separated one if - ,  does not work on predication. - rT
+
+//Delete Later - rT
+//mov (1) pCUR_UV:uw		52*32:uw
+
+// v0
+mov (16) ubMEDIAN_TEMP(0,0)<1>    	r[a0.0,0]<16;16,1>		
+// v0
+mov (16) ubMEDIAN_TEMP(0,16)<1>   	r[a0.0,32]<16;16,1>		
+// v1
+mov (16) ubMEDIAN_TEMP(1,0)<1>    	r[a0.0,2]<16;16,1>		
+// v1
+mov (16) ubMEDIAN_TEMP(1,16)<1>   	r[a0.0,34]<16;16,1>		
+// v2
+mov (16) ubMEDIAN_TEMP(2,0)<1>    	r[a0.0,4]<16;16,1>		
+// v2
+mov (16) ubMEDIAN_TEMP(2,16)<1>   	r[a0.0,36]<16;16,1>		
+// v3
+mov (16) ubMEDIAN_TEMP(3,0)<1>    	r[a0.0,64]<16;16,1>  	
+// v3
+mov (16) ubMEDIAN_TEMP(3,16)<1>   	r[a0.0,96]<16;16,1>		
+// v4
+mov (16) ubMEDIAN_TEMP(4,0)<1>		r[a0.0,66]<16;16,1>  	
+// v4
+mov (16) ubMEDIAN_TEMP(4,16)<1>   	r[a0.0,98]<16;16,1>  	
+// v5
+mov (16) ubMEDIAN_TEMP(5,0)<1>		r[a0.0,68]<16;16,1>  	
+// v5
+mov (16) ubMEDIAN_TEMP(5,16)<1>   	r[a0.0,100]<16;16,1>  	
+// v6
+mov (16) ubMEDIAN_TEMP(6,0)<1>    	r[a0.0,128]<16;16,1>  	
+// v6
+mov (16) ubMEDIAN_TEMP(6,16)<1>   	r[a0.0,160]<16;16,1>		
+// v7
+mov (16) ubMEDIAN_TEMP(7,0)<1>		r[a0.0,130]<16;16,1>  	
+// v7
+mov (16) ubMEDIAN_TEMP(7,16)<1>   	r[a0.0,162]<16;16,1>  	
+// v8
+mov (16) ubMEDIAN_TEMP(8,0)<1>		r[a0.0,132]<16;16,1>  	
+// v8
+mov (16) ubMEDIAN_TEMP(8,16)<1>   	r[a0.0,164]<16;16,1>  	
+
+//TODO - Optimize one instruction here.
+add (1)  a0.0:uw		a0.0<0;1,0>:uw 64:uw
+// v0
+mov (16) ubMEDIAN_TEMP(9,0)<1>    	r[a0.0,0]<16;16,1>		
+// v0
+mov (16) ubMEDIAN_TEMP(9,16)<1>   	r[a0.0,32]<16;16,1>		
+// v1
+mov (16) ubMEDIAN_TEMP(10,0)<1>    	r[a0.0,2]<16;16,1>		
+// v1
+mov (16) ubMEDIAN_TEMP(10,16)<1>   	r[a0.0,34]<16;16,1>		
+// v2
+mov (16) ubMEDIAN_TEMP(11,0)<1>    	r[a0.0,4]<16;16,1>		
+// v2
+mov (16) ubMEDIAN_TEMP(11,16)<1>   	r[a0.0,36]<16;16,1>		
+// v3
+mov (16) ubMEDIAN_TEMP(12,0)<1>    	r[a0.0,64]<16;16,1>  	
+// v3
+mov (16) ubMEDIAN_TEMP(12,16)<1>   	r[a0.0,96]<16;16,1>		
+// v4
+mov (16) ubMEDIAN_TEMP(13,0)<1>		r[a0.0,66]<16;16,1>  	
+// v4
+mov (16) ubMEDIAN_TEMP(13,16)<1>   	r[a0.0,98]<16;16,1>  	
+// v5
+mov (16) ubMEDIAN_TEMP(14,0)<1>		r[a0.0,68]<16;16,1>  	
+// v5
+mov (16) ubMEDIAN_TEMP(14,16)<1>   	r[a0.0,100]<16;16,1>  	
+// v6
+mov (16) ubMEDIAN_TEMP(15,0)<1>    	r[a0.0,128]<16;16,1>  	
+// v6
+mov (16) ubMEDIAN_TEMP(15,16)<1>   	r[a0.0,160]<16;16,1>		
+// v7
+mov (16) ubMEDIAN_TEMP(16,0)<1>		r[a0.0,130]<16;16,1>  	
+// v7
+mov (16) ubMEDIAN_TEMP(16,16)<1>   	r[a0.0,162]<16;16,1>  	
+// v8
+mov (16) ubMEDIAN_TEMP(17,0)<1>		r[a0.0,132]<16;16,1>  	
+// v8
+mov (16) ubMEDIAN_TEMP(17,16)<1>   	r[a0.0,164]<16;16,1>  	
+
+//TODO - Optimize one instruction here.
+add (1)  a0.0:uw		a0.0<0;1,0>:uw 64:uw
+
+// MedianSwap
+//
+//  MedianSwap(inOutLeft, inOutRight)
+//  {
+//      if (inOutLeft > inOutRight)
+//      {
+//          temp = inOutLeft
+//          inOutLeft = inOutRight
+//          inOutRight = temp
+//      }
+//  }
+
+// MedianSwap(v1, v2) - U
+// MedianSwap(v4, v5) - U
+// MedianSwap(v1, v2) - V
+// MedianSwap(v4, v5) - V
+
+cmp.g.f0.0 (16) null:w          ubMEDIAN_TEMP(1,0)<32;16,2>  ubMEDIAN_TEMP(2,0)<32;16,2>
+cmp.g.f0.1 (16) null:w          ubMEDIAN_TEMP(4,0)<32;16,2>  ubMEDIAN_TEMP(5,0)<32;16,2>
+cmp.g.f1.0 (16) null:w          ubMEDIAN_TEMP(1,1)<32;16,2> 	ubMEDIAN_TEMP(2,1)<32;16,2>
+cmp.g.f1.1 (16) null:w          ubMEDIAN_TEMP(4,1)<32;16,2> 	ubMEDIAN_TEMP(5,1)<32;16,2>
+
+       	mov (16) ubTEMP1(0,0)<1>      ubMEDIAN_TEMP(1,0)<32;16,2>	
+       	mov (16) ubTEMP1(0,16)<1>     ubMEDIAN_TEMP(4,0)<32;16,2>	
+       	mov (16) ubTEMP1(1,0)<1>      ubMEDIAN_TEMP(1,1)<32;16,2>	
+		mov (16) ubTEMP1(1,16)<1>     ubMEDIAN_TEMP(4,1)<32;16,2>	
+
+(f0.0) mov (16) ubMEDIAN_TEMP(1,0)<2>     	ubMEDIAN_TEMP(2,0)<32;16,2>		
+(f0.1) mov (16) ubMEDIAN_TEMP(4,0)<2>		ubMEDIAN_TEMP(5,0)<32;16,2>		
+(f1.0) mov (16) ubMEDIAN_TEMP(1,1)<2>  		ubMEDIAN_TEMP(2,1)<32;16,2>		
+(f1.1) mov (16) ubMEDIAN_TEMP(4,1)<2>		ubMEDIAN_TEMP(5,1)<32;16,2>		
+
+(f0.0) mov (16) ubMEDIAN_TEMP(2,0)<2>     ubTEMP1(0,0)<16;16,1>		
+(f0.1) mov (16) ubMEDIAN_TEMP(5,0)<2>     ubTEMP1(0,16)<16;16,1>		
+(f1.0) mov (16) ubMEDIAN_TEMP(2,1)<2>     ubTEMP1(1,0)<16;16,1>		
+(f1.1) mov (16) ubMEDIAN_TEMP(5,1)<2>     ubTEMP1(1,16)<16;16,1>   	
+
+// MedianSwap(v7, v8) - U
+// MedianSwap(v0, v1) - U
+// MedianSwap(v7, v8) - V
+// MedianSwap(v0, v1) - V
+ cmp.g.f0.0 (16) null:w          ubMEDIAN_TEMP(7,0)<32;16,2>   	ubMEDIAN_TEMP(8,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w          ubMEDIAN_TEMP(0,0)<32;16,2>  	ubMEDIAN_TEMP(1,0)<32;16,2>
+ cmp.g.f1.0 (16) null:w          ubMEDIAN_TEMP(7,1)<32;16,2> 	ubMEDIAN_TEMP(8,1)<32;16,2>
+ cmp.g.f1.1 (16) null:w          ubMEDIAN_TEMP(0,1)<32;16,2> 	ubMEDIAN_TEMP(1,1)<32;16,2>
+
+        mov (16) ubTEMP1(0,0)<1>      ubMEDIAN_TEMP(7,0)<32;16,2>		
+        mov (16) ubTEMP1(0,16)<1>     ubMEDIAN_TEMP(0,0)<32;16,2>		
+        mov (16) ubTEMP1(1,0)<1>      ubMEDIAN_TEMP(7,1)<32;16,2>		
+ 		mov (16) ubTEMP1(1,16)<1>     ubMEDIAN_TEMP(0,1)<32;16,2>		
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(7,0)<2>   ubMEDIAN_TEMP(8,0)<32;16,2>	   
+ (f0.1) mov (16) ubMEDIAN_TEMP(0,0)<2>	ubMEDIAN_TEMP(1,0)<32;16,2>     
+ (f1.0) mov (16) ubMEDIAN_TEMP(7,1)<2>  	ubMEDIAN_TEMP(8,1)<32;16,2>     
+ (f1.1) mov (16) ubMEDIAN_TEMP(0,1)<2>	ubMEDIAN_TEMP(1,1)<32;16,2>     
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(8,0)<2>     ubTEMP1(0,0)<16;16,1>	   
+ (f0.1) mov (16) ubMEDIAN_TEMP(1,0)<2>     ubTEMP1(0,16)<16;16,1>    
+ (f1.0) mov (16) ubMEDIAN_TEMP(8,1)<2>     ubTEMP1(1,0)<16;16,1>     
+ (f1.1) mov (16) ubMEDIAN_TEMP(1,1)<2>     ubTEMP1(1,16)<16;16,1>    
+
+ // MedianSwap(v3, v4) - U
+ // MedianSwap(v6, v7) - U
+ // MedianSwap(v3, v4) - V
+ // MedianSwap(v6, v7) - V
+ cmp.g.f0.0 (16) null:w          ubMEDIAN_TEMP(3,0)<32;16,2>   	ubMEDIAN_TEMP(4,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w          ubMEDIAN_TEMP(6,0)<32;16,2>  	ubMEDIAN_TEMP(7,0)<32;16,2>
+ cmp.g.f1.0 (16) null:w          ubMEDIAN_TEMP(3,1)<32;16,2> 	ubMEDIAN_TEMP(4,1)<32;16,2>
+ cmp.g.f1.1 (16) null:w          ubMEDIAN_TEMP(6,1)<32;16,2> 	ubMEDIAN_TEMP(7,1)<32;16,2>
+
+        mov (16) ubTEMP1(0,0)<1>      ubMEDIAN_TEMP(3,0)<32;16,2>		
+        mov (16) ubTEMP1(0,16)<1>     ubMEDIAN_TEMP(6,0)<32;16,2>		
+        mov (16) ubTEMP1(1,0)<1>      ubMEDIAN_TEMP(3,1)<32;16,2>		
+ 		mov (16) ubTEMP1(1,16)<1>     ubMEDIAN_TEMP(6,1)<32;16,2>		
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(3,0)<2>   ubMEDIAN_TEMP(4,0)<32;16,2>	   
+ (f0.1) mov (16) ubMEDIAN_TEMP(6,0)<2>	ubMEDIAN_TEMP(7,0)<32;16,2>     
+ (f1.0) mov (16) ubMEDIAN_TEMP(3,1)<2>  	ubMEDIAN_TEMP(4,1)<32;16,2>     
+ (f1.1) mov (16) ubMEDIAN_TEMP(6,1)<2>	ubMEDIAN_TEMP(7,1)<32;16,2>     
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(4,0)<2>     ubTEMP1(0,0)<16;16,1>		
+ (f0.1) mov (16) ubMEDIAN_TEMP(7,0)<2>     ubTEMP1(0,16)<16;16,1>    
+ (f1.0) mov (16) ubMEDIAN_TEMP(4,1)<2>     ubTEMP1(1,0)<16;16,1>     
+ (f1.1) mov (16) ubMEDIAN_TEMP(7,1)<2>     ubTEMP1(1,16)<16;16,1>    
+
+ // MedianSwap(v1, v2) - U
+ // MedianSwap(v4, v5) - U
+ // MedianSwap(v1, v2) - V
+ // MedianSwap(v4, v5) - V
+ cmp.g.f0.0 (16) null:w          ubMEDIAN_TEMP(1,0)<32;16,2>   	ubMEDIAN_TEMP(2,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w          ubMEDIAN_TEMP(4,0)<32;16,2>  	ubMEDIAN_TEMP(5,0)<32;16,2>
+ cmp.g.f1.0 (16) null:w          ubMEDIAN_TEMP(1,1)<32;16,2> 	ubMEDIAN_TEMP(2,1)<32;16,2>
+ cmp.g.f1.1 (16) null:w          ubMEDIAN_TEMP(4,1)<32;16,2> 	ubMEDIAN_TEMP(5,1)<32;16,2>
+
+        mov (16) ubTEMP1(0,0)<1>      ubMEDIAN_TEMP(1,0)<32;16,2>		
+        mov (16) ubTEMP1(0,16)<1>     ubMEDIAN_TEMP(4,0)<32;16,2>		
+        mov (16) ubTEMP1(1,0)<1>      ubMEDIAN_TEMP(1,1)<32;16,2>		
+ 		mov (16) ubTEMP1(1,16)<1>     ubMEDIAN_TEMP(4,1)<32;16,2>		
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(1,0)<2>   ubMEDIAN_TEMP(2,0)<32;16,2>		
+ (f0.1) mov (16) ubMEDIAN_TEMP(4,0)<2>	ubMEDIAN_TEMP(5,0)<32;16,2>     
+ (f1.0) mov (16) ubMEDIAN_TEMP(1,1)<2>  	ubMEDIAN_TEMP(2,1)<32;16,2>     
+ (f1.1) mov (16) ubMEDIAN_TEMP(4,1)<2>	ubMEDIAN_TEMP(5,1)<32;16,2>     
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(2,0)<2>     ubTEMP1(0,0)<16;16,1>		
+ (f0.1) mov (16) ubMEDIAN_TEMP(5,0)<2>     ubTEMP1(0,16)<16;16,1>    
+ (f1.0) mov (16) ubMEDIAN_TEMP(2,1)<2>     ubTEMP1(1,0)<16;16,1>     
+ (f1.1) mov (16) ubMEDIAN_TEMP(5,1)<2>     ubTEMP1(1,16)<16;16,1>    
+
+ // MedianSwap(v7, v8) - U
+ // MedianSwap(v0, v3) - U
+ // MedianSwap(v7, v8) - V
+ // MedianSwap(v0, v3) - V
+ cmp.g.f0.0 (16) null:w          ubMEDIAN_TEMP(7,0)<32;16,2>   	ubMEDIAN_TEMP(8,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w          ubMEDIAN_TEMP(0,0)<32;16,2>  	ubMEDIAN_TEMP(3,0)<32;16,2>
+ cmp.g.f1.0 (16) null:w          ubMEDIAN_TEMP(7,1)<32;16,2> 	ubMEDIAN_TEMP(8,1)<32;16,2>
+ cmp.g.f1.1 (16) null:w          ubMEDIAN_TEMP(0,1)<32;16,2> 	ubMEDIAN_TEMP(3,1)<32;16,2>
+
+        mov (16) ubTEMP1(0,0)<1>      ubMEDIAN_TEMP(7,0)<32;16,2>		
+        mov (16) ubTEMP1(0,16)<1>     ubMEDIAN_TEMP(0,0)<32;16,2>		
+        mov (16) ubTEMP1(1,0)<1>      ubMEDIAN_TEMP(7,1)<32;16,2>		
+ 		mov (16) ubTEMP1(1,16)<1>     ubMEDIAN_TEMP(0,1)<32;16,2>		
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(7,0)<2>   ubMEDIAN_TEMP(8,0)<32;16,2>		
+ (f0.1) mov (16) ubMEDIAN_TEMP(0,0)<2>	ubMEDIAN_TEMP(3,0)<32;16,2>     
+ (f1.0) mov (16) ubMEDIAN_TEMP(7,1)<2>  	ubMEDIAN_TEMP(8,1)<32;16,2>     
+ (f1.1) mov (16) ubMEDIAN_TEMP(0,1)<2>	ubMEDIAN_TEMP(3,1)<32;16,2>     
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(8,0)<2>     ubTEMP1(0,0)<16;16,1>		
+ (f0.1) mov (16) ubMEDIAN_TEMP(3,0)<2>     ubTEMP1(0,16)<16;16,1>    
+ (f1.0) mov (16) ubMEDIAN_TEMP(8,1)<2>     ubTEMP1(1,0)<16;16,1>     
+ (f1.1) mov (16) ubMEDIAN_TEMP(3,1)<2>     ubTEMP1(1,16)<16;16,1>    
+
+ // NOTE:
+ // Compare v0 to v6 to find the minimum.
+ // Store the minimum for future use.
+ //TODO - Find if MIN is needed.
+ //cmp.l.f0.0  (16) null:w          			ubMEDIAN_TEMP(%1+0,0)<32;16,2> 	ubMEDIAN_TEMP(%1+6,0)<32;16,2>
+ //cmp.l.f1.0  (16) null:w          			ubMEDIAN_TEMP(%1+0,1)<32;16,2> 	ubMEDIAN_TEMP(%1+6,1)<32;16,2>
+ //(f0.0)  mov (16) ubCURR_MIN(0,%2*16+0)<1>   		ubMEDIAN_TEMP(%1+0,0)<32;16,2>
+ //(f1.0)  mov (16) ubCURR_MIN(1,%2*16+0)<1>   		ubMEDIAN_TEMP(%1+0,1)<32;16,2>
+ //(-f0.0) mov (16) ubCURR_MIN(0,%2*16+0)<1>   		ubMEDIAN_TEMP(%1+6,0)<32;16,2>
+ //(-f1.0) mov (16) ubCURR_MIN(1,%2*16+0)<1>   		ubMEDIAN_TEMP(%1+6,1)<32;16,2>
+
+ // MedianSwap(v5, v8) - U
+ // MedianSwap(v4, v7) - U
+ // MedianSwap(v5, v8) - V
+ // MedianSwap(v4, v7) - V
+ cmp.g.f0.0 (16) null:w          ubMEDIAN_TEMP(5,0)<32;16,2>   	ubMEDIAN_TEMP(8,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w          ubMEDIAN_TEMP(4,0)<32;16,2>  	ubMEDIAN_TEMP(7,0)<32;16,2>
+ cmp.g.f1.0 (16) null:w          ubMEDIAN_TEMP(5,1)<32;16,2> 	ubMEDIAN_TEMP(8,1)<32;16,2>
+ cmp.g.f1.1 (16) null:w          ubMEDIAN_TEMP(4,1)<32;16,2> 	ubMEDIAN_TEMP(7,1)<32;16,2>
+
+        mov (16) ubTEMP1(0,0)<1>      ubMEDIAN_TEMP(5,0)<32;16,2>		
+        mov (16) ubTEMP1(0,16)<1>     ubMEDIAN_TEMP(4,0)<32;16,2>		
+        mov (16) ubTEMP1(1,0)<1>      ubMEDIAN_TEMP(5,1)<32;16,2>		
+ 		mov (16) ubTEMP1(1,16)<1>     ubMEDIAN_TEMP(4,1)<32;16,2>		
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(5,0)<2>    	ubMEDIAN_TEMP(8,0)<32;16,2>		
+ (f0.1) mov (16) ubMEDIAN_TEMP(4,0)<2>		ubMEDIAN_TEMP(7,0)<32;16,2>     
+ (f1.0) mov (16) ubMEDIAN_TEMP(5,1)<2>  	ubMEDIAN_TEMP(8,1)<32;16,2>     
+ (f1.1) mov (16) ubMEDIAN_TEMP(4,1)<2>		ubMEDIAN_TEMP(7,1)<32;16,2>     
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(8,0)<2>     ubTEMP1(0,0)<16;16,1>		
+ (f0.1) mov (16) ubMEDIAN_TEMP(7,0)<2>     ubTEMP1(0,16)<16;16,1>    
+ (f1.0) mov (16) ubMEDIAN_TEMP(8,1)<2>     ubTEMP1(1,0)<16;16,1>     
+ (f1.1) mov (16) ubMEDIAN_TEMP(7,1)<2>     ubTEMP1(1,16)<16;16,1>    
+
+// NOTE:
+// Compare v2 to v8 to find the maximum.
+// Store the maximum for future use.
+ //TODO - Find if MAX is needed.
+// cmp.g.f0.0  (16) null:w         ubMEDIAN_TEMP(%1+2,0)<32;16,2> ubMEDIAN_TEMP(%1+8,0)<32;16,2>
+// cmp.g.f1.0  (16) null:w         ubMEDIAN_TEMP(%1+2,1)<32;16,2> ubMEDIAN_TEMP(%1+8,1)<32;16,2>
+//(f0.0)  mov (16) ubCURR_MAX(0,%2*16+0)<1>   	ubMEDIAN_TEMP(%1+2,0)<32;16,2>
+//(f1.0)  mov (16) ubCURR_MAX(1,%2*16+0)<1>   	ubMEDIAN_TEMP(%1+2,1)<32;16,2>
+//(-f0.0) mov (16) ubCURR_MAX(0,%2*16+0)<1>   	ubMEDIAN_TEMP(%1+8,0)<32;16,2>
+//(-f1.0) mov (16) ubCURR_MAX(1,%2*16+0)<1>   	ubMEDIAN_TEMP(%1+8,1)<32;16,2>
+
+// MedianSwap(v3, v6) - U
+// MedianSwap(v1, v4) - U
+// MedianSwap(v3, v6) - V
+// MedianSwap(v1, v4) - V
+ cmp.g.f0.0 (16) null:w          ubMEDIAN_TEMP(3,0)<32;16,2>   	ubMEDIAN_TEMP(6,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w          ubMEDIAN_TEMP(1,0)<32;16,2>  	ubMEDIAN_TEMP(4,0)<32;16,2>
+ cmp.g.f1.0 (16) null:w          ubMEDIAN_TEMP(3,1)<32;16,2> 	ubMEDIAN_TEMP(6,1)<32;16,2>
+ cmp.g.f1.1 (16) null:w          ubMEDIAN_TEMP(1,1)<32;16,2> 	ubMEDIAN_TEMP(4,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(6,0)<2>     ubMEDIAN_TEMP(3,0)<32;16,2>		
+ (f0.1) mov (16) ubMEDIAN_TEMP(4,0)<2>     ubMEDIAN_TEMP(1,0)<32;16,2>      
+ (f1.0) mov (16) ubMEDIAN_TEMP(6,1)<2>     ubMEDIAN_TEMP(3,1)<32;16,2>      
+ (f1.1) mov (16) ubMEDIAN_TEMP(4,1)<2>     ubMEDIAN_TEMP(1,1)<32;16,2>      
+
+ // MedianSwap(v2,v5) - U
+ // MedianSwap(v4,v7) - U
+ // MedianSwap(v2,v5) - V
+ // MedianSwap(v4,v7) - V
+ cmp.g.f0.0 (16) null:w          ubMEDIAN_TEMP(2,0)<32;16,2>   	ubMEDIAN_TEMP(5,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w          ubMEDIAN_TEMP(4,0)<32;16,2>  	ubMEDIAN_TEMP(7,0)<32;16,2>
+ cmp.g.f1.0 (16) null:w          ubMEDIAN_TEMP(2,1)<32;16,2> 	ubMEDIAN_TEMP(5,1)<32;16,2>
+ cmp.g.f1.1 (16) null:w          ubMEDIAN_TEMP(4,1)<32;16,2> 	ubMEDIAN_TEMP(7,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(2,0)<2>     ubMEDIAN_TEMP(5,0)<32;16,2>		
+ (f0.1) mov (16) ubMEDIAN_TEMP(4,0)<2>     ubMEDIAN_TEMP(7,0)<32;16,2>      
+ (f1.0) mov (16) ubMEDIAN_TEMP(2,1)<2>     ubMEDIAN_TEMP(5,1)<32;16,2>      
+ (f1.1) mov (16) ubMEDIAN_TEMP(4,1)<2>     ubMEDIAN_TEMP(7,1)<32;16,2>      
+
+ // MedianSwap(v4,v2) - U
+ // MedianSwap(v4,v2) - V
+ cmp.g.f0.0 (16) null:w          ubMEDIAN_TEMP(4,0)<32;16,2>   	ubMEDIAN_TEMP(2,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w          ubMEDIAN_TEMP(4,1)<32;16,2>  	ubMEDIAN_TEMP(2,1)<32;16,2>
+
+        mov (16) ubTEMP1(0,0)<1>      ubMEDIAN_TEMP(4,0)<32;16,2>		
+        mov (16) ubTEMP1(0,16)<1>     ubMEDIAN_TEMP(4,1)<32;16,2>		
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(4,0)<2>    	ubMEDIAN_TEMP(2,0)<32;16,2>		
+ (f0.1) mov (16) ubMEDIAN_TEMP(4,1)<2>		ubMEDIAN_TEMP(2,1)<32;16,2>		
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(2,0)<2>     ubTEMP1(0,0)<16;16,1>		
+ (f0.1) mov (16) ubMEDIAN_TEMP(2,1)<2>     ubTEMP1(0,16)<16;16,1>	
+
+ // MedianSwap(v6,v4) - U
+ // MedianSwap(v6,v4) - V
+ cmp.g.f0.0 (16) null:w          ubMEDIAN_TEMP(6,0)<32;16,2>   ubMEDIAN_TEMP(4,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w          ubMEDIAN_TEMP(6,1)<32;16,2>   ubMEDIAN_TEMP(4,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(4,0)<2>    	ubMEDIAN_TEMP(6,0)<32;16,2>		
+ (f0.1) mov (16) ubMEDIAN_TEMP(4,1)<2>		ubMEDIAN_TEMP(6,1)<32;16,2>		
+
+ // MedianSwap(v4,v2) - U
+ // MedianSwap(v4,v2) - V
+ cmp.g.f0.0 (16) null:w          ubMEDIAN_TEMP(4,0)<32;16,2>   	ubMEDIAN_TEMP(2,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w          ubMEDIAN_TEMP(4,1)<32;16,2>  	ubMEDIAN_TEMP(2,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(4,0)<2>    	ubMEDIAN_TEMP(2,0)<32;16,2>		
+ (f0.1) mov (16) ubMEDIAN_TEMP(4,1)<2>		ubMEDIAN_TEMP(2,1)<32;16,2>		
+cmp.g.f0.0 (16) null:w          ubMEDIAN_TEMP(10,0)<32;16,2>  ubMEDIAN_TEMP(11,0)<32;16,2>
+cmp.g.f0.1 (16) null:w          ubMEDIAN_TEMP(13,0)<32;16,2>  ubMEDIAN_TEMP(14,0)<32;16,2>
+cmp.g.f1.0 (16) null:w          ubMEDIAN_TEMP(10,1)<32;16,2> 	ubMEDIAN_TEMP(11,1)<32;16,2>
+cmp.g.f1.1 (16) null:w          ubMEDIAN_TEMP(13,1)<32;16,2> 	ubMEDIAN_TEMP(14,1)<32;16,2>
+
+       	mov (16) ubTEMP1(0,0)<1>      ubMEDIAN_TEMP(10,0)<32;16,2>	
+       	mov (16) ubTEMP1(0,16)<1>     ubMEDIAN_TEMP(13,0)<32;16,2>	
+       	mov (16) ubTEMP1(1,0)<1>      ubMEDIAN_TEMP(10,1)<32;16,2>	
+		mov (16) ubTEMP1(1,16)<1>     ubMEDIAN_TEMP(13,1)<32;16,2>	
+
+(f0.0) mov (16) ubMEDIAN_TEMP(10,0)<2>     	ubMEDIAN_TEMP(11,0)<32;16,2>		
+(f0.1) mov (16) ubMEDIAN_TEMP(13,0)<2>		ubMEDIAN_TEMP(14,0)<32;16,2>		
+(f1.0) mov (16) ubMEDIAN_TEMP(10,1)<2>  		ubMEDIAN_TEMP(11,1)<32;16,2>		
+(f1.1) mov (16) ubMEDIAN_TEMP(13,1)<2>		ubMEDIAN_TEMP(14,1)<32;16,2>		
+
+(f0.0) mov (16) ubMEDIAN_TEMP(11,0)<2>     ubTEMP1(0,0)<16;16,1>		
+(f0.1) mov (16) ubMEDIAN_TEMP(14,0)<2>     ubTEMP1(0,16)<16;16,1>		
+(f1.0) mov (16) ubMEDIAN_TEMP(11,1)<2>     ubTEMP1(1,0)<16;16,1>		
+(f1.1) mov (16) ubMEDIAN_TEMP(14,1)<2>     ubTEMP1(1,16)<16;16,1>   	
+
+// MedianSwap(v7, v8) - U
+// MedianSwap(v0, v1) - U
+// MedianSwap(v7, v8) - V
+// MedianSwap(v0, v1) - V
+ cmp.g.f0.0 (16) null:w          ubMEDIAN_TEMP(16,0)<32;16,2>   	ubMEDIAN_TEMP(17,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w          ubMEDIAN_TEMP(9,0)<32;16,2>  	ubMEDIAN_TEMP(10,0)<32;16,2>
+ cmp.g.f1.0 (16) null:w          ubMEDIAN_TEMP(16,1)<32;16,2> 	ubMEDIAN_TEMP(17,1)<32;16,2>
+ cmp.g.f1.1 (16) null:w          ubMEDIAN_TEMP(9,1)<32;16,2> 	ubMEDIAN_TEMP(10,1)<32;16,2>
+
+        mov (16) ubTEMP1(0,0)<1>      ubMEDIAN_TEMP(16,0)<32;16,2>		
+        mov (16) ubTEMP1(0,16)<1>     ubMEDIAN_TEMP(9,0)<32;16,2>		
+        mov (16) ubTEMP1(1,0)<1>      ubMEDIAN_TEMP(16,1)<32;16,2>		
+ 		mov (16) ubTEMP1(1,16)<1>     ubMEDIAN_TEMP(9,1)<32;16,2>		
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(16,0)<2>   ubMEDIAN_TEMP(17,0)<32;16,2>	   
+ (f0.1) mov (16) ubMEDIAN_TEMP(9,0)<2>	ubMEDIAN_TEMP(10,0)<32;16,2>     
+ (f1.0) mov (16) ubMEDIAN_TEMP(16,1)<2>  	ubMEDIAN_TEMP(17,1)<32;16,2>     
+ (f1.1) mov (16) ubMEDIAN_TEMP(9,1)<2>	ubMEDIAN_TEMP(10,1)<32;16,2>     
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(17,0)<2>     ubTEMP1(0,0)<16;16,1>	   
+ (f0.1) mov (16) ubMEDIAN_TEMP(10,0)<2>     ubTEMP1(0,16)<16;16,1>    
+ (f1.0) mov (16) ubMEDIAN_TEMP(17,1)<2>     ubTEMP1(1,0)<16;16,1>     
+ (f1.1) mov (16) ubMEDIAN_TEMP(10,1)<2>     ubTEMP1(1,16)<16;16,1>    
+
+ // MedianSwap(v3, v4) - U
+ // MedianSwap(v6, v7) - U
+ // MedianSwap(v3, v4) - V
+ // MedianSwap(v6, v7) - V
+ cmp.g.f0.0 (16) null:w          ubMEDIAN_TEMP(12,0)<32;16,2>   	ubMEDIAN_TEMP(13,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w          ubMEDIAN_TEMP(15,0)<32;16,2>  	ubMEDIAN_TEMP(16,0)<32;16,2>
+ cmp.g.f1.0 (16) null:w          ubMEDIAN_TEMP(12,1)<32;16,2> 	ubMEDIAN_TEMP(13,1)<32;16,2>
+ cmp.g.f1.1 (16) null:w          ubMEDIAN_TEMP(15,1)<32;16,2> 	ubMEDIAN_TEMP(16,1)<32;16,2>
+
+        mov (16) ubTEMP1(0,0)<1>      ubMEDIAN_TEMP(12,0)<32;16,2>		
+        mov (16) ubTEMP1(0,16)<1>     ubMEDIAN_TEMP(15,0)<32;16,2>		
+        mov (16) ubTEMP1(1,0)<1>      ubMEDIAN_TEMP(12,1)<32;16,2>		
+ 		mov (16) ubTEMP1(1,16)<1>     ubMEDIAN_TEMP(15,1)<32;16,2>		
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(12,0)<2>   ubMEDIAN_TEMP(13,0)<32;16,2>	   
+ (f0.1) mov (16) ubMEDIAN_TEMP(15,0)<2>	ubMEDIAN_TEMP(16,0)<32;16,2>     
+ (f1.0) mov (16) ubMEDIAN_TEMP(12,1)<2>  	ubMEDIAN_TEMP(13,1)<32;16,2>     
+ (f1.1) mov (16) ubMEDIAN_TEMP(15,1)<2>	ubMEDIAN_TEMP(16,1)<32;16,2>     
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(13,0)<2>     ubTEMP1(0,0)<16;16,1>		
+ (f0.1) mov (16) ubMEDIAN_TEMP(16,0)<2>     ubTEMP1(0,16)<16;16,1>    
+ (f1.0) mov (16) ubMEDIAN_TEMP(13,1)<2>     ubTEMP1(1,0)<16;16,1>     
+ (f1.1) mov (16) ubMEDIAN_TEMP(16,1)<2>     ubTEMP1(1,16)<16;16,1>    
+
+ // MedianSwap(v1, v2) - U
+ // MedianSwap(v4, v5) - U
+ // MedianSwap(v1, v2) - V
+ // MedianSwap(v4, v5) - V
+ cmp.g.f0.0 (16) null:w          ubMEDIAN_TEMP(10,0)<32;16,2>   	ubMEDIAN_TEMP(11,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w          ubMEDIAN_TEMP(13,0)<32;16,2>  	ubMEDIAN_TEMP(14,0)<32;16,2>
+ cmp.g.f1.0 (16) null:w          ubMEDIAN_TEMP(10,1)<32;16,2> 	ubMEDIAN_TEMP(11,1)<32;16,2>
+ cmp.g.f1.1 (16) null:w          ubMEDIAN_TEMP(13,1)<32;16,2> 	ubMEDIAN_TEMP(14,1)<32;16,2>
+
+        mov (16) ubTEMP1(0,0)<1>      ubMEDIAN_TEMP(10,0)<32;16,2>		
+        mov (16) ubTEMP1(0,16)<1>     ubMEDIAN_TEMP(13,0)<32;16,2>		
+        mov (16) ubTEMP1(1,0)<1>      ubMEDIAN_TEMP(10,1)<32;16,2>		
+ 		mov (16) ubTEMP1(1,16)<1>     ubMEDIAN_TEMP(13,1)<32;16,2>		
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(10,0)<2>   ubMEDIAN_TEMP(11,0)<32;16,2>		
+ (f0.1) mov (16) ubMEDIAN_TEMP(13,0)<2>	ubMEDIAN_TEMP(14,0)<32;16,2>     
+ (f1.0) mov (16) ubMEDIAN_TEMP(10,1)<2>  	ubMEDIAN_TEMP(11,1)<32;16,2>     
+ (f1.1) mov (16) ubMEDIAN_TEMP(13,1)<2>	ubMEDIAN_TEMP(14,1)<32;16,2>     
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(11,0)<2>     ubTEMP1(0,0)<16;16,1>		
+ (f0.1) mov (16) ubMEDIAN_TEMP(14,0)<2>     ubTEMP1(0,16)<16;16,1>    
+ (f1.0) mov (16) ubMEDIAN_TEMP(11,1)<2>     ubTEMP1(1,0)<16;16,1>     
+ (f1.1) mov (16) ubMEDIAN_TEMP(14,1)<2>     ubTEMP1(1,16)<16;16,1>    
+
+ // MedianSwap(v7, v8) - U
+ // MedianSwap(v0, v3) - U
+ // MedianSwap(v7, v8) - V
+ // MedianSwap(v0, v3) - V
+ cmp.g.f0.0 (16) null:w          ubMEDIAN_TEMP(16,0)<32;16,2>   	ubMEDIAN_TEMP(17,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w          ubMEDIAN_TEMP(9,0)<32;16,2>  	ubMEDIAN_TEMP(12,0)<32;16,2>
+ cmp.g.f1.0 (16) null:w          ubMEDIAN_TEMP(16,1)<32;16,2> 	ubMEDIAN_TEMP(17,1)<32;16,2>
+ cmp.g.f1.1 (16) null:w          ubMEDIAN_TEMP(9,1)<32;16,2> 	ubMEDIAN_TEMP(12,1)<32;16,2>
+
+        mov (16) ubTEMP1(0,0)<1>      ubMEDIAN_TEMP(16,0)<32;16,2>		
+        mov (16) ubTEMP1(0,16)<1>     ubMEDIAN_TEMP(9,0)<32;16,2>		
+        mov (16) ubTEMP1(1,0)<1>      ubMEDIAN_TEMP(16,1)<32;16,2>		
+ 		mov (16) ubTEMP1(1,16)<1>     ubMEDIAN_TEMP(9,1)<32;16,2>		
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(16,0)<2>   ubMEDIAN_TEMP(17,0)<32;16,2>		
+ (f0.1) mov (16) ubMEDIAN_TEMP(9,0)<2>	ubMEDIAN_TEMP(12,0)<32;16,2>     
+ (f1.0) mov (16) ubMEDIAN_TEMP(16,1)<2>  	ubMEDIAN_TEMP(17,1)<32;16,2>     
+ (f1.1) mov (16) ubMEDIAN_TEMP(9,1)<2>	ubMEDIAN_TEMP(12,1)<32;16,2>     
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(17,0)<2>     ubTEMP1(0,0)<16;16,1>		
+ (f0.1) mov (16) ubMEDIAN_TEMP(12,0)<2>     ubTEMP1(0,16)<16;16,1>    
+ (f1.0) mov (16) ubMEDIAN_TEMP(17,1)<2>     ubTEMP1(1,0)<16;16,1>     
+ (f1.1) mov (16) ubMEDIAN_TEMP(12,1)<2>     ubTEMP1(1,16)<16;16,1>    
+
+ // NOTE:
+ // Compare v0 to v6 to find the minimum.
+ // Store the minimum for future use.
+ //TODO - Find if MIN is needed.
+ //cmp.l.f0.0  (16) null:w          			ubMEDIAN_TEMP(%1+0,0)<32;16,2> 	ubMEDIAN_TEMP(%1+6,0)<32;16,2>
+ //cmp.l.f1.0  (16) null:w          			ubMEDIAN_TEMP(%1+0,1)<32;16,2> 	ubMEDIAN_TEMP(%1+6,1)<32;16,2>
+ //(f0.0)  mov (16) ubCURR_MIN(0,%2*16+0)<1>   		ubMEDIAN_TEMP(%1+0,0)<32;16,2>
+ //(f1.0)  mov (16) ubCURR_MIN(1,%2*16+0)<1>   		ubMEDIAN_TEMP(%1+0,1)<32;16,2>
+ //(-f0.0) mov (16) ubCURR_MIN(0,%2*16+0)<1>   		ubMEDIAN_TEMP(%1+6,0)<32;16,2>
+ //(-f1.0) mov (16) ubCURR_MIN(1,%2*16+0)<1>   		ubMEDIAN_TEMP(%1+6,1)<32;16,2>
+
+ // MedianSwap(v5, v8) - U
+ // MedianSwap(v4, v7) - U
+ // MedianSwap(v5, v8) - V
+ // MedianSwap(v4, v7) - V
+ cmp.g.f0.0 (16) null:w          ubMEDIAN_TEMP(14,0)<32;16,2>   	ubMEDIAN_TEMP(17,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w          ubMEDIAN_TEMP(13,0)<32;16,2>  	ubMEDIAN_TEMP(16,0)<32;16,2>
+ cmp.g.f1.0 (16) null:w          ubMEDIAN_TEMP(14,1)<32;16,2> 	ubMEDIAN_TEMP(17,1)<32;16,2>
+ cmp.g.f1.1 (16) null:w          ubMEDIAN_TEMP(13,1)<32;16,2> 	ubMEDIAN_TEMP(16,1)<32;16,2>
+
+        mov (16) ubTEMP1(0,0)<1>      ubMEDIAN_TEMP(14,0)<32;16,2>		
+        mov (16) ubTEMP1(0,16)<1>     ubMEDIAN_TEMP(13,0)<32;16,2>		
+        mov (16) ubTEMP1(1,0)<1>      ubMEDIAN_TEMP(14,1)<32;16,2>		
+ 		mov (16) ubTEMP1(1,16)<1>     ubMEDIAN_TEMP(13,1)<32;16,2>		
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(14,0)<2>    	ubMEDIAN_TEMP(17,0)<32;16,2>		
+ (f0.1) mov (16) ubMEDIAN_TEMP(13,0)<2>		ubMEDIAN_TEMP(16,0)<32;16,2>     
+ (f1.0) mov (16) ubMEDIAN_TEMP(14,1)<2>  	ubMEDIAN_TEMP(17,1)<32;16,2>     
+ (f1.1) mov (16) ubMEDIAN_TEMP(13,1)<2>		ubMEDIAN_TEMP(16,1)<32;16,2>     
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(17,0)<2>     ubTEMP1(0,0)<16;16,1>		
+ (f0.1) mov (16) ubMEDIAN_TEMP(16,0)<2>     ubTEMP1(0,16)<16;16,1>    
+ (f1.0) mov (16) ubMEDIAN_TEMP(17,1)<2>     ubTEMP1(1,0)<16;16,1>     
+ (f1.1) mov (16) ubMEDIAN_TEMP(16,1)<2>     ubTEMP1(1,16)<16;16,1>    
+
+// NOTE:
+// Compare v2 to v8 to find the maximum.
+// Store the maximum for future use.
+ //TODO - Find if MAX is needed.
+// cmp.g.f0.0  (16) null:w         ubMEDIAN_TEMP(%1+2,0)<32;16,2> ubMEDIAN_TEMP(%1+8,0)<32;16,2>
+// cmp.g.f1.0  (16) null:w         ubMEDIAN_TEMP(%1+2,1)<32;16,2> ubMEDIAN_TEMP(%1+8,1)<32;16,2>
+//(f0.0)  mov (16) ubCURR_MAX(0,%2*16+0)<1>   	ubMEDIAN_TEMP(%1+2,0)<32;16,2>
+//(f1.0)  mov (16) ubCURR_MAX(1,%2*16+0)<1>   	ubMEDIAN_TEMP(%1+2,1)<32;16,2>
+//(-f0.0) mov (16) ubCURR_MAX(0,%2*16+0)<1>   	ubMEDIAN_TEMP(%1+8,0)<32;16,2>
+//(-f1.0) mov (16) ubCURR_MAX(1,%2*16+0)<1>   	ubMEDIAN_TEMP(%1+8,1)<32;16,2>
+
+// MedianSwap(v3, v6) - U
+// MedianSwap(v1, v4) - U
+// MedianSwap(v3, v6) - V
+// MedianSwap(v1, v4) - V
+ cmp.g.f0.0 (16) null:w          ubMEDIAN_TEMP(12,0)<32;16,2>   	ubMEDIAN_TEMP(15,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w          ubMEDIAN_TEMP(10,0)<32;16,2>  	ubMEDIAN_TEMP(13,0)<32;16,2>
+ cmp.g.f1.0 (16) null:w          ubMEDIAN_TEMP(12,1)<32;16,2> 	ubMEDIAN_TEMP(15,1)<32;16,2>
+ cmp.g.f1.1 (16) null:w          ubMEDIAN_TEMP(10,1)<32;16,2> 	ubMEDIAN_TEMP(13,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(15,0)<2>     ubMEDIAN_TEMP(12,0)<32;16,2>		
+ (f0.1) mov (16) ubMEDIAN_TEMP(13,0)<2>     ubMEDIAN_TEMP(10,0)<32;16,2>      
+ (f1.0) mov (16) ubMEDIAN_TEMP(15,1)<2>     ubMEDIAN_TEMP(12,1)<32;16,2>      
+ (f1.1) mov (16) ubMEDIAN_TEMP(13,1)<2>     ubMEDIAN_TEMP(10,1)<32;16,2>      
+
+ // MedianSwap(v2,v5) - U
+ // MedianSwap(v4,v7) - U
+ // MedianSwap(v2,v5) - V
+ // MedianSwap(v4,v7) - V
+ cmp.g.f0.0 (16) null:w          ubMEDIAN_TEMP(11,0)<32;16,2>   	ubMEDIAN_TEMP(14,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w          ubMEDIAN_TEMP(13,0)<32;16,2>  	ubMEDIAN_TEMP(16,0)<32;16,2>
+ cmp.g.f1.0 (16) null:w          ubMEDIAN_TEMP(11,1)<32;16,2> 	ubMEDIAN_TEMP(14,1)<32;16,2>
+ cmp.g.f1.1 (16) null:w          ubMEDIAN_TEMP(13,1)<32;16,2> 	ubMEDIAN_TEMP(16,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(11,0)<2>     ubMEDIAN_TEMP(14,0)<32;16,2>		
+ (f0.1) mov (16) ubMEDIAN_TEMP(13,0)<2>     ubMEDIAN_TEMP(16,0)<32;16,2>      
+ (f1.0) mov (16) ubMEDIAN_TEMP(11,1)<2>     ubMEDIAN_TEMP(14,1)<32;16,2>      
+ (f1.1) mov (16) ubMEDIAN_TEMP(13,1)<2>     ubMEDIAN_TEMP(16,1)<32;16,2>      
+
+ // MedianSwap(v4,v2) - U
+ // MedianSwap(v4,v2) - V
+ cmp.g.f0.0 (16) null:w          ubMEDIAN_TEMP(13,0)<32;16,2>   	ubMEDIAN_TEMP(11,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w          ubMEDIAN_TEMP(13,1)<32;16,2>  	ubMEDIAN_TEMP(11,1)<32;16,2>
+
+        mov (16) ubTEMP1(0,0)<1>      ubMEDIAN_TEMP(13,0)<32;16,2>		
+        mov (16) ubTEMP1(0,16)<1>     ubMEDIAN_TEMP(13,1)<32;16,2>		
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(13,0)<2>    	ubMEDIAN_TEMP(11,0)<32;16,2>		
+ (f0.1) mov (16) ubMEDIAN_TEMP(13,1)<2>		ubMEDIAN_TEMP(11,1)<32;16,2>		
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(11,0)<2>     ubTEMP1(0,0)<16;16,1>		
+ (f0.1) mov (16) ubMEDIAN_TEMP(11,1)<2>     ubTEMP1(0,16)<16;16,1>	
+
+ // MedianSwap(v6,v4) - U
+ // MedianSwap(v6,v4) - V
+ cmp.g.f0.0 (16) null:w          ubMEDIAN_TEMP(15,0)<32;16,2>   ubMEDIAN_TEMP(13,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w          ubMEDIAN_TEMP(15,1)<32;16,2>   ubMEDIAN_TEMP(13,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(13,0)<2>    	ubMEDIAN_TEMP(15,0)<32;16,2>		
+ (f0.1) mov (16) ubMEDIAN_TEMP(13,1)<2>		ubMEDIAN_TEMP(15,1)<32;16,2>		
+
+ // MedianSwap(v4,v2) - U
+ // MedianSwap(v4,v2) - V
+ cmp.g.f0.0 (16) null:w          ubMEDIAN_TEMP(13,0)<32;16,2>   	ubMEDIAN_TEMP(11,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w          ubMEDIAN_TEMP(13,1)<32;16,2>  	ubMEDIAN_TEMP(11,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(13,0)<2>    	ubMEDIAN_TEMP(11,0)<32;16,2>		
+ (f0.1) mov (16) ubMEDIAN_TEMP(13,1)<2>		ubMEDIAN_TEMP(11,1)<32;16,2>		
+
+// Sobel Value calculation for the current pixel v4
+//          v2 v1 v0
+//           *  *  *     <--- Different field - not used
+//          v5 v4 v3
+//           *  *  *     <--- Different field - not used
+//          v8 v7 v6
+//
+//    Gx = -v0 - 2*v3 - v6 + v2 + 2*v5 + v8
+//    Gy =  v0 + 2*v1 + v2 - v6 - 2*v7 - v8
+//
+//  Sobel = (|Gx| + |Gy|) >> 3
+
+//TODO - Change Later - rT
+add (1) a0.0:uw  a0.0<0;1,0>:uw -128:uw
+
+// - 2 * v3
+mul (16) acc0.0<1>:w  		r[a0.0,64]<16;16,1>:ub  		-2:w
+// + v8
+mac (16) acc0.0<1>:w  		r[a0.0,132]<16;16,1>:ub   		1:w
+// - v0
+mac (16) acc0.0<1>:w  		r[a0.0,0]<16;16,1>:ub  		-1:w
+// - v6
+mac (16) acc0.0<1>:w  		r[a0.0,128]<16;16,1>:ub  		-1:w
+// + v2
+mac (16) acc0.0<1>:w  		r[a0.0,4]<16;16,1>:ub   		1:w
+// + 2 * v5
+mac (16) wSOBEL_X(0)<1> 	r[a0.0,68]<16;16,1>:ub   		2:w
+// - 2 * v3
+mul (16) acc0.0<1>:w  		r[a0.0,96]<16;16,1>:ub  		-2:w
+// + v8
+mac (16) acc0.0<1>:w  		r[a0.0,164]<16;16,1>:ub   		1:w
+// - v0
+mac (16) acc0.0<1>:w  		r[a0.0,32]<16;16,1>:ub  		-1:w
+// - v6
+mac (16) acc0.0<1>:w  		r[a0.0,160]<16;16,1>:ub  		-1:w
+// + v2
+mac (16) acc0.0<1>:w  		r[a0.0,36]<16;16,1>:ub   		1:w
+// + 2 * v5
+mac (16) wSOBEL_X(1)<1> 	r[a0.0,100]<16;16,1>:ub   		2:w
+// - 2 * v3
+mul (16) acc0.0<1>:w  		r[a0.0,128]<16;16,1>:ub  		-2:w
+// + v8
+mac (16) acc0.0<1>:w  		r[a0.0,196]<16;16,1>:ub   		1:w
+// - v0
+mac (16) acc0.0<1>:w  		r[a0.0,64]<16;16,1>:ub  		-1:w
+// - v6
+mac (16) acc0.0<1>:w  		r[a0.0,192]<16;16,1>:ub  		-1:w
+// + v2
+mac (16) acc0.0<1>:w  		r[a0.0,68]<16;16,1>:ub   		1:w
+// + 2 * v5
+mac (16) wSOBEL_X(2)<1> 	r[a0.0,132]<16;16,1>:ub   		2:w
+// - 2 * v3
+mul (16) acc0.0<1>:w  		r[a0.0,160]<16;16,1>:ub  		-2:w
+// + v8
+mac (16) acc0.0<1>:w  		r[a0.0,228]<16;16,1>:ub   		1:w
+// - v0
+mac (16) acc0.0<1>:w  		r[a0.0,96]<16;16,1>:ub  		-1:w
+// - v6
+mac (16) acc0.0<1>:w  		r[a0.0,224]<16;16,1>:ub  		-1:w
+// + v2
+mac (16) acc0.0<1>:w  		r[a0.0,100]<16;16,1>:ub   		1:w
+// + 2 * v5
+mac (16) wSOBEL_X(3)<1> 	r[a0.0,164]<16;16,1>:ub   		2:w
+
+// + 2 * v1
+mul (16) acc0.0<1>:w  r[a0.0,2]<16;16,1>:ub   	2:w
+// + v0
+mac (16) acc0.0<1>:w  r[a0.0,0]<16;16,1>:ub   	1:w
+// - v8
+mac (16) acc0.0<1>:w  r[a0.0,132]<16;16,1>:ub  -1:w
+// + v2
+mac (16) acc0.0<1>:w  r[a0.0,4]<16;16,1>:ub   	1:w
+// - v6
+mac (16) acc0.0<1>:w  r[a0.0,128]<16;16,1>:ub  -1:w
+// - 2 * v7
+mac (16) acc0.0<1>:w  r[a0.0,130]<16;16,1>:ub  -2:w
+
+add (16) acc0.0<1>:uw (abs)acc0.0<16;16,1>:w		(abs)wSOBEL_X(0)<16;16,1>
+
+shr (16) uwSOBEL(0)<1>	acc0.0<16;16,1>:uw   3:uw
+// + 2 * v1
+mul (16) acc0.0<1>:w  r[a0.0,34]<16;16,1>:ub   	2:w
+// + v0
+mac (16) acc0.0<1>:w  r[a0.0,32]<16;16,1>:ub   	1:w
+// - v8
+mac (16) acc0.0<1>:w  r[a0.0,164]<16;16,1>:ub  -1:w
+// + v2
+mac (16) acc0.0<1>:w  r[a0.0,36]<16;16,1>:ub   	1:w
+// - v6
+mac (16) acc0.0<1>:w  r[a0.0,160]<16;16,1>:ub  -1:w
+// - 2 * v7
+mac (16) acc0.0<1>:w  r[a0.0,162]<16;16,1>:ub  -2:w
+
+add (16) acc0.0<1>:uw (abs)acc0.0<16;16,1>:w		(abs)wSOBEL_X(1)<16;16,1>
+
+shr (16) uwSOBEL(1)<1>	acc0.0<16;16,1>:uw   3:uw
+// + 2 * v1
+mul (16) acc0.0<1>:w  r[a0.0,66]<16;16,1>:ub   	2:w
+// + v0
+mac (16) acc0.0<1>:w  r[a0.0,64]<16;16,1>:ub   	1:w
+// - v8
+mac (16) acc0.0<1>:w  r[a0.0,196]<16;16,1>:ub  -1:w
+// + v2
+mac (16) acc0.0<1>:w  r[a0.0,68]<16;16,1>:ub   	1:w
+// - v6
+mac (16) acc0.0<1>:w  r[a0.0,192]<16;16,1>:ub  -1:w
+// - 2 * v7
+mac (16) acc0.0<1>:w  r[a0.0,194]<16;16,1>:ub  -2:w
+
+add (16) acc0.0<1>:uw (abs)acc0.0<16;16,1>:w		(abs)wSOBEL_X(2)<16;16,1>
+
+shr (16) uwSOBEL(2)<1>	acc0.0<16;16,1>:uw   3:uw
+// + 2 * v1
+mul (16) acc0.0<1>:w  r[a0.0,98]<16;16,1>:ub   	2:w
+// + v0
+mac (16) acc0.0<1>:w  r[a0.0,96]<16;16,1>:ub   	1:w
+// - v8
+mac (16) acc0.0<1>:w  r[a0.0,228]<16;16,1>:ub  -1:w
+// + v2
+mac (16) acc0.0<1>:w  r[a0.0,100]<16;16,1>:ub   	1:w
+// - v6
+mac (16) acc0.0<1>:w  r[a0.0,224]<16;16,1>:ub  -1:w
+// - 2 * v7
+mac (16) acc0.0<1>:w  r[a0.0,226]<16;16,1>:ub  -2:w
+
+add (16) acc0.0<1>:uw (abs)acc0.0<16;16,1>:w		(abs)wSOBEL_X(3)<16;16,1>
+
+shr (16) uwSOBEL(3)<1>	acc0.0<16;16,1>:uw   3:uw
+
+//Mov Median in CURBE_TEMP to free up temp space.
+mov (16)	ubMEDIAN(0,0)<1>  	ubMEDIAN_TEMP(4,0)<16;16,1>		
+mov (16)	ubMEDIAN(0,16)<1> ubMEDIAN_TEMP(4,16)<16;16,1>		
+mov (16)	ubMEDIAN(0,32)<1>  	ubMEDIAN_TEMP(13,0)<16;16,1>		
+mov (16)	ubMEDIAN(0,48)<1> ubMEDIAN_TEMP(13,16)<16;16,1>		
+
+// Find:
+//      absDiff = abs(ubCurY - ubMedian)
+// Find the difference between pixel and median value.
+
+//Median is interleaved. So difference is also interleaved.
+
+//------------------------------------------------------------------------------------------
+//Process 16 U and 16 V pixels here and rest later.
+// first row - v0,v1,v2
+add (16) wDIFF(0)<1>   r[a0.0,0]<16;16,1>:ub  -ubMEDIAN(0,0)<16;16,1>
+add (16) wDIFF(1)<1>   r[a0.0,2]<16;16,1>:ub  -ubMEDIAN(0,0)<16;16,1>
+add (16) wDIFF(2)<1>   r[a0.0,4]<16;16,1>:ub  -ubMEDIAN(0,0)<16;16,1>
+
+// second row - v3,v4,v5
+add (16) wDIFF(3)<1>   r[a0.0,64]<16;16,1>:ub  -ubMEDIAN(0,0)<16;16,1>
+add (16) wDIFF(4)<1>   r[a0.0,66]<16;16,1>:ub  -ubMEDIAN(0,0)<16;16,1>
+add (16) wDIFF(5)<1>   r[a0.0,68]<16;16,1>:ub  -ubMEDIAN(0,0)<16;16,1>
+
+// third row - v6,v7,v8
+add (16) wDIFF(6)<1>   r[a0.0,128]<16;16,1>:ub  -ubMEDIAN(0,0)<16;16,1>
+add (16) wDIFF(7)<1>   r[a0.0,130]<16;16,1>:ub  -ubMEDIAN(0,0)<16;16,1>
+add (16) wDIFF(8)<1>   r[a0.0,132]<16;16,1>:ub  -ubMEDIAN(0,0)<16;16,1>
+// first row - v0,v1,v2
+add (16) wDIFF(9)<1>   r[a0.0,32]<16;16,1>:ub  -ubMEDIAN(0,16)<16;16,1>
+add (16) wDIFF(10)<1>   r[a0.0,34]<16;16,1>:ub  -ubMEDIAN(0,16)<16;16,1>
+add (16) wDIFF(11)<1>   r[a0.0,36]<16;16,1>:ub  -ubMEDIAN(0,16)<16;16,1>
+
+// second row - v3,v4,v5
+add (16) wDIFF(12)<1>   r[a0.0,96]<16;16,1>:ub  -ubMEDIAN(0,16)<16;16,1>
+add (16) wDIFF(13)<1>   r[a0.0,98]<16;16,1>:ub  -ubMEDIAN(0,16)<16;16,1>
+add (16) wDIFF(14)<1>   r[a0.0,100]<16;16,1>:ub  -ubMEDIAN(0,16)<16;16,1>
+
+// third row - v6,v7,v8
+add (16) wDIFF(15)<1>   r[a0.0,160]<16;16,1>:ub  -ubMEDIAN(0,16)<16;16,1>
+add (16) wDIFF(16)<1>   r[a0.0,162]<16;16,1>:ub  -ubMEDIAN(0,16)<16;16,1>
+add (16) wDIFF(17)<1>   r[a0.0,164]<16;16,1>:ub  -ubMEDIAN(0,16)<16;16,1>
+
+//TODO - Change Later - rT
+add (1) a0.0:uw  a0.0<0;1,0>:uw 64:uw
+
+// Find sum of all absolute differences AND
+// maximum absolute difference for 16 U and 16 V here.
+//First 2 rows of 8x4
+//Compare 0-1, 2-3, 4-5, 6-7
+cmp.g.f0.0 (16) null:uw         (abs)wDIFF(0)<16;16,1>   (abs)wDIFF(1)<16;16,1>
+cmp.g.f0.1 (16) null:uw         (abs)wDIFF(2)<16;16,1>   (abs)wDIFF(3)<16;16,1>
+cmp.g.f1.0 (16) null:uw         (abs)wDIFF(4)<16;16,1>   (abs)wDIFF(5)<16;16,1>
+cmp.g.f1.1 (16) null:uw         (abs)wDIFF(6)<16;16,1>   (abs)wDIFF(7)<16;16,1>
+
+//Calculate SAD
+	add        (16) acc0.0<1>:uw     (abs)wDIFF(0)<16;16,1>  (abs)wDIFF(1)<16;16,1>
+	add        (16) acc0.0<1>:uw     acc0.0<16;16,1>:uw 		(abs)wDIFF(2)<16;16,1>
+	add        (16) acc0.0<1>:uw     acc0.0<16;16,1>:uw 		(abs)wDIFF(3)<16;16,1>
+	add        (16) acc0.0<1>:uw     acc0.0<16;16,1>:uw 		(abs)wDIFF(4)<16;16,1>
+	add        (16) acc0.0<1>:uw     acc0.0<16;16,1>:uw 		(abs)wDIFF(5)<16;16,1>
+	add        (16) acc0.0<1>:uw     acc0.0<16;16,1>:uw 		(abs)wDIFF(6)<16;16,1>
+	add        (16) acc0.0<1>:uw     acc0.0<16;16,1>:uw 		(abs)wDIFF(7)<16;16,1>
+	add        (16) uwSOAD(0)<1>  	 acc0.0<16;16,1>:uw 		(abs)wDIFF(8)<16;16,1>
+
+(f0.0) sel (16) uwCURBE_TEMP(0)<1> (abs)wDIFF(0)<16;16,1>   (abs)wDIFF(1)<16;16,1>
+(f0.1) sel (16) uwCURBE_TEMP(1)<1> (abs)wDIFF(2)<16;16,1>   (abs)wDIFF(3)<16;16,1>
+(f1.0) sel (16) uwCURBE_TEMP(2)<1> (abs)wDIFF(4)<16;16,1>   (abs)wDIFF(5)<16;16,1>
+(f1.1) sel (16) uwCURBE_TEMP(3)<1> (abs)wDIFF(6)<16;16,1>   (abs)wDIFF(7)<16;16,1>
+
+//------------
+	//DIFF(0-7) is not needed here. Populate it.
+	// first row - v0,v1,v2
+	add (16) wDIFF(0)<1>   r[a0.0,0]<16;16,1>:ub  -ubMEDIAN(1,0)<16;16,1>
+	add (16) wDIFF(1)<1>   r[a0.0,2]<16;16,1>:ub  -ubMEDIAN(1,0)<16;16,1>
+	add (16) wDIFF(2)<1>   r[a0.0,4]<16;16,1>:ub  -ubMEDIAN(1,0)<16;16,1>
+
+	// second row - v3,v4,v5
+	add (16) wDIFF(3)<1>   r[a0.0,64]<16;16,1>:ub  -ubMEDIAN(1,0)<16;16,1>
+	add (16) wDIFF(4)<1>   r[a0.0,66]<16;16,1>:ub  -ubMEDIAN(1,0)<16;16,1>
+	add (16) wDIFF(5)<1>   r[a0.0,68]<16;16,1>:ub  -ubMEDIAN(1,0)<16;16,1>
+
+	// third row - v6,v7
+	add (16) wDIFF(6)<1>   r[a0.0,128]<16;16,1>:ub  -ubMEDIAN(1,0)<16;16,1>
+	add (16) wDIFF(7)<1>   r[a0.0,130]<16;16,1>:ub  -ubMEDIAN(1,0)<16;16,1>
+//------------
+
+//Compare Max(0,1) - Max(2,3), Max(4,5) - Max(6,7)
+cmp.g.f0.0 (16) null:uw      uwCURBE_TEMP(0)<16;16,1>   uwCURBE_TEMP(1)<16;16,1>
+cmp.g.f0.1 (16) null:uw      uwCURBE_TEMP(2)<16;16,1>   uwCURBE_TEMP(3)<16;16,1>
+
+(f0.0)sel (16) uwCURBE_TEMP(0)<1>  uwCURBE_TEMP(0)<16;16,1>   uwCURBE_TEMP(1)<16;16,1>
+(f0.1)sel (16) uwCURBE_TEMP(2)<1>  uwCURBE_TEMP(2)<16;16,1>   uwCURBE_TEMP(3)<16;16,1>
+
+//Compare Max(0,1,2,3) - Max(4,5,6,7)
+cmp.g.f0.0 (16) null:uw      		uwCURBE_TEMP(0)<16;16,1>   uwCURBE_TEMP(2)<16;16,1>
+(f0.0)sel  (16) uwCURBE_TEMP(0)<1> 	uwCURBE_TEMP(0)<16;16,1>   uwCURBE_TEMP(2)<16;16,1>
+
+//Compare Max(0,1,2,3,4,5,6,7) - 8
+cmp.g.f0.0 (16) null:uw      			uwCURBE_TEMP(0)<16;16,1>   (abs)wDIFF(8)<16;16,1>
+(f0.0)sel  (16) uwMAX_ABS_DIFF(0)<1> uwCURBE_TEMP(0)<16;16,1>   (abs)wDIFF(8)<16;16,1>
+
+//------------
+	//Load v8 - DIFF(8)
+	add (16) wDIFF(8)<1>   			r[a0.0,132]<16;16,1>:ub  -ubMEDIAN(1,0)<16;16,1>
+//------------
+//Compare 0-1, 2-3, 4-5, 6-7
+cmp.g.f0.0 (16) null:uw         (abs)wDIFF(9)<16;16,1>   (abs)wDIFF(10)<16;16,1>
+cmp.g.f0.1 (16) null:uw         (abs)wDIFF(11)<16;16,1>   (abs)wDIFF(12)<16;16,1>
+cmp.g.f1.0 (16) null:uw         (abs)wDIFF(13)<16;16,1>   (abs)wDIFF(14)<16;16,1>
+cmp.g.f1.1 (16) null:uw         (abs)wDIFF(15)<16;16,1>   (abs)wDIFF(16)<16;16,1>
+
+//Calculate SAD
+	add        (16) acc0.0<1>:uw     (abs)wDIFF(9)<16;16,1>  (abs)wDIFF(10)<16;16,1>
+	add        (16) acc0.0<1>:uw     acc0.0<16;16,1>:uw 		(abs)wDIFF(11)<16;16,1>
+	add        (16) acc0.0<1>:uw     acc0.0<16;16,1>:uw 		(abs)wDIFF(12)<16;16,1>
+	add        (16) acc0.0<1>:uw     acc0.0<16;16,1>:uw 		(abs)wDIFF(13)<16;16,1>
+	add        (16) acc0.0<1>:uw     acc0.0<16;16,1>:uw 		(abs)wDIFF(14)<16;16,1>
+	add        (16) acc0.0<1>:uw     acc0.0<16;16,1>:uw 		(abs)wDIFF(15)<16;16,1>
+	add        (16) acc0.0<1>:uw     acc0.0<16;16,1>:uw 		(abs)wDIFF(16)<16;16,1>
+	add        (16) uwSOAD(1)<1>  	 acc0.0<16;16,1>:uw 		(abs)wDIFF(17)<16;16,1>
+
+(f0.0) sel (16) uwCURBE_TEMP(0)<1> (abs)wDIFF(9)<16;16,1>   (abs)wDIFF(10)<16;16,1>
+(f0.1) sel (16) uwCURBE_TEMP(1)<1> (abs)wDIFF(11)<16;16,1>   (abs)wDIFF(12)<16;16,1>
+(f1.0) sel (16) uwCURBE_TEMP(2)<1> (abs)wDIFF(13)<16;16,1>   (abs)wDIFF(14)<16;16,1>
+(f1.1) sel (16) uwCURBE_TEMP(3)<1> (abs)wDIFF(15)<16;16,1>   (abs)wDIFF(16)<16;16,1>
+
+//------------
+	//DIFF(9-16) is not needed here. Populate it.
+	// first row - v0,v1,v2
+	add (16) wDIFF(9)<1>   r[a0.0,32]<16;16,1>:ub  -ubMEDIAN(1,16)<16;16,1>
+	add (16) wDIFF(10)<1>   r[a0.0,34]<16;16,1>:ub  -ubMEDIAN(1,16)<16;16,1>
+	add (16) wDIFF(11)<1>   r[a0.0,36]<16;16,1>:ub  -ubMEDIAN(1,16)<16;16,1>
+
+	// second row - v3,v4,v5
+	add (16) wDIFF(12)<1>   r[a0.0,96]<16;16,1>:ub  -ubMEDIAN(1,16)<16;16,1>
+	add (16) wDIFF(13)<1>   r[a0.0,98]<16;16,1>:ub  -ubMEDIAN(1,16)<16;16,1>
+	add (16) wDIFF(14)<1>   r[a0.0,100]<16;16,1>:ub  -ubMEDIAN(1,16)<16;16,1>
+
+	// third row - v6,v7
+	add (16) wDIFF(15)<1>   r[a0.0,160]<16;16,1>:ub  -ubMEDIAN(1,16)<16;16,1>
+	add (16) wDIFF(16)<1>   r[a0.0,162]<16;16,1>:ub  -ubMEDIAN(1,16)<16;16,1>
+//------------
+
+//Compare Max(0,1) - Max(2,3), Max(4,5) - Max(6,7)
+cmp.g.f0.0 (16) null:uw      uwCURBE_TEMP(0)<16;16,1>   uwCURBE_TEMP(1)<16;16,1>
+cmp.g.f0.1 (16) null:uw      uwCURBE_TEMP(2)<16;16,1>   uwCURBE_TEMP(3)<16;16,1>
+
+(f0.0)sel (16) uwCURBE_TEMP(0)<1>  uwCURBE_TEMP(0)<16;16,1>   uwCURBE_TEMP(1)<16;16,1>
+(f0.1)sel (16) uwCURBE_TEMP(2)<1>  uwCURBE_TEMP(2)<16;16,1>   uwCURBE_TEMP(3)<16;16,1>
+
+//Compare Max(0,1,2,3) - Max(4,5,6,7)
+cmp.g.f0.0 (16) null:uw      		uwCURBE_TEMP(0)<16;16,1>   uwCURBE_TEMP(2)<16;16,1>
+(f0.0)sel  (16) uwCURBE_TEMP(0)<1> 	uwCURBE_TEMP(0)<16;16,1>   uwCURBE_TEMP(2)<16;16,1>
+
+//Compare Max(0,1,2,3,4,5,6,7) - 8
+cmp.g.f0.0 (16) null:uw      			uwCURBE_TEMP(0)<16;16,1>   (abs)wDIFF(17)<16;16,1>
+(f0.0)sel  (16) uwMAX_ABS_DIFF(1)<1> uwCURBE_TEMP(0)<16;16,1>   (abs)wDIFF(17)<16;16,1>
+
+//------------
+	//Load v8 - DIFF(17)
+	add (16) wDIFF(17)<1>   			r[a0.0,164]<16;16,1>:ub  -ubMEDIAN(1,16)<16;16,1>
+//------------
+
+//if ((sobel_edge_measure < m_SobelEdgeThreshold) && ((block_max-block_min) < m_LocalDiffThreshold))
+//						if (sigma_mb_min > sigma)
+//							sigma_mb_min = sigma;
+
+//NOTE: block_min is always zero as the median is one of the values in the 3x3 block. So no need to calculate it.
+//		So just do -
+//if ((sobel_edge_measure < m_SobelEdgeThreshold) && ((block_max) < m_LocalDiffThreshold) && ( sigma < sigma_mb_min))
+//							sigma_mb_min = sigma;
+
+//We are processing 32 bytes of U and 32 bytes of V - each of size 8x4.
+//Compare first 8 bytes with max possible (255).
+//Start above condition from second 8 bytes.
+
+//TODO - Change Later - rT
+//	mov (1)	pCUR_MIN_SOAD_8x4:uw	1752:uw		//r54.24:ub
+
+//First row of 8x4
+        cmp.l.f0.0 	(16) null:uw     		uwSOBEL(0)<16;16,1>         r55.30<0;2,1>:ub
+(f0.0)  cmp.l.f0.0 	(16) null:uw     		uwMAX_ABS_DIFF(0)<16;16,1>  r55.28<0;2,1>:ub
+(f0.0)  cmp.l.f0.0 	(16) null:uw     		uwSOAD(0)<16;16,1>			255:uw
+(f0.0)  sel 		(16) uwSOBEL(0)<1>   uwSOAD(0)<16;16,1>			255:uw
+
+//Second row of 8x4
+		cmp.l.f0.0 	(16) null:uw     		uwSOBEL(1)<16;16,1>         r55.30<0;2,1>:ub
+(f0.0)  cmp.l.f0.0 	(16) null:uw     		uwMAX_ABS_DIFF(1)<16;16,1>  r55.28<0;2,1>:ub
+(f0.0)  cmp.l.f0.0 	(16) null:uw     		uwSOAD(1)<16;16,1>			uwSOBEL(0)<16;16,1>
+(f0.0)  mov 		(16) uwSOBEL(0)<1>   uwSOAD(1)<16;16,1>
+
+// Find sum of all absolute differences AND
+// maximum absolute difference for 16 U and 16 V here.
+//Second 2 rows of 8x4
+//Compare 0-1, 2-3, 4-5, 6-7
+cmp.g.f0.0 (16) null:uw         (abs)wDIFF(0)<16;16,1>   (abs)wDIFF(1)<16;16,1>
+cmp.g.f0.1 (16) null:uw         (abs)wDIFF(2)<16;16,1>   (abs)wDIFF(3)<16;16,1>
+cmp.g.f1.0 (16) null:uw         (abs)wDIFF(4)<16;16,1>   (abs)wDIFF(5)<16;16,1>
+cmp.g.f1.1 (16) null:uw         (abs)wDIFF(6)<16;16,1>   (abs)wDIFF(7)<16;16,1>
+
+//Calculate SAD
+	add        (16) acc0.0<1>:uw     (abs)wDIFF(0)<16;16,1>  (abs)wDIFF(1)<16;16,1>
+	add        (16) acc0.0<1>:uw     acc0.0<16;16,1>:uw 		(abs)wDIFF(2)<16;16,1>
+	add        (16) acc0.0<1>:uw     acc0.0<16;16,1>:uw 		(abs)wDIFF(3)<16;16,1>
+	add        (16) acc0.0<1>:uw     acc0.0<16;16,1>:uw 		(abs)wDIFF(4)<16;16,1>
+	add        (16) acc0.0<1>:uw     acc0.0<16;16,1>:uw 		(abs)wDIFF(5)<16;16,1>
+	add        (16) acc0.0<1>:uw     acc0.0<16;16,1>:uw 		(abs)wDIFF(6)<16;16,1>
+	add        (16) acc0.0<1>:uw     acc0.0<16;16,1>:uw 		(abs)wDIFF(7)<16;16,1>
+	add        (16) uwSOAD(0)<1> 	 acc0.0<16;16,1>:uw 		(abs)wDIFF(8)<16;16,1>
+
+(f0.0) sel (16) uwCURBE_TEMP(0)<1> (abs)wDIFF(0)<16;16,1>   (abs)wDIFF(1)<16;16,1>
+(f0.1) sel (16) uwCURBE_TEMP(1)<1> (abs)wDIFF(2)<16;16,1>   (abs)wDIFF(3)<16;16,1>
+(f1.0) sel (16) uwCURBE_TEMP(2)<1> (abs)wDIFF(4)<16;16,1>   (abs)wDIFF(5)<16;16,1>
+(f1.1) sel (16) uwCURBE_TEMP(3)<1> (abs)wDIFF(6)<16;16,1>   (abs)wDIFF(7)<16;16,1>
+
+//Compare Max(0,1) - Max(2,3), Max(4,5) - Max(6,7)
+cmp.g.f0.0 (16) null:uw      uwCURBE_TEMP(0)<16;16,1>   uwCURBE_TEMP(1)<16;16,1>
+cmp.g.f0.1 (16) null:uw      uwCURBE_TEMP(2)<16;16,1>   uwCURBE_TEMP(3)<16;16,1>
+
+(f0.0)sel (16) uwCURBE_TEMP(0)<1>  uwCURBE_TEMP(0)<16;16,1>   uwCURBE_TEMP(1)<16;16,1>
+(f0.1)sel (16) uwCURBE_TEMP(2)<1>  uwCURBE_TEMP(2)<16;16,1>   uwCURBE_TEMP(3)<16;16,1>
+
+//Compare Max(0,1,2,3) - Max(4,5,6,7)
+cmp.g.f0.0 (16) null:uw      		uwCURBE_TEMP(0)<16;16,1>   uwCURBE_TEMP(2)<16;16,1>
+(f0.0)sel  (16) uwCURBE_TEMP(0)<1> 	uwCURBE_TEMP(0)<16;16,1>   uwCURBE_TEMP(2)<16;16,1>
+
+//Compare Max(0,1,2,3,4,5,6,7) - 8
+cmp.g.f0.0 (16) null:uw      			uwCURBE_TEMP(0)<16;16,1>   (abs)wDIFF(8)<16;16,1>
+(f0.0)sel  (16) uwMAX_ABS_DIFF(0)<1> 	uwCURBE_TEMP(0)<16;16,1>   (abs)wDIFF(8)<16;16,1>
+//Compare 0-1, 2-3, 4-5, 6-7
+cmp.g.f0.0 (16) null:uw         (abs)wDIFF(9)<16;16,1>   (abs)wDIFF(10)<16;16,1>
+cmp.g.f0.1 (16) null:uw         (abs)wDIFF(11)<16;16,1>   (abs)wDIFF(12)<16;16,1>
+cmp.g.f1.0 (16) null:uw         (abs)wDIFF(13)<16;16,1>   (abs)wDIFF(14)<16;16,1>
+cmp.g.f1.1 (16) null:uw         (abs)wDIFF(15)<16;16,1>   (abs)wDIFF(16)<16;16,1>
+
+//Calculate SAD
+	add        (16) acc0.0<1>:uw     (abs)wDIFF(9)<16;16,1>  (abs)wDIFF(10)<16;16,1>
+	add        (16) acc0.0<1>:uw     acc0.0<16;16,1>:uw 		(abs)wDIFF(11)<16;16,1>
+	add        (16) acc0.0<1>:uw     acc0.0<16;16,1>:uw 		(abs)wDIFF(12)<16;16,1>
+	add        (16) acc0.0<1>:uw     acc0.0<16;16,1>:uw 		(abs)wDIFF(13)<16;16,1>
+	add        (16) acc0.0<1>:uw     acc0.0<16;16,1>:uw 		(abs)wDIFF(14)<16;16,1>
+	add        (16) acc0.0<1>:uw     acc0.0<16;16,1>:uw 		(abs)wDIFF(15)<16;16,1>
+	add        (16) acc0.0<1>:uw     acc0.0<16;16,1>:uw 		(abs)wDIFF(16)<16;16,1>
+	add        (16) uwSOAD(1)<1> 	 acc0.0<16;16,1>:uw 		(abs)wDIFF(17)<16;16,1>
+
+(f0.0) sel (16) uwCURBE_TEMP(0)<1> (abs)wDIFF(9)<16;16,1>   (abs)wDIFF(10)<16;16,1>
+(f0.1) sel (16) uwCURBE_TEMP(1)<1> (abs)wDIFF(11)<16;16,1>   (abs)wDIFF(12)<16;16,1>
+(f1.0) sel (16) uwCURBE_TEMP(2)<1> (abs)wDIFF(13)<16;16,1>   (abs)wDIFF(14)<16;16,1>
+(f1.1) sel (16) uwCURBE_TEMP(3)<1> (abs)wDIFF(15)<16;16,1>   (abs)wDIFF(16)<16;16,1>
+
+//Compare Max(0,1) - Max(2,3), Max(4,5) - Max(6,7)
+cmp.g.f0.0 (16) null:uw      uwCURBE_TEMP(0)<16;16,1>   uwCURBE_TEMP(1)<16;16,1>
+cmp.g.f0.1 (16) null:uw      uwCURBE_TEMP(2)<16;16,1>   uwCURBE_TEMP(3)<16;16,1>
+
+(f0.0)sel (16) uwCURBE_TEMP(0)<1>  uwCURBE_TEMP(0)<16;16,1>   uwCURBE_TEMP(1)<16;16,1>
+(f0.1)sel (16) uwCURBE_TEMP(2)<1>  uwCURBE_TEMP(2)<16;16,1>   uwCURBE_TEMP(3)<16;16,1>
+
+//Compare Max(0,1,2,3) - Max(4,5,6,7)
+cmp.g.f0.0 (16) null:uw      		uwCURBE_TEMP(0)<16;16,1>   uwCURBE_TEMP(2)<16;16,1>
+(f0.0)sel  (16) uwCURBE_TEMP(0)<1> 	uwCURBE_TEMP(0)<16;16,1>   uwCURBE_TEMP(2)<16;16,1>
+
+//Compare Max(0,1,2,3,4,5,6,7) - 8
+cmp.g.f0.0 (16) null:uw      			uwCURBE_TEMP(0)<16;16,1>   (abs)wDIFF(17)<16;16,1>
+(f0.0)sel  (16) uwMAX_ABS_DIFF(1)<1> 	uwCURBE_TEMP(0)<16;16,1>   (abs)wDIFF(17)<16;16,1>
+
+//Third row of 8x4
+        cmp.l.f0.0 	(16) null:uw     		uwSOBEL(2)<16;16,1>     	r55.30<0;2,1>:ub
+(f0.0)  cmp.l.f0.0 	(16) null:uw     		uwMAX_ABS_DIFF(0)<16;16,1>  r55.28<0;2,1>:ub
+(f0.0)  cmp.l.f0.0 	(16) null:uw     		uwSOAD(0)<16;16,1>			uwSOBEL(0)<16;16,1>
+(f0.0)  mov 		(16) uwSOBEL(0)<1>   uwSOAD(0)<16;16,1>
+
+//Fourth row of 8x4
+		cmp.l.f0.0 	(16) null:uw     		uwSOBEL(3)<16;16,1>     	r55.30<0;2,1>:ub
+(f0.0)  cmp.l.f0.0 	(16) null:uw     		uwMAX_ABS_DIFF(1)<16;16,1>  r55.28<0;2,1>:ub
+(f0.0)  cmp.l.f0.0 	(16) null:uw     		uwSOAD(1)<16;16,1>			uwSOBEL(0)<16;16,1>
+(f0.0)  mov 		(16) uwSOBEL(0)<1>   uwSOAD(1)<16;16,1>
+
+		cmp.l.f0.0 	(8) null:uw     		uwSOBEL(0,0)<8;8,1>  	uwSOBEL(0,8)<8;8,1>
+(f0.0)  sel 		(8) uwSOBEL(0)<1>   	uwSOBEL(0,0)<8;8,1>  	uwSOBEL(0,8)<8;8,1>
+
+		cmp.l.f0.0 	(4) null:uw     		uwSOBEL(0,0)<4;4,1>  	uwSOBEL(0,4)<4;4,1>
+(f0.0)  sel 		(4) uwSOBEL(0)<1>   	uwSOBEL(0,0)<4;4,1>  	uwSOBEL(0,4)<4;4,1>
+
+		cmp.l.f0.0 	(2) null:uw     					uwSOBEL(0,0)<2;2,1>  uwSOBEL(0,2)<2;2,1>
+(f0.0)  sel 		(2) r[a0.1,0]<1>:uw   	uwSOBEL(0,0)<2;2,1>  uwSOBEL(0,2)<2;2,1>
+
+
+
+
+
+
+// End of common.inc
+
+mov (1) ip:ud r7.7<0;1,0>:d
+
+
+.end_code
+.end_kernel
diff --git a/src/shaders/post_processing/gen7/NV12_DN_422CP.g4a b/src/shaders/post_processing/gen7/NV12_DN_422CP.g4a
new file mode 100644
index 0000000..926469e
--- /dev/null
+++ b/src/shaders/post_processing/gen7/NV12_DN_422CP.g4a
@@ -0,0 +1,539 @@
+/*
+ *  Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+//  113    // Total instruction count
+//    1    // Total kernel count
+
+.kernel NV12_DN_422CP
+.code
+
+
+
+// FileName:	DN_PL_Core.asm 
+// Author:		Vivek Kumar
+// Description:	Tasks for DN only case (16x8 block) for planar format
+
+
+
+// FileName:	DN.asm 
+// Author:		Vivek Kumar
+// Description:	Tasks for DN only case (16x8 block)
+
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16)
+.default_register_type  :ub
+
+.reg_count_total        128
+.reg_count_payload      7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+                                    //  e.g.            byte0   byte1  byte2
+                                    // YUYV               0       1      3
+                                    // YVYU               0       3      1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+                                    //  e.g.              byte0           byte1           byte2
+                                    // YUYV                 0               1               3
+                                    // YVYU                 0               3               1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+//                = 000 0001 (min message len 1 ) 0,0000 (resp len 0   -add later)
+//                  0000,0000,0000
+//                  0001(Spawn a root thread),0001 (Root thread spawn thread)
+//                = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+//                = 000 0110 (min message len 6 ) 0,0000 (resp len 0   -add later)
+//                  1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+//                  0000,0000 (Binding table index, added later)
+//                = 0x0C098700
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+        // Message Descriptors
+                //                = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+                //                  1 (header present 1) 0 1010 (media block write) 000000
+                //                  00000000 (binding table index - set later)
+                //                = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+                             // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT  r43    // Dummy Message Register for EOT
+
+
+.declare    mubMSGPAYLOAD  Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare    muwMSGPAYLOAD  Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare    mudMSGPAYLOAD  Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare    mfMSGPAYLOAD   Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+
+.declare fROBUF		  Base=r9.0		ElementSize=4		SrcRegion=<8;8,1>		  DstRegion=<1>		Type=f
+
+.declare udROBUF		Base=r9.0		ElementSize=4		SrcRegion=<8;8,1>		  DstRegion=<1>		Type=ud
+
+.declare uwROBUF		Base=r9.0		ElementSize=2		SrcRegion=<16;16,1>		DstRegion=<1>		Type=uw
+
+.declare ubROBUF		Base=r9.0		ElementSize=1		SrcRegion=<16;16,1>		DstRegion=<1>		Type=ub
+
+.declare ub4ROBUF 	Base=r9.0		ElementSize=1		SrcRegion=<32;8,4>		DstRegion=<4>		Type=ub
+
+
+// End of common.inc
+
+
+// FileName:    DNDI.inc
+// Author:      Vivek Kumar
+// Description: Include file for DN, DI and DNDI
+// Inputs:      DI_ENABLE, DN_ENABLE, DN_PLANAR, DN_PACKED
+
+
+
+
+// End of common.inc
+
+
+//Interface:
+//Static Parameters:
+//r1
+
+
+//====================== Binding table (Explicit To DNDI)=========================================
+
+
+.declare mudMSGHDR_DNDI     Base=r18      ElementSize=4    Type=ud
+.declare mdMSGHDR_DNDI      Base=r18      ElementSize=4    Type=d
+.declare mwMSGHDR_DNDI      Base=r18      ElementSize=2    Type=w
+
+
+.declare mudMSGHDR_STMM     Base=r20      ElementSize=4    Type=ud
+
+
+.declare mudMSGHDR_HIST     Base=r22      ElementSize=4    Type=ud
+
+
+.declare mudMSGHDR_ENC_STATS Base=r24 ElementSize=4   Type=ud
+.declare muwMSGHDR_ENC_STATS Base=r24 ElementSize=2   Type=uw
+.declare mubMSGHDR_ENC_STATS Base=r24 ElementSize=1   Type=ub
+
+
+.declare mudMSGHDR_DN_OUT   Base=r31.0      ElementSize=4  Type=ud
+.declare mdMSGHDR_DN_OUT    Base=r31.0      ElementSize=4  Type=d
+.declare mubMSGHDR_DN_OUT   Base=r31.0      ElementSize=1  Type=ub
+
+
+.declare mudMSGHDR_UVCOPY   Base=r36      ElementSize=4  Type=ud
+.declare mdMSGHDR_UVCOPY    Base=r36      ElementSize=4  Type=d
+.declare mudMSGHDR_UCOPY    Base=r36       ElementSize=4  Type=ud
+.declare mudMSGHDR_VCOPY    Base=r38       ElementSize=4  Type=ud
+
+
+.declare mudMSGHDR_DI_OUT1  Base=r18.0      ElementSize=4     Type=ud
+.declare mubMSGHDR_DI_OUT1  Base=r18.0      ElementSize=1     Type=ub
+
+
+.declare mudMSGHDR_DI_OUT2  Base=r23.0      ElementSize=4     Type=ud
+.declare mubMSGHDR_DI_OUT2  Base=r23.0      ElementSize=1     Type=ub
+
+//r45
+//Use r45 as message header, so no need to "mov" the data.
+
+.declare mudDN_Y_OUT        Base=r45.0 ElementSize=4 SrcRegion=<8;8,1>   DstRegion=<1> Type=ud
+
+// Message response (Denoised & DI-ed pixels & statistics); Use buffer 5
+.declare udDNDI_RESP        Base=r46.0 ElementSize=4 SrcRegion=<8;8,1>   DstRegion=<1> Type=ud
+.declare uwDNDI_RESP        Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare ubDNDI_RESP        Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+// Message response (UV Copy); Use buffer 5
+.declare udDNDI_UV_RESP     Base=r58.0 ElementSize=4 SrcRegion=<8;8,1>  DstRegion=<1> Type=ud
+.declare ubDNDI_UV_RESP     Base=r58.0 ElementSize=1 SrcRegion=<16;16,1>    DstRegion=<1> Type=ub
+
+//Temp GRFs: For 42X to 422 Conversion
+.declare uwDNDI_UVCOPY_TEMP Base=r10.0 ElementSize=2 SrcRegion=<16;16,1>    DstRegion=<1> Type=uw       //8 GRFs
+.declare ubDNDI_UVCOPY_TEMP Base=r10.0 ElementSize=1 SrcRegion=<16;16,1>    DstRegion=<1> Type=ub       //8 GRFs
+//---------------------------------------------------------------------------
+// Message descriptors
+//---------------------------------------------------------------------------
+// Extended message descriptor
+    // Message descriptor   for sampler read
+    //                    = 000 0010 (message len 2) 00000 (resp len - set later, 12 or 5 or 11)
+    //                      1 (header present 1) 0 11 (SIMD32/64 mode)
+    //                      1000 (message type) 0000 (DI state index)
+    //                      00000000 (binding table index - set later)
+    //                    = 0x040b8000
+
+
+// Attention: The Message Length is The Number of GRFs with Data Only, without the Header
+
+
+//---------------------------------------------------------------------------
+// VDI Return Data format
+//---------------------------------------------------------------------------
+// Defines for DI enabled
+
+
+// Defines for DI disabled
+
+
+
+// FileName:	DNDI_Command.asm 
+// Author:		Vivek Kumar
+// Description:	Sends a message to the VDI to process one DN (16x8) or DNDI (16x4) block
+
+// Prepare the DNDI send command
+mov (8)		mudMSGHDR_DNDI(0)<1>			r0.0<8;8,1>:ud					// message header
+mov (1)		mwMSGHDR_DNDI(1,4)<1>			r7.0<0;1,0>:w		{ NoDDClr }		// horizontal origin	// Do we need to add offset here? -vK
+mov (1)		mwMSGHDR_DNDI(1,12)<1>			r7.1<0;1,0>:w		{ NoDDChk }		// vertical origin		// Can these 2 be combined? - vK
+
+send (8)	udDNDI_RESP(0)<1>	r18	0x2	0x45E8003:ud
+
+// On Gen6, with VDI walker, use the XY pair returned rather than programmed above
+// VDI_RETURNED_XY is ordered XY in case of walker enables and the same as programmed in case of walker disabled
+mov (2) 	r7.0<1>:w     uwDNDI_RESP(4,14)<2;2,1>	// horizontal/vertical origin in W.14 and W.15    
+
+
+
+// FileName:	DN_Hist_Save.asm 
+// Author:		Vivek Kumar
+// Description:	Saves DN history data to statistics surface
+
+// Write denoise history to memory
+mov (8)    r27<1>:ud				r0.0<8;8,1>:ud                   			// message header   
+
+
+	mov (2)    mudMSGHDR_HIST(1)<1>    	udDNDI_RESP(4,0)<2;2,1>    	// Move denoise history to MRF (4x2)
+
+
+shr (2)    r27.0<1>:ud		r7.0<2;2,1>:w				2:w                                 	// X,Y origin / 4
+add (1)    r27.0<1>:ud		r27.0<0;1,0>:ud			r1.12<0;1,0>:uw		{ NoDDClr }  	// Add pitch to X origin
+mov (1)    r27.2<1>:ud		0x10003:ud									{ NoDDChk }  	// block width and height
+
+mov (8)		mudMSGHDR_HIST(0)<1>		r27.0<8;8,1>:ud
+send (8)	null<1>:d	r22	0x5		0x40A8021:ud
+
+
+
+// FileName:	DNDI_Enc_Stats_Save.asm
+// Author:		Vivek Kumar
+// Description:	Saves Encoder Statistics data to statistics surface in case of DI enabled (for 16x4 block)
+
+// Write encoder statistics to memory
+//Currently enable this only on Gen6 validation
+mov (8)		mudMSGHDR_ENC_STATS(1)<1>	0x0:ud						// Init payload MRF
+mov (8)		mudMSGHDR_ENC_STATS(0)<1>	r0.0<8;8,1>:ud				// message header
+
+shr (1)		mudMSGHDR_ENC_STATS(0,0)<1>		r7.0<0;1,0>:w            	1:w  	{ NoDDClr }			//enable the flag after testing on si           			{ NoDDClr }	// X origin / 2
+mul (1)		acc0.1<1>:ud					r7.1<0;1,0>:w				3:w																							// Y origin * 3
+shr (1)		mudMSGHDR_ENC_STATS(0,1)<1>		acc0.1<0;1,0>:ud			2:w		{ NoDDClr, NoDDChk }		//enable the flag after testing on si			   { NoDDClr, NoDDChk } // Y origin * 3/4
+mov (1)		mudMSGHDR_ENC_STATS(0,2)<1>		0x50003:ud				{ NoDDChk }			//enable the flag after testing on si						{ NoDDChk } // block width and height (8x3)
+add (2)		mudMSGHDR_ENC_STATS(0,0)<1>		mudMSGHDR_ENC_STATS(0,0)<2;2,1>       	r1.12<2;2,1>:uw					// Add pitch to X,Y origin
+
+
+	//Data block for Encoder Statistics
+	//----------------------------------------------------
+	//|  0  |   1  |   2   |  3  |  4  |  5  |  6  |  7  | Bytes
+	//----------------------------------------------------
+	//| BNE |   X  |   X   |  X  |           X           |
+	//----------------------------------------------------
+	//|     X      |     SVCM    |           X           |
+	//----------------------------------------------------
+	//|   SHCM     |     STAD    |           X           |
+	//----------------------------------------------------
+	//|            X             |           X           |
+	//----------------------------------------------------
+	//|     X      |     SVCM    |           X           |
+	//----------------------------------------------------
+	//|   SHCM     |     STAD    |           X           |
+	//----------------------------------------------------
+	mov (1)		mubMSGHDR_ENC_STATS(1,0)<1>		ubDNDI_RESP(4,8)<0;1,0>    		{ NoDDClr }				// Move encoder statistics to MRF
+	mov (1)		muwMSGHDR_ENC_STATS(1,3)<1>		uwDNDI_RESP(4,11)<0;1,0>    	{ NoDDClr, NoDDChk }			// Move encoder statistics to MRF
+	mov (2)		muwMSGHDR_ENC_STATS(1,4)<1>		uwDNDI_RESP(4,12)<2;2,1>    	{ NoDDClr, NoDDChk }			// Move encoder statistics to MRF
+	mov (1)		muwMSGHDR_ENC_STATS(1,9)<1>		uwDNDI_RESP(4,8)<0;1,0>    		{ NoDDClr, NoDDChk }			// Move encoder statistics to MRF
+	mov (2)		muwMSGHDR_ENC_STATS(1,10)<1>	uwDNDI_RESP(4,9)<2;2,1>    		{ NoDDChk }				// Move encoder statistics to MRF
+
+
+send (8)   null<1>:d    r24    0x5    0x40A8021:ud
+
+
+
+// FileName:	DN_Load_UV_NV12_16x8.asm 
+// Author:		Vivek Kumar
+// Description:	Read UV for 16x8 block through DATAPORT 
+
+
+
+// FileName:	UVCopy_Load_16x8.asm
+// Author:		Vivek Kumar
+// Description:	Read UV for 16x8 block through DATAPORT
+
+// CHANGE: read extra UV data so that it can be converted to 422. -rT
+// To keep things simple, the extra data is read in ALL cases, whether or not upsampling is required later on.
+
+
+	add (2)		r27.0<1>:d				r7.0<2;2,1>:w				r4.4<2;2,1>:w				// Source Y Block origin
+	asr (1)  	r27.1<1>:d     			r27.1<0;1,0>:d       	1:w   						{ NoDDClr }	// U/V block origin should be half of Y's
+	mov (1)		r27.2<1>:ud				0x4000F:ud  					{ NoDDChk }	// U/V block width and height (8x5)
+    mov (8)     mudMSGHDR_UVCOPY(0)<1>    	r27.0<8;8,1>:ud
+	send (8)	udDNDI_UV_RESP(0)<1>		r36	0x4	0x2390001:ud
+
+	//Update Header for Save
+	mov (1)		mudMSGHDR_UVCOPY(0,2)<1>	0x3000F:ud									// U/V block width and height (8x4)
+
+
+
+// FileName:    DN_Save_Y_16x8.asm
+// Author:      Vivek Kumar
+// Description: Save one 16x8 blocks of Y channel of DN output for reference
+
+
+mov (8)     mudDN_Y_OUT(0,0)<1>     r0<8;8,1>:ud                                // message header
+mov (2)     mudDN_Y_OUT(0,0)<1>     r7.0<2;2,1>:w                  { NoDDClr }     // X origin
+mov (1)     mudDN_Y_OUT(0,2)<1>     0x7000F:ud    { NoDDChk }     // block width and height (16x8)
+
+//send out data through data port
+send (8)    null<1>:d    mudDN_Y_OUT      0x5    0xA0A8018:ud
+
+
+
+// FileName:	DN_Save_UV_NV12_16x8.asm 
+// Author:		Vivek Kumar
+// Description:	Save UV for 16x8 block through DATAPORT 
+
+
+
+// FileName:	UVCopy_Save_16x8.asm
+// Author:		Vivek Kumar
+// Description:	Save UV for 16x8 block through DATAPORT
+
+
+//Reuse the header from Load component
+//Header is modified at the end of load - to be usable for save.
+
+	mov (8)		mudMSGHDR_UVCOPY(1)<1>		udDNDI_UV_RESP(0)<8;8,1>
+	mov (8)		mudMSGHDR_UVCOPY(2)<1>		udDNDI_UV_RESP(1)<8;8,1>
+	send (8)	null<1>:d    r36	0x5    0x60A8019:ud
+
+
+
+// FileName:	DN_Upsample_UV_NV12_16x8.asm
+// Author:		Tatiya, Rupesh
+// Description:	Upconvert 420 UV to 422
+
+
+
+// FileName:	UVCopy_Upsample_UV_16x8.asm
+// Author:		Tatiya, Rupesh
+// Description:	Convert 42X UV to 422 - to be used for IECP.
+
+
+	avg.sat (16) uwDNDI_UVCOPY_TEMP(0)		ubDNDI_UV_RESP(0,0)<16;16,1>     ubDNDI_UV_RESP(0,0)<16;16,1>
+	avg.sat (16) uwDNDI_UVCOPY_TEMP(1)	ubDNDI_UV_RESP(0,0)<16;16,1>     ubDNDI_UV_RESP(0,16)<16;16,1>
+	avg.sat (16) uwDNDI_UVCOPY_TEMP(2)		ubDNDI_UV_RESP(0,16)<16;16,1>     ubDNDI_UV_RESP(0,16)<16;16,1>
+	avg.sat (16) uwDNDI_UVCOPY_TEMP(3)	ubDNDI_UV_RESP(0,16)<16;16,1>     ubDNDI_UV_RESP(0,32)<16;16,1>
+	avg.sat (16) uwDNDI_UVCOPY_TEMP(4)		ubDNDI_UV_RESP(0,32)<16;16,1>     ubDNDI_UV_RESP(0,32)<16;16,1>
+	avg.sat (16) uwDNDI_UVCOPY_TEMP(5)	ubDNDI_UV_RESP(0,32)<16;16,1>     ubDNDI_UV_RESP(0,48)<16;16,1>
+	avg.sat (16) uwDNDI_UVCOPY_TEMP(6)		ubDNDI_UV_RESP(0,48)<16;16,1>     ubDNDI_UV_RESP(0,48)<16;16,1>
+	avg.sat (16) uwDNDI_UVCOPY_TEMP(7)	ubDNDI_UV_RESP(0,48)<16;16,1>     ubDNDI_UV_RESP(0,64)<16;16,1>
+
+	mov 	(16)	ubDNDI_RESP(5,1)<2>		ubDNDI_UVCOPY_TEMP(0,0)<32;8,4>		{ NoDDClr }		//Copy U data
+	mov 	(16)	ubDNDI_RESP(5,0)<2>		ubDNDI_UVCOPY_TEMP(0,2)<32;8,4>		{ NoDDChk }		//Copy V data
+	mov 	(16)	ubDNDI_RESP(5,33)<2>		ubDNDI_UVCOPY_TEMP(2,0)<32;8,4>		{ NoDDClr }		//Copy U data
+	mov 	(16)	ubDNDI_RESP(5,32)<2>		ubDNDI_UVCOPY_TEMP(2,2)<32;8,4>		{ NoDDChk }		//Copy V data
+	mov 	(16)	ubDNDI_RESP(5,65)<2>		ubDNDI_UVCOPY_TEMP(4,0)<32;8,4>		{ NoDDClr }		//Copy U data
+	mov 	(16)	ubDNDI_RESP(5,64)<2>		ubDNDI_UVCOPY_TEMP(4,2)<32;8,4>		{ NoDDChk }		//Copy V data
+	mov 	(16)	ubDNDI_RESP(5,97)<2>		ubDNDI_UVCOPY_TEMP(6,0)<32;8,4>		{ NoDDClr }		//Copy U data
+	mov 	(16)	ubDNDI_RESP(5,96)<2>		ubDNDI_UVCOPY_TEMP(6,2)<32;8,4>		{ NoDDChk }		//Copy V data
+
+
+
+// FileName:	DN_Save_422CP_16x8.asm
+// Author:		Vivek Kumar
+// Description:	Save one 16x8 blocks of DN output to the color pipe in 4-2-2 format
+
+
+.declare mubMSGHDR_DN_OUT_2   Base=r36.0      ElementSize=1  Type=ub
+
+
+mov (8)		mudMSGHDR_DN_OUT(0)<1>		r0<8;8,1>:ud            			// message header
+shl (1)     mdMSGHDR_DN_OUT(0,0)<1>		r7.0<0;1,0>:w     1:w  		{ NoDDClr }            // X origin * 2 (422 output)
+mov (1)     mdMSGHDR_DN_OUT(0,1)<1>		r7.1<0;1,0>:w          		{ NoDDClr, NoDDChk }   // Y origin
+mov (1)     mudMSGHDR_DN_OUT(0,2)<1>	0x7000F:ud	{ NoDDClr, NoDDChk }            // block width and height (16x8)
+
+//M0.3	- 0 - CP Enable, 1 - Area of Interest, 3:2 Message Format(TBD), 4:3 - Ignored, 31:5 CP state pointer
+//Compose area-of-interest bit + color pipe state pointer
+or (1)		mudMSGHDR_DN_OUT(0,3)<1>		r2.4<0;1,0>:ud 	r7.26<0;1,0>:b		{ NoDDChk }
+
+// First 8 x 8 Block
+	mov (8)		mubMSGHDR_DN_OUT(1)<2>			ubDNDI_RESP(0,0)<8;8,1>				{ NoDDClr } 	// copy line of Y directly to memory as optimization
+	mov (8)		mubMSGHDR_DN_OUT(1,16)<2>		ubDNDI_RESP(0,16)<8;8,1>			{ NoDDClr, NoDDChk } 	// copy line of Y directly to memory as optimization
+	mov (8)		mubMSGHDR_DN_OUT(2)<2>			ubDNDI_RESP(0,32)<8;8,1>				{ NoDDClr } 	// copy line of Y directly to memory as optimization
+	mov (8)		mubMSGHDR_DN_OUT(2,16)<2>		ubDNDI_RESP(0,48)<8;8,1>			{ NoDDClr, NoDDChk } 	// copy line of Y directly to memory as optimization
+	mov (8)		mubMSGHDR_DN_OUT(3)<2>			ubDNDI_RESP(0,64)<8;8,1>				{ NoDDClr } 	// copy line of Y directly to memory as optimization
+	mov (8)		mubMSGHDR_DN_OUT(3,16)<2>		ubDNDI_RESP(0,80)<8;8,1>			{ NoDDClr, NoDDChk } 	// copy line of Y directly to memory as optimization
+	mov (8)		mubMSGHDR_DN_OUT(4)<2>			ubDNDI_RESP(0,96)<8;8,1>				{ NoDDClr } 	// copy line of Y directly to memory as optimization
+	mov (8)		mubMSGHDR_DN_OUT(4,16)<2>		ubDNDI_RESP(0,112)<8;8,1>			{ NoDDClr, NoDDChk } 	// copy line of Y directly to memory as optimization
+
+	mov (4)     mubMSGHDR_DN_OUT(1,1)<4>   	ubDNDI_RESP(5,1)<8;4,2>			{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
+	mov (4)     mubMSGHDR_DN_OUT(1,17)<4>   	ubDNDI_RESP(5,17)<8;4,2>		{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
+
+	mov (4)     mubMSGHDR_DN_OUT(1,3)<4>   	ubDNDI_RESP(5,0)<8;4,2>			{ NoDDChk }    	// copy line of V directly to memory as optimization
+	mov (4)     mubMSGHDR_DN_OUT(1,19)<4>   	ubDNDI_RESP(5,16)<8;4,2>			{ NoDDChk }    	// copy line of V directly to memory as optimization
+	mov (4)     mubMSGHDR_DN_OUT(2,1)<4>   	ubDNDI_RESP(5,33)<8;4,2>			{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
+	mov (4)     mubMSGHDR_DN_OUT(2,17)<4>   	ubDNDI_RESP(5,49)<8;4,2>		{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
+
+	mov (4)     mubMSGHDR_DN_OUT(2,3)<4>   	ubDNDI_RESP(5,32)<8;4,2>			{ NoDDChk }    	// copy line of V directly to memory as optimization
+	mov (4)     mubMSGHDR_DN_OUT(2,19)<4>   	ubDNDI_RESP(5,48)<8;4,2>			{ NoDDChk }    	// copy line of V directly to memory as optimization
+	mov (4)     mubMSGHDR_DN_OUT(3,1)<4>   	ubDNDI_RESP(5,65)<8;4,2>			{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
+	mov (4)     mubMSGHDR_DN_OUT(3,17)<4>   	ubDNDI_RESP(5,81)<8;4,2>		{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
+
+	mov (4)     mubMSGHDR_DN_OUT(3,3)<4>   	ubDNDI_RESP(5,64)<8;4,2>			{ NoDDChk }    	// copy line of V directly to memory as optimization
+	mov (4)     mubMSGHDR_DN_OUT(3,19)<4>   	ubDNDI_RESP(5,80)<8;4,2>			{ NoDDChk }    	// copy line of V directly to memory as optimization
+	mov (4)     mubMSGHDR_DN_OUT(4,1)<4>   	ubDNDI_RESP(5,97)<8;4,2>			{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
+	mov (4)     mubMSGHDR_DN_OUT(4,17)<4>   	ubDNDI_RESP(5,113)<8;4,2>		{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
+
+	mov (4)     mubMSGHDR_DN_OUT(4,3)<4>   	ubDNDI_RESP(5,96)<8;4,2>			{ NoDDChk }    	// copy line of V directly to memory as optimization
+	mov (4)     mubMSGHDR_DN_OUT(4,19)<4>   	ubDNDI_RESP(5,112)<8;4,2>			{ NoDDChk }    	// copy line of V directly to memory as optimization
+
+// Second 8 x 8 Block
+mov	(8)	r36.0<1>:ud		r31.0<8;8,1>:ud
+add	(1)	r36.0<1>:ud		r36.0<0;1,0>:w		0x10:w
+
+	mov (8)		mubMSGHDR_DN_OUT_2(1)<2>		ubDNDI_RESP(0,8)<8;8,1>			{ NoDDClr } 	// copy line of Y directly to memory as optimization
+	mov (8)		mubMSGHDR_DN_OUT_2(1,16)<2>	ubDNDI_RESP(0,24)<8;8,1>			{ NoDDClr, NoDDChk } 	// copy line of Y directly to memory as optimization
+	mov (8)		mubMSGHDR_DN_OUT_2(2)<2>		ubDNDI_RESP(0,40)<8;8,1>			{ NoDDClr } 	// copy line of Y directly to memory as optimization
+	mov (8)		mubMSGHDR_DN_OUT_2(2,16)<2>	ubDNDI_RESP(0,56)<8;8,1>			{ NoDDClr, NoDDChk } 	// copy line of Y directly to memory as optimization
+	mov (8)		mubMSGHDR_DN_OUT_2(3)<2>		ubDNDI_RESP(0,72)<8;8,1>			{ NoDDClr } 	// copy line of Y directly to memory as optimization
+	mov (8)		mubMSGHDR_DN_OUT_2(3,16)<2>	ubDNDI_RESP(0,88)<8;8,1>			{ NoDDClr, NoDDChk } 	// copy line of Y directly to memory as optimization
+	mov (8)		mubMSGHDR_DN_OUT_2(4)<2>		ubDNDI_RESP(0,104)<8;8,1>			{ NoDDClr } 	// copy line of Y directly to memory as optimization
+	mov (8)		mubMSGHDR_DN_OUT_2(4,16)<2>	ubDNDI_RESP(0,120)<8;8,1>			{ NoDDClr, NoDDChk } 	// copy line of Y directly to memory as optimization
+
+	mov (4)     mubMSGHDR_DN_OUT_2(1,1)<4>   	ubDNDI_RESP(5,9)<8;4,2>		{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
+	mov (4)     mubMSGHDR_DN_OUT_2(1,17)<4>   	ubDNDI_RESP(5,25)<8;4,2>		{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
+
+	mov (4)     mubMSGHDR_DN_OUT_2(1,3)<4>   	ubDNDI_RESP(5,8)<8;4,2>			{ NoDDChk }    	// copy line of V directly to memory as optimization
+	mov (4)     mubMSGHDR_DN_OUT_2(1,19)<4>   	ubDNDI_RESP(5,24)<8;4,2>		{ NoDDChk }    	// copy line of V directly to memory as optimization
+	mov (4)     mubMSGHDR_DN_OUT_2(2,1)<4>   	ubDNDI_RESP(5,41)<8;4,2>		{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
+	mov (4)     mubMSGHDR_DN_OUT_2(2,17)<4>   	ubDNDI_RESP(5,57)<8;4,2>		{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
+
+	mov (4)     mubMSGHDR_DN_OUT_2(2,3)<4>   	ubDNDI_RESP(5,40)<8;4,2>			{ NoDDChk }    	// copy line of V directly to memory as optimization
+	mov (4)     mubMSGHDR_DN_OUT_2(2,19)<4>   	ubDNDI_RESP(5,56)<8;4,2>		{ NoDDChk }    	// copy line of V directly to memory as optimization
+	mov (4)     mubMSGHDR_DN_OUT_2(3,1)<4>   	ubDNDI_RESP(5,73)<8;4,2>		{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
+	mov (4)     mubMSGHDR_DN_OUT_2(3,17)<4>   	ubDNDI_RESP(5,89)<8;4,2>		{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
+
+	mov (4)     mubMSGHDR_DN_OUT_2(3,3)<4>   	ubDNDI_RESP(5,72)<8;4,2>			{ NoDDChk }    	// copy line of V directly to memory as optimization
+	mov (4)     mubMSGHDR_DN_OUT_2(3,19)<4>   	ubDNDI_RESP(5,88)<8;4,2>		{ NoDDChk }    	// copy line of V directly to memory as optimization
+	mov (4)     mubMSGHDR_DN_OUT_2(4,1)<4>   	ubDNDI_RESP(5,105)<8;4,2>		{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
+	mov (4)     mubMSGHDR_DN_OUT_2(4,17)<4>   	ubDNDI_RESP(5,121)<8;4,2>		{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
+
+	mov (4)     mubMSGHDR_DN_OUT_2(4,3)<4>   	ubDNDI_RESP(5,104)<8;4,2>			{ NoDDChk }    	// copy line of V directly to memory as optimization
+	mov (4)     mubMSGHDR_DN_OUT_2(4,19)<4>   	ubDNDI_RESP(5,120)<8;4,2>		{ NoDDChk }    	// copy line of V directly to memory as optimization
+
+//send out data through data port
+send (8)    null<1>:d    r31.0		0x5    0xA0A801B:ud
+send (8)    null<1>:d    r36.0	0x5    0xA0A801B:ud
+
+
+
+//End of Thread message
+
+mov (8) r127<1>:ud r0.0<8;8,1>:ud 
+ send (1) null<1>:d r127 0x27 0x02000010
+
+
+.end_code 
+.end_kernel
diff --git a/src/shaders/post_processing/gen7/NV12_DN_NV12.g4a b/src/shaders/post_processing/gen7/NV12_DN_NV12.g4a
new file mode 100644
index 0000000..4c932b8
--- /dev/null
+++ b/src/shaders/post_processing/gen7/NV12_DN_NV12.g4a
@@ -0,0 +1,420 @@
+/*
+ *  Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+//   40    // Total instruction count
+//    1    // Total kernel count
+
+.kernel NV12_DN_NV12
+.code
+
+
+
+// FileName:	DN_PL_Core.asm 
+// Author:		Vivek Kumar
+// Description:	Tasks for DN only case (16x8 block) for planar format
+
+
+
+// FileName:	DN.asm 
+// Author:		Vivek Kumar
+// Description:	Tasks for DN only case (16x8 block)
+
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16)
+.default_register_type  :ub
+
+.reg_count_total        128
+.reg_count_payload      7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+                                    //  e.g.            byte0   byte1  byte2
+                                    // YUYV               0       1      3
+                                    // YVYU               0       3      1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+                                    //  e.g.              byte0           byte1           byte2
+                                    // YUYV                 0               1               3
+                                    // YVYU                 0               3               1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+//                = 000 0001 (min message len 1 ) 0,0000 (resp len 0   -add later)
+//                  0000,0000,0000
+//                  0001(Spawn a root thread),0001 (Root thread spawn thread)
+//                = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+//                = 000 0110 (min message len 6 ) 0,0000 (resp len 0   -add later)
+//                  1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+//                  0000,0000 (Binding table index, added later)
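+//                = 0x0C098700 (NOTE: was listed as 0x02000011, the thread-spawn value above; recomputed from the bit fields listed here - confirm)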
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+        // Message Descriptors
+                //                = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+                //                  1 (header present 1) 0 1010 (media block write) 000000
+                //                  00000000 (binding table index - set later)
+                //                = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+                             // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT  r43    // Dummy Message Register for EOT
+
+
+.declare    mubMSGPAYLOAD  Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub	// byte view of the message payload area based at r30
+.declare    muwMSGPAYLOAD  Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw	// word view of the same registers
+.declare    mudMSGPAYLOAD  Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud	// dword view of the same registers
+.declare    mfMSGPAYLOAD   Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f	// float view of the same registers
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+
+.declare fROBUF		  Base=r9.0		ElementSize=4		SrcRegion=<8;8,1>		  DstRegion=<1>		Type=f	// float view of the temp buffer based at r9
+
+.declare udROBUF		Base=r9.0		ElementSize=4		SrcRegion=<8;8,1>		  DstRegion=<1>		Type=ud	// dword view of the same registers
+
+.declare uwROBUF		Base=r9.0		ElementSize=2		SrcRegion=<16;16,1>		DstRegion=<1>		Type=uw	// word view of the same registers
+
+.declare ubROBUF		Base=r9.0		ElementSize=1		SrcRegion=<16;16,1>		DstRegion=<1>		Type=ub	// byte view of the same registers
+
+.declare ub4ROBUF 	Base=r9.0		ElementSize=1		SrcRegion=<32;8,4>		DstRegion=<4>		Type=ub	// byte view whose default regions step 4 bytes at a time (every 4th byte)
+
+
+// End of common.inc
+
+
+// FileName:    DNDI.inc
+// Author:      Vivek Kumar
+// Description: Include file for DN, DI and DNDI
+// Inputs:      DI_ENABLE, DN_ENABLE, DN_PLANAR, DN_PACKED
+
+
+
+
+// End of common.inc
+
+
+//Interface:
+//Static Parameters:
+//r1
+
+
+//====================== Binding table (Explicit To DNDI)=========================================
+
+
+.declare mudMSGHDR_DNDI     Base=r18      ElementSize=4    Type=ud	// DNDI request message at r18 (dword view)
+.declare mdMSGHDR_DNDI      Base=r18      ElementSize=4    Type=d	// same registers, signed dword view
+.declare mwMSGHDR_DNDI      Base=r18      ElementSize=2    Type=w	// same registers, signed word view
+
+
+.declare mudMSGHDR_STMM     Base=r20      ElementSize=4    Type=ud	// STMM message registers at r20
+
+
+.declare mudMSGHDR_HIST     Base=r22      ElementSize=4    Type=ud	// denoise-history write message at r22
+
+
+.declare mudMSGHDR_ENC_STATS Base=r24 ElementSize=4   Type=ud	// encoder-statistics write message at r24 (dword view)
+.declare muwMSGHDR_ENC_STATS Base=r24 ElementSize=2   Type=uw	// same registers, word view
+.declare mubMSGHDR_ENC_STATS Base=r24 ElementSize=1   Type=ub	// same registers, byte view
+
+
+.declare mudMSGHDR_DN_OUT   Base=r31.0      ElementSize=4  Type=ud	// DN output message at r31 (dword view)
+.declare mdMSGHDR_DN_OUT    Base=r31.0      ElementSize=4  Type=d	// same registers, signed dword view
+.declare mubMSGHDR_DN_OUT   Base=r31.0      ElementSize=1  Type=ub	// same registers, byte view
+
+
+.declare mudMSGHDR_UVCOPY   Base=r36      ElementSize=4  Type=ud	// U/V copy message at r36 (used for both the load and the save)
+.declare mdMSGHDR_UVCOPY    Base=r36      ElementSize=4  Type=d	// same registers, signed dword view
+.declare mudMSGHDR_UCOPY    Base=r36       ElementSize=4  Type=ud	// same base register as mudMSGHDR_UVCOPY
+.declare mudMSGHDR_VCOPY    Base=r38       ElementSize=4  Type=ud	// based two registers after mudMSGHDR_UCOPY
+
+
+.declare mudMSGHDR_DI_OUT1  Base=r18.0      ElementSize=4     Type=ud	// NOTE: same base register (r18) as mudMSGHDR_DNDI
+.declare mubMSGHDR_DI_OUT1  Base=r18.0      ElementSize=1     Type=ub	// byte view of the same registers
+
+
+.declare mudMSGHDR_DI_OUT2  Base=r23.0      ElementSize=4     Type=ud	// second DI output message at r23
+.declare mubMSGHDR_DI_OUT2  Base=r23.0      ElementSize=1     Type=ub	// byte view of the same registers
+
+// r45: message header for the DN Y save.
+// It sits directly before the DNDI response (r46), so the response data needs no "mov" into a separate payload.
+
+.declare mudDN_Y_OUT        Base=r45.0 ElementSize=4 SrcRegion=<8;8,1>   DstRegion=<1> Type=ud
+
+// Message response (denoised & DI-ed pixels plus statistics), based at r46; use buffer 5
+.declare udDNDI_RESP        Base=r46.0 ElementSize=4 SrcRegion=<8;8,1>   DstRegion=<1> Type=ud
+.declare uwDNDI_RESP        Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare ubDNDI_RESP        Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+// Message response (UV copy), based at r58; use buffer 5
+.declare udDNDI_UV_RESP     Base=r58.0 ElementSize=4 SrcRegion=<8;8,1>  DstRegion=<1> Type=ud
+.declare ubDNDI_UV_RESP     Base=r58.0 ElementSize=1 SrcRegion=<16;16,1>    DstRegion=<1> Type=ub
+
+// Temp GRFs for the 42X to 422 conversion, based at r10
+.declare uwDNDI_UVCOPY_TEMP Base=r10.0 ElementSize=2 SrcRegion=<16;16,1>    DstRegion=<1> Type=uw       //8 GRFs
+.declare ubDNDI_UVCOPY_TEMP Base=r10.0 ElementSize=1 SrcRegion=<16;16,1>    DstRegion=<1> Type=ub       //8 GRFs
+//---------------------------------------------------------------------------
+// Message descriptors
+//---------------------------------------------------------------------------
+// Extended message descriptor
+    // Message descriptor   for sampler read
+    //                    = 000 0010 (message len 2) 00000 (resp len - set later, 12 or 5 or 11)
+    //                      1 (header present 1) 0 11 (SIMD32/64 mode)
+    //                      1000 (message type) 0000 (DI state index)
+    //                      00000000 (binding table index - set later)
+    //                    = 0x040b8000
+
+
+// Attention: The Message Length is The Number of GRFs with Data Only, without the Header
+
+
+//---------------------------------------------------------------------------
+// VDI Return Data format
+//---------------------------------------------------------------------------
+// Defines for DI enabled
+
+
+// Defines for DI disabled
+
+
+
+// FileName:	DNDI_Command.asm 
+// Author:		Vivek Kumar
+// Description:	Sends a message to the VDI to process one DN (16x8) or DNDI (16x4) block
+
+// Prepare the DNDI send command
+mov (8)		mudMSGHDR_DNDI(0)<1>			r0.0<8;8,1>:ud					// message header: register 0 of the DNDI message (based at r18) starts as a copy of r0
+mov (1)		mwMSGHDR_DNDI(1,4)<1>			r7.0<0;1,0>:w		{ NoDDClr }		// horizontal origin (r7.0) -> word 4 of message register 1	// Do we need to add offset here? -vK
+mov (1)		mwMSGHDR_DNDI(1,12)<1>			r7.1<0;1,0>:w		{ NoDDChk }		// vertical origin (r7.1) -> word 12 of message register 1	// Can these 2 be combined? - vK
+
+send (8)	udDNDI_RESP(0)<1>	r18	0x2	0x45E8003:ud	// send the message built at r18; the response is written starting at udDNDI_RESP(0) (r46)
+
+// On Gen6 with the VDI walker, use the X/Y pair returned in the response rather than the one programmed above.
+// The returned pair is ordered X,Y when the walker is enabled, and equals the programmed values when the walker is disabled.
+mov (2) 	r7.0<1>:w     uwDNDI_RESP(4,14)<2;2,1>	// horizontal/vertical origin <- words 14 and 15 of response register 4
+
+
+
+// FileName:	DN_Hist_Save.asm 
+// Author:		Vivek Kumar
+// Description:	Saves DN history data to statistics surface
+
+// Write denoise history to memory
+mov (8)    r27<1>:ud				r0.0<8;8,1>:ud                   			// message header: assemble it in r27, starting from a copy of r0
+
+
+	mov (2)    mudMSGHDR_HIST(1)<1>    	udDNDI_RESP(4,0)<2;2,1>    	// Copy the denoise history (first 2 dwords of response register 4) into payload register 1 (4x2)
+
+
+shr (2)    r27.0<1>:ud		r7.0<2;2,1>:w				2:w                                 	// header X,Y = block X,Y origin / 4
+add (1)    r27.0<1>:ud		r27.0<0;1,0>:ud			r1.12<0;1,0>:uw		{ NoDDClr }  	// Add pitch (read from r1.12) to X origin
+mov (1)    r27.2<1>:ud		0x10003:ud									{ NoDDChk }  	// block width and height (the 4x2 history block)
+
+mov (8)		mudMSGHDR_HIST(0)<1>		r27.0<8;8,1>:ud	// copy the finished header into message register 0 (r22)
+send (8)	null<1>:d	r22	0x5		0x40A8021:ud	// send header + history starting at r22; destination is null, so no response is kept
+
+
+
+// FileName:	DNDI_Enc_Stats_Save.asm
+// Author:		Vivek Kumar
+// Description:	Saves Encoder Statistics data to statistics surface in case of DI enabled (for 16x4 block)
+
+// Write encoder statistics to memory
+//Currently enable this only on Gen6 validation
+mov (8)		mudMSGHDR_ENC_STATS(1)<1>	0x0:ud						// Init payload register to 0 (only some of its bytes are overwritten below)
+mov (8)		mudMSGHDR_ENC_STATS(0)<1>	r0.0<8;8,1>:ud				// message header: start from a copy of r0
+
+shr (1)		mudMSGHDR_ENC_STATS(0,0)<1>		r7.0<0;1,0>:w            	1:w  	{ NoDDClr }			// X origin / 2
+mul (1)		acc0.1<1>:ud					r7.1<0;1,0>:w				3:w																							// Y origin * 3 (kept in acc0.1 for the shift below)
+shr (1)		mudMSGHDR_ENC_STATS(0,1)<1>		acc0.1<0;1,0>:ud			2:w		{ NoDDClr, NoDDChk }		// Y origin * 3/4
+mov (1)		mudMSGHDR_ENC_STATS(0,2)<1>		0x50003:ud				{ NoDDChk }			// block width and height; NOTE(review): original comment said (8x3), but read like 0x7000F => (16x8) this value gives 4 bytes x 6 rows - confirm
+add (2)		mudMSGHDR_ENC_STATS(0,0)<1>		mudMSGHDR_ENC_STATS(0,0)<2;2,1>       	r1.12<2;2,1>:uw					// Add pitch (two uw values starting at r1.12) to X,Y origin
+
+
+	//Data block for Encoder Statistics
+	//----------------------------------------------------
+	//|  0  |   1  |   2   |  3  |  4  |  5  |  6  |  7  | Bytes
+	//----------------------------------------------------
+	//| BNE |   X  |   X   |  X  |           X           |
+	//----------------------------------------------------
+	//|     X      |     SVCM    |           X           |
+	//----------------------------------------------------
+	//|   SHCM     |     STAD    |           X           |
+	//----------------------------------------------------
+	//|            X             |           X           |
+	//----------------------------------------------------
+	//|     X      |     SVCM    |           X           |
+	//----------------------------------------------------
+	//|   SHCM     |     STAD    |           X           |
+	//----------------------------------------------------
+	mov (1)		mubMSGHDR_ENC_STATS(1,0)<1>		ubDNDI_RESP(4,8)<0;1,0>    		{ NoDDClr }				// response register 4, byte 8 -> payload byte 0 (BNE in the table above)
+	mov (1)		muwMSGHDR_ENC_STATS(1,3)<1>		uwDNDI_RESP(4,11)<0;1,0>    	{ NoDDClr, NoDDChk }			// response word 11 -> payload word 3 (presumably the first SVCM, if table rows are 4 bytes in the payload - confirm)
+	mov (2)		muwMSGHDR_ENC_STATS(1,4)<1>		uwDNDI_RESP(4,12)<2;2,1>    	{ NoDDClr, NoDDChk }			// response words 12-13 -> payload words 4-5 (presumably the first SHCM, STAD - confirm)
+	mov (1)		muwMSGHDR_ENC_STATS(1,9)<1>		uwDNDI_RESP(4,8)<0;1,0>    		{ NoDDClr, NoDDChk }			// response word 8 -> payload word 9 (presumably the second SVCM - confirm)
+	mov (2)		muwMSGHDR_ENC_STATS(1,10)<1>	uwDNDI_RESP(4,9)<2;2,1>    		{ NoDDChk }				// response words 9-10 -> payload words 10-11 (presumably the second SHCM, STAD - confirm)
+
+
+send (8)   null<1>:d    r24    0x5    0x40A8021:ud	// send header + statistics starting at r24; destination is null, so no response is kept
+
+
+
+// FileName:	DN_Load_UV_NV12_16x8.asm 
+// Author:		Vivek Kumar
+// Description:	Read UV for 16x8 block through DATAPORT 
+
+
+
+// FileName:	UVCopy_Load_16x8.asm
+// Author:		Vivek Kumar
+// Description:	Read UV for 16x8 block through DATAPORT
+
+// CHANGE: read extra UV data so that it can be converted to 422. -rT
+// To keep things simple, the extra data is read in ALL cases, whether or not upsampling is required later on.
+
+
+	add (2)		r27.0<1>:d				r7.0<2;2,1>:w				r4.4<2;2,1>:w				// Source Y block origin: X,Y from r7.0/r7.1 plus the two words starting at r4.4
+	asr (1)  	r27.1<1>:d     			r27.1<0;1,0>:d       	1:w   						{ NoDDClr }	// U/V block vertical origin is half of Y's (only r27.1 is shifted)
+	mov (1)		r27.2<1>:ud				0x4000F:ud  					{ NoDDChk }	// U/V block width and height (8x5): one row more than the (8x4) used for the save
+    mov (8)     mudMSGHDR_UVCOPY(0)<1>    	r27.0<8;8,1>:ud	// copy the finished header into message register 0 (r36)
+	send (8)	udDNDI_UV_RESP(0)<1>		r36	0x4	0x2390001:ud	// read the U/V block; the response is written starting at udDNDI_UV_RESP(0) (r58)
+
+	// Update the header for the later save: only the block size dword is rewritten
+	mov (1)		mudMSGHDR_UVCOPY(0,2)<1>	0x3000F:ud									// U/V block width and height (8x4)
+
+
+
+// FileName:    DN_Save_Y_16x8.asm
+// Author:      Vivek Kumar
+// Description: Save one 16x8 blocks of Y channel of DN output for reference
+
+
+mov (8)     mudDN_Y_OUT(0,0)<1>     r0<8;8,1>:ud                                // message header: r45 starts as a copy of r0
+mov (2)     mudDN_Y_OUT(0,0)<1>     r7.0<2;2,1>:w                  { NoDDClr }     // X and Y origin (two elements: r7.0 and r7.1)
+mov (1)     mudDN_Y_OUT(0,2)<1>     0x7000F:ud    { NoDDChk }     // block width and height (16x8)
+
+// Send out data through the data port; the registers after the r45 header already hold the DNDI response (r46 onward), so no payload copy is needed
+send (8)    null<1>:d    mudDN_Y_OUT      0x5    0xA0A8018:ud
+
+
+
+// FileName:	DN_Save_UV_NV12_16x8.asm 
+// Author:		Vivek Kumar
+// Description:	Save UV for 16x8 block through DATAPORT 
+
+
+
+// FileName:	UVCopy_Save_16x8.asm
+// Author:		Vivek Kumar
+// Description:	Save UV for 16x8 block through DATAPORT
+
+
+//Reuse the header from Load component
+//Header is modified at the end of load - to be usable for save.
+
+	mov (8)		mudMSGHDR_UVCOPY(1)<1>		udDNDI_UV_RESP(0)<8;8,1>	// payload register 1 <- first register of the U/V read response
+	mov (8)		mudMSGHDR_UVCOPY(2)<1>		udDNDI_UV_RESP(1)<8;8,1>	// payload register 2 <- second register of the U/V read response
+	send (8)	null<1>:d    r36	0x5    0x60A8019:ud	// send the reused header (r36) plus the two payload registers; destination is null
+
+
+
+//End of Thread message
+
+mov (8) r127<1>:ud r0.0<8;8,1>:ud 	// copy r0 into r127, which is the source register of the end-of-thread message
+ send (1) null<1>:d r127 0x27 0x02000010	// end-of-thread message (see section header above); destination is null
+
+
+.end_code 
+.end_kernel
diff --git a/src/shaders/post_processing/gen7/PA_AVS_Buf_0.g4a b/src/shaders/post_processing/gen7/PA_AVS_Buf_0.g4a
new file mode 100644
index 0000000..5a1c4b1
--- /dev/null
+++ b/src/shaders/post_processing/gen7/PA_AVS_Buf_0.g4a
@@ -0,0 +1,539 @@
+/*
+ *  Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+//   39    // Total instruction count
+//    1    // Total kernel count
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16)
+.default_register_type  :ub
+
+.reg_count_total        128
+.reg_count_payload      7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+                                    //  e.g.            byte0   byte1  byte2
+                                    // YUYV               0       1      3
+                                    // YVYU               0       3      1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+                                    //  e.g.              byte0           byte1           byte2
+                                    // YUYV                 0               1               3
+                                    // YVYU                 0               3               1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+//                = 000 0001 (min message len 1 ) 0,0000 (resp len 0   -add later)
+//                  0000,0000,0000
+//                  0001(Spawn a root thread),0001 (Root thread spawn thread)
+//                = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+//                = 000 0110 (min message len 6 ) 0,0000 (resp len 0   -add later)
+//                  1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+//                  0000,0000 (Binding table index, added later)
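+//                = 0x0C098700 (NOTE: was listed as 0x02000011, the thread-spawn value above; recomputed from the bit fields listed here - confirm)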
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+        // Message Descriptors
+                //                = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+                //                  1 (header present 1) 0 1010 (media block write) 000000
+                //                  00000000 (binding table index - set later)
+                //                = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+                             // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT  r43    // Dummy Message Register for EOT
+
+
+.declare    mubMSGPAYLOAD  Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare    muwMSGPAYLOAD  Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare    mudMSGPAYLOAD  Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare    mfMSGPAYLOAD   Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+
+.declare fROBUF		  Base=r9.0		ElementSize=4		SrcRegion=<8;8,1>		  DstRegion=<1>		Type=f
+
+.declare udROBUF		Base=r9.0		ElementSize=4		SrcRegion=<8;8,1>		  DstRegion=<1>		Type=ud
+
+.declare uwROBUF		Base=r9.0		ElementSize=2		SrcRegion=<16;16,1>		DstRegion=<1>		Type=uw
+
+.declare ubROBUF		Base=r9.0		ElementSize=1		SrcRegion=<16;16,1>		DstRegion=<1>		Type=ub
+
+.declare ub4ROBUF 	Base=r9.0		ElementSize=1		SrcRegion=<32;8,4>		DstRegion=<4>		Type=ub
+
+
+// End of common.inc
+
+
+// FileName:		PA_AVS_Buf_0.asm
+// Author:			Vivek Kumar
+// Description:	Loads 8x8 AVS/IEF Packed data into Buffer 0
+
+
+
+// FileName     :   PA_AVS_Buf.asm
+// Author       :   Tatiya, Rupesh
+// Description  :   Loads 8x8 AVS/IEF Packed data into Buffer N
+
+//On IVB, for AVS module - set buffer pointers offset according to AVS Layout.
+//Change it to Sample Unorm layout in Shuffle modules.
+
+
+
+// Module name: Scaling.inc
+
+
+
+
+// Description: Includes all definitions explicit to Fast Composite.
+
+
+
+
+// End of common.inc
+
+
+//========== GRF partition ==========
+     // r0 header            :   r0          (1 GRF)
+     // Static parameters    :   r1 - r6     (6 GRFS)
+     // Inline parameters    :   r7 - r8     (2 GRFs)
+     // MSGSRC               :   r27         (1 GRF)
+//===================================
+
+//Interface:
+//========== Static Parameters (Explicit To Fast Composite) ==========
+//r1
+//CSC Set 0
+
+
+.declare udCSC_CURBE    Base=r1.0      ElementSize=4       Type=ud
+
+//Constant alpha
+
+
+//r2
+
+
+// Gen7 AVS WA
+
+
+// WiDi Definitions
+
+
+//Colorfill
+
+
+                                      // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise.	    
+
+.declare ubCOLOR_PIXEL_VAL      Base=r2.20      ElementSize=1       SrcRegion=<0;1,0>       DstRegion=<1>       Type=ub
+
+//r3
+//Normalised Ratio of Horizontal step size with main video for all layers
+
+
+    //Normalised Ratio of Horizontal step size with main video for all layers becomes
+    //Normalised Horizontal step size for all layers in VP_Setup.asm
+
+
+//r4
+//Normalised Vertical step size for all layers
+
+
+//r5
+//Normalised Vertical Frame Origin for all layers
+
+
+//r6
+//Normalised Horizontal Frame Origin for all layers
+
+
+//========== Inline Parameters (Explicit To Fast Composite) ==========
+
+
+//Main video Step X
+
+
+//====================== Binding table (Explicit To Fast Composite)=========================================
+
+
+//Used by Interlaced Scaling Kernels
+
+
+//========== Sampler State Table Index (Explicit To Fast Composite)==========
+//Sampler Index for AVS/IEF messages
+
+
+//Sampler Index for SIMD16 sampler messages
+
+
+//=============================================================================
+
+.declare fBUFFER_0      Base=r64.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+.declare fBUFFER_1      Base=r80.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+.declare fBUFFER_2      Base=r96.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+.declare fBUFFER_3      Base=r112.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+.declare fBUFFER_4      Base=r28.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+.declare fBUFFER_5      Base=r46.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+
+.declare udBUFFER_0     Base=r64.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_1     Base=r80.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_2     Base=r96.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_3     Base=r112.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_4     Base=r28.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_5     Base=r46.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+
+.declare uwBUFFER_0     Base=r64.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_1     Base=r80.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_2     Base=r96.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_3     Base=r112.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_4     Base=r28.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_5     Base=r46.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+
+.declare ubBUFFER_0     Base=r64.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_1     Base=r80.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_2     Base=r96.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_3     Base=r112.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_4     Base=r28.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_5     Base=r46.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+
+.declare ub4BUFFER_0    Base=r64.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_1    Base=r80.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_2    Base=r96.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_3    Base=r112.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_4    Base=r28.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_5    Base=r46.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+
+//Pointer to mask reg
+
+
+//r18
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+.declare udCSC_COEFF_0  Base=r18.0    ElementSize=4 Type=ud       // 1 GRF
+
+//r19
+
+
+.declare udCSC_COEFF_1  Base=r19.0    ElementSize=4 Type=ud       // 1 GRF
+
+
+//r20
+
+.declare uwALPHA_MASK_REG_TEMP  Base=r20.0    ElementSize=2 SrcRegion=<16;16,1> Type=uw        // 1 GRF
+
+//r21
+
+.declare uwALPHA_MASK_REG       Base=r21.0         ElementSize=2 SrcRegion=<16;16,1> Type=uw        // 1 GRF
+
+//r22
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as
+//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT
+
+//r23
+
+
+//Lumakey
+
+
+//r24
+
+
+//r25
+
+
+//r26
+
+
+//defines to generate LABELS during compile time.
+
+
+        // Message Header
+        // m0.7         31:0    Debug
+        // m0.6         31:0    Debug
+        // m0.5         31:0    Ignored
+        // m0.4         31:0    Ignored
+        // m0.3         31:0    Ignored
+        // m0.2         31:16   Ignored
+        //              15      Alpha Write Channel Mask        enable=0, disable=1
+        //              14      Blue Write Channel Mask  (U)
+        //              13      Green Write Channel Mask (Y)
+        //              12      Red Write Channel Mask   (V)
+        //              11:0    Ignored
+        // m0.1                 Ignored
+        // m0.0                 Ignored
+
+
+        // AVS payload
+        // m1.7                 Group ID Number
+        // m1.6                 U 2nd Derivative        ---> NLAS dx
+        // m1.5                 Delta V                 ---> Step Y
+        // m1.4                 Delta U                 ---> Step X
+        // m1.3                 Pixel 0 V Address       ---> ORIY (Y0)
+        // m1.2                 Pixel 0 U Address       ---> ORIX (X0)
+        // m1.1                 Vertical Block Number
+        // m1.0                 Reserved
+
+        // Sampler Message Descriptor
+        // 31:29        Reserved                        000
+        // 28:25        Message length                  0010
+        // 24:20        Response length                 xxxxx   ---> 4GRFs for each enabled channel (AVS), 2GRFs for each enabled channel (sample unorm)
+        // 19           Header Present                  1
+        // 18:17        SIMD Mode                       11      ---> SIMD32/64
+        // 16:12        Message Type                    xxxxx   ---> 01011 sample_8x8, 01100 (sample_unorm), 01010 (sample_unorm+killpix)
+        // 11:8         Sampler Index                   xxxx
+        // 7:0          Binding Table Index             xxxxxxxx
+
+
+        // Msg Header M0.2
+        // 15:15        Alpha Write Channel Mask, 0: written back, 1: not written back
+        // 14:14        Blue  Write Channel Mask
+        // 13:13        Green Write Channel Mask
+        // 12:12        Red   Write Channel Mask
+
+
+//By design, Buffer 0,1,2,3 always have Layer 0 and Buffer 4,5 always have L1-L7
+
+
+//used to generate LABELS at compile time.
+
+
+        // 18:17        SIMD Mode                       10      ---> SIMD16
+        // 16:12        Message Type                    xxxxx   ---> 00000 (SIMD16)
+
+
+//r10-17  - 8 GRFs to load SIMD16 data (up to 4 channels)
+//r18-19  - 2 GRFs to store sampler ramp.
+
+    .declare mfSCALING_0X_34X_PAYLOAD	Base=r14.0	ElementSize=4		SrcRegion=<8;8,1>		DstRegion=<1>		Type=f
+    .declare muwSCALING_0X_34X_PAYLOAD	Base=r14.0	ElementSize=2		SrcRegion=<16;16,1>		DstRegion=<1>		Type=uw
+    .declare mudCALING_0X_34X_PAYLOAD	Base=r14.0	ElementSize=4		SrcRegion=<8;8,1>		DstRegion=<1>		Type=ud
+    .declare mubCALING_0X_34X_PAYLOAD	Base=r14.0	ElementSize=1		SrcRegion=<32;32,1>		DstRegion=<1>		Type=ub
+
+
+	.declare fSCALING_0X_34X_TEMP		Base=r9.0	ElementSize=4		SrcRegion=<8;8,1>		DstRegion=<1>		Type=f
+	.declare udSCALING_0X_34X_TEMP		Base=r9.0	ElementSize=4		SrcRegion=<8;8,1>		DstRegion=<1>		Type=ud
+	.declare ub4SCALING_0X_34X_TEMP		Base=r9.0	ElementSize=1		SrcRegion=<32;8,4>		DstRegion=<1>		Type=ub
+	.declare uwSCALING_0X_34X_TEMP		Base=r9.0	ElementSize=2		SrcRegion=<16;16,1>		DstRegion=<1>		Type=uw
+
+	// Sampler ramp is used for Scaling 0X_0.34X
+	.declare	fSAMPLER_RAMP  		Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> Type=f		// 1 GRFs, 8 elements
+
+
+	//#define rMSGDSC_UV    		r23.0
+
+
+//End of _SCALING_
+
+
+        //NOTE: We need offsets for the second half of LAYER 0 - even if we do not load it.
+        //Update the channel offset in the buffers for the lower 8x4 data for BUFFER_0.
+        mov (1)   r22.4<1>:ud     0x400040:ud                  // both 16-bit halves of r22.4 = 0x0040
+
+
+    //Check if layer is to be skipped
+
+
+        // f0.1 pre-computed in Set_Layer_0
+        (-f0.1)  jmpi  (1)  SKIP_AVS_LOAD_L0_0_                // f0.1 clear -> skip the workaround and the send
+
+    // Build the sampler message: r16 = header (m0), r17 = AVS payload (m1), a0.0 = descriptor.
+    //AVS_PAYLOAD already has all the data loaded at this point
+    add (1)     a0.0:ud     r23.5<0;1,0>:ud      0x50EB400:ud      //msg desc: mlen 2, rlen 16, header, SIMD32/64, sample_8x8, sampler 4
+
+    mov (1)     r16.2:ud      0x00000000:ud                 // Enable ARGB channels (M0.2 write masks all 0 = written back)
+
+
+        //OPT: rAVS_PAYLOAD.1 and .7 --> use NODDCLR, NODDCHK -rT
+        mov (1)   r25.7<1>:ud      r7.7:ud           { NoDDClr }    // payload .7 (m1.7: Group ID Number)
+        mov (1)   r25.1<1>:ud      r7.12:uw       { NoDDChk }    // payload .1 (m1.1: Vertical Block Number)
+
+
+    // set the vertical block number
+
+
+    mov (8)     r17.0:ud      r25.0<8;8,1>:ud                // copy all 8 payload dwords from r25 into m1 (r17)
+
+    // Gen7 AVS WA Only for YUV packed surfaces, NV12 and Y-channel only for Planar surfaces
+    //   if ((int)(u_left*width + 5.0/256) > (int)(u_left*width))
+    //   {
+    //       modified_u_coord = u_coord - 5.0/(256*width); //floating point
+    //   }
+    //   else if ((int)(u_left*width + 255.0/256) == (int)(u_left*width))
+    //   {
+    //       modified_u_coord = u_coord + 1.0/(256*width); //floating point
+    //   }
+    //   else{
+    //       modified_u_coord = u_coord;
+    //   }
+    //   Where u_left = u - 2*du + 3*ddu for IEF On
+    //   And u_left = u for IEF Off case
+    //
+
+    // check whether Gen7 AVS WA is enabled,
+    mov  (1)    r14.8:uw            f0.0:uw           // save f0.0 (restored at GEN7_AVS_WA_DONE_L0_0_)
+
+    and.nz.f0.0  (1)     null<1>:uw     r2.3:uw    0x2:uw          // bit 1 of r2.3:uw = WA enable
+    (-f0.0)jmpi  (1)     GEN7_AVS_WA_DONE_L0_0_
+
+
+    // Gen7 AVS WA, check if IEF is ON for choosing Gen7 AVS WA formula
+
+    and.nz.f0.0  (8)     null<1>:uw     r2.3<0;1,0>:uw             0x4:uw   // bit 2 of r2.3:uw = IEF on
+    (f0.0)mov (8)    acc0.0:f          r17.2<0;1,0>:f                      // acc  = u        (m1.2)
+    (f0.0)mac (8)    acc0.0:f          r17.4<0;1,0>:f    -2.0:f            // acc += -2 * du  (m1.4)
+    (f0.0)mac (8)    acc0.0:f          r17.6<0;1,0>:f     3.0:f            // acc +=  3 * ddu (m1.6)
+    (f0.0)mov (1)    r14.2:f        acc0:f                                 // IEF ON,  rTEMP3.2 = u_left
+    (-f0.0)mov (1)   r14.2:f        r17.2<0;1,0>:f                // IEF OFF, rTEMP3.2 = u_left
+
+    and (1)         r14.1:ud     r2.3:uw    0xFFF8:uw                       // keep bits 15:3 of r2.3:uw
+    asr (1)         r14.1:ud     r14.1:ud    3:d                            // shift down: the integer used as "width" below
+    mov (1)         r14.1:f      r14.1:ud                                   // rTEMP3.1 = width as float
+
+    // Gen7 AVS WA, if (int)(u_left*width + 5.0/256) > (int)(u_left*width)
+    mul (1)          r14.0:f        r14.2:f                  r14.1:f    // rTEMP3.0 = u_left*width
+    add (1)          r14.2:f        r14.0:f                  0.01953125:f  // rTEMP3.2 = u_left*width + 5.0/256
+    add (1)          r14.3:f        r14.0:f                  0.99609375:f  // rTEMP3.3 = u_left*width + 255.0/256
+
+    //Check if the values are < 0 and account for (int) cast of negative numbers
+
+    //(int)(u_left*width)
+    cmp.l.f0.0 (1)   null<1>:f          r14.0:f                  0.00000000:f          // f0.0 = (float value < 0)
+    mov (1)          r14.0:d        r14.0:f                                            // float -> int, in place
+    (f0.0)add (1)    r14.0:d        r14.0<0;1,0>:d           -1:d                      // -1 when the float was negative
+
+    //(int)(u_left*width + 5.0/256)
+    cmp.l.f0.0 (1)   null<1>:f          r14.2:f                  0.00000000:f          
+    mov (1)          r14.2:d        r14.2:f        
+    (f0.0)add (1)    r14.2:d        r14.2<0;1,0>:d           -1:d        
+
+    //(int)(u_left*width + 255.0/256)
+    cmp.l.f0.0 (1)   null<1>:f          r14.3:f                  0.00000000:f          
+    mov (1)          r14.3:d        r14.3:f
+    (f0.0)add (1)    r14.3:d        r14.3<0;1,0>:d           -1:d        
+
+    mov  (1)    f0.0:uw                0:uw                   // clear flag so the "+1/256" add stays off unless cmp.e below sets it
+    //if ((int)(u_left*width + 5.0/256) > (int)(u_left*width))
+    cmp.g.f1.0  (1)     null<1>:d        r14.2:d    r14.0:d       // must target f1.0: both predicates below read f1.0
+    // modified_u_coord = u_coord - 5.0/(256*width); //floating point
+    (f1.0) add (1)     r17.2:f       r17.2<0;1,0>:f   -r2.3:f     // r2.3:f presumably holds 5.0/(256*width) - confirm against the CURBE layout
+    //else if ((int)(u_left*width + 255.0/256) == (int)(u_left*width))
+    (-f1.0) cmp.e.f0.0  (1) null<1>:d    r14.3:d    r14.0:d       // only evaluated when the first branch was not taken
+    // modified_u_coord = u_coord + 1.0/(256*width); //floating point
+    (f0.0) add (1)     r17.2:f       r17.2<0;1,0>:f   r2.2:f      // r2.2:f presumably holds 1.0/(256*width) - confirm against the CURBE layout
+
+
+GEN7_AVS_WA_DONE_L0_0_:
+    mov  (1)    f0.0:uw                r14.8:uw                   // restore f0.0
+
+    send (1)    uwBUFFER_0(0)<1>   r16    0x2    a0.0:ud          // message starts at r16; response lands in BUFFER_0
+    // Returns packed data in 16 GRFs in scrambled order
+
+SKIP_AVS_LOAD_L0_0_:
+    nop
+
+
diff --git a/src/shaders/post_processing/gen7/PA_AVS_Buf_1.g4a b/src/shaders/post_processing/gen7/PA_AVS_Buf_1.g4a
new file mode 100644
index 0000000..208f16d
--- /dev/null
+++ b/src/shaders/post_processing/gen7/PA_AVS_Buf_1.g4a
@@ -0,0 +1,531 @@
+/*
+ *  Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+//   37    // Total instruction count
+//    1    // Total kernel count
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16)
+.default_register_type  :ub
+
+.reg_count_total        128
+.reg_count_payload      7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+                                    //  e.g.            byte0   byte1  byte2
+                                    // YUYV               0       1      3
+                                    // YVYU               0       3      1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+                                    //  e.g.              byte0           byte1           byte2
+                                    // YUYV                 0               1               3
+                                    // YVYU                 0               3               1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+//                = 000 0001 (min message len 1 ) 0,0000 (resp len 0   -add later)
+//                  0000,0000,0000
+//                  0001(Spawn a root thread),0001 (Root thread spawn thread)
+//                = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+//                = 000 0110 (min message len 6 ) 0,0000 (resp len 0   -add later)
+//                  1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+//                  0000,0000 (Binding table index, added later)
+//                = 0x02000011
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+        // Message Descriptors
+                //                = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+                //                  1 (header present 1) 0 1010 (media block write) 000000
+                //                  00000000 (binding table index - set later)
+                //                = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+                             // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT  r43    // Dummy Message Register for EOT
+
+
+.declare    mubMSGPAYLOAD  Base=r30  ElementSize=1  SrcRegion=<16;16,1>  Type=ub    // byte view of the message payload at r30
+.declare    muwMSGPAYLOAD  Base=r30  ElementSize=2  SrcRegion=<16;16,1>  Type=uw    // word view of the same base register
+.declare    mudMSGPAYLOAD  Base=r30  ElementSize=4  SrcRegion=<8;8,1>    Type=ud    // dword view of the same base register
+.declare    mfMSGPAYLOAD   Base=r30  ElementSize=4  SrcRegion=<8;8,1>    Type=f     // float view of the same base register
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+
+.declare fROBUF      Base=r9.0    ElementSize=4    SrcRegion=<8;8,1>      DstRegion=<1>    Type=f     // float view of the rotation temp space at r9
+
+.declare udROBUF     Base=r9.0    ElementSize=4    SrcRegion=<8;8,1>      DstRegion=<1>    Type=ud    // dword view, same base
+
+.declare uwROBUF     Base=r9.0    ElementSize=2    SrcRegion=<16;16,1>    DstRegion=<1>    Type=uw    // word view, same base
+
+.declare ubROBUF     Base=r9.0    ElementSize=1    SrcRegion=<16;16,1>    DstRegion=<1>    Type=ub    // byte view, same base
+
+.declare ub4ROBUF    Base=r9.0    ElementSize=1    SrcRegion=<32;8,4>     DstRegion=<4>    Type=ub    // byte view with a stride of 4 (src <32;8,4>, dst <4>)
+
+
+// End of common.inc
+
+
+// FileName:		PA_AVS_Buf_1.asm 
+// Author:			Vivek Kumar
+// Description:	Loads 8x8 AVS/IEF Packed data into Buffer 1
+
+
+
+// FileName     :   PA_AVS_Buf.asm
+// Author       :   Tatiya, Rupesh
+// Description  :   Loads 8x8 AVS/IEF Packed data into Buffer N
+
+//On IVB, for AVS module - set buffer pointers offset according to AVS Layout.
+//Change it to Sample Unorm layout in Shuffle modules.
+
+
+
+// Module name: Scaling.inc
+
+
+
+
+// Description: Includes all definitions explicit to Fast Composite.
+
+
+
+
+// End of common.inc
+
+
+//========== GRF partition ==========
+     // r0 header            :   r0          (1 GRF)
+     // Static parameters    :   r1 - r6     (6 GRFS)
+     // Inline parameters    :   r7 - r8     (2 GRFs)
+     // MSGSRC               :   r27         (1 GRF)
+//===================================
+
+//Interface:
+//========== Static Parameters (Explicit To Fast Composite) ==========
+//r1
+//CSC Set 0
+
+
+.declare udCSC_CURBE  Base=r1.0  ElementSize=4  Type=ud    // dword view of r1 (listed under "CSC Set 0" in the static parameters)
+
+//Constant alpha
+
+
+//r2
+
+
+// Gen7 AVS WA
+
+
+// WiDi Definitions
+
+
+//Colorfill
+
+
+                                      // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise.	    
+
+.declare ubCOLOR_PIXEL_VAL  Base=r2.20  ElementSize=1  SrcRegion=<0;1,0>  DstRegion=<1>  Type=ub    // byte view based at r2.20; <0;1,0> replicates a single element
+
+//r3
+//Normalised Ratio of Horizontal step size with main video for all layers
+
+
+    //Normalised Ratio of Horizontal step size with main video for all layers becomes
+    //Normalised Horizontal step size for all layers in VP_Setup.asm
+
+
+//r4
+//Normalised Vertical step size for all layers
+
+
+//r5
+//Normalised Vertical Frame Origin for all layers
+
+
+//r6
+//Normalised Horizontal Frame Origin for all layers
+
+
+//========== Inline Parameters (Explicit To Fast Composite) ==========
+
+
+//Main video Step X
+
+
+//====================== Binding table (Explicit To Fast Composite)=========================================
+
+
+//Used by Interlaced Scaling Kernels
+
+
+//========== Sampler State Table Index (Explicit To Fast Composite)==========
+//Sampler Index for AVS/IEF messages
+
+
+//Sampler Index for SIMD16 sampler messages
+
+
+//=============================================================================
+
+.declare fBUFFER_0      Base=r64.0     ElementSize=4    SrcRegion=<8;8,1>      DstRegion=<1>    Type=f     // float views of data buffers 0-5
+.declare fBUFFER_1      Base=r80.0     ElementSize=4    SrcRegion=<8;8,1>      DstRegion=<1>    Type=f     // buffers 0-3 start 16 GRFs apart (r64, r80, r96, r112)
+.declare fBUFFER_2      Base=r96.0     ElementSize=4    SrcRegion=<8;8,1>      DstRegion=<1>    Type=f
+.declare fBUFFER_3      Base=r112.0    ElementSize=4    SrcRegion=<8;8,1>      DstRegion=<1>    Type=f
+.declare fBUFFER_4      Base=r28.0     ElementSize=4    SrcRegion=<8;8,1>      DstRegion=<1>    Type=f     // buffer 4 starts at r28
+.declare fBUFFER_5      Base=r46.0     ElementSize=4    SrcRegion=<8;8,1>      DstRegion=<1>    Type=f     // buffer 5 starts at r46
+
+.declare udBUFFER_0     Base=r64.0     ElementSize=4    SrcRegion=<8;8,1>      DstRegion=<1>    Type=ud    // dword views of the same six buffers
+.declare udBUFFER_1     Base=r80.0     ElementSize=4    SrcRegion=<8;8,1>      DstRegion=<1>    Type=ud
+.declare udBUFFER_2     Base=r96.0     ElementSize=4    SrcRegion=<8;8,1>      DstRegion=<1>    Type=ud
+.declare udBUFFER_3     Base=r112.0    ElementSize=4    SrcRegion=<8;8,1>      DstRegion=<1>    Type=ud
+.declare udBUFFER_4     Base=r28.0     ElementSize=4    SrcRegion=<8;8,1>      DstRegion=<1>    Type=ud
+.declare udBUFFER_5     Base=r46.0     ElementSize=4    SrcRegion=<8;8,1>      DstRegion=<1>    Type=ud
+
+.declare uwBUFFER_0     Base=r64.0     ElementSize=2    SrcRegion=<16;16,1>    DstRegion=<1>    Type=uw    // word views of the same six buffers
+.declare uwBUFFER_1     Base=r80.0     ElementSize=2    SrcRegion=<16;16,1>    DstRegion=<1>    Type=uw
+.declare uwBUFFER_2     Base=r96.0     ElementSize=2    SrcRegion=<16;16,1>    DstRegion=<1>    Type=uw
+.declare uwBUFFER_3     Base=r112.0    ElementSize=2    SrcRegion=<16;16,1>    DstRegion=<1>    Type=uw
+.declare uwBUFFER_4     Base=r28.0     ElementSize=2    SrcRegion=<16;16,1>    DstRegion=<1>    Type=uw
+.declare uwBUFFER_5     Base=r46.0     ElementSize=2    SrcRegion=<16;16,1>    DstRegion=<1>    Type=uw
+
+.declare ubBUFFER_0     Base=r64.0     ElementSize=1    SrcRegion=<16;16,1>    DstRegion=<1>    Type=ub    // byte views of the same six buffers
+.declare ubBUFFER_1     Base=r80.0     ElementSize=1    SrcRegion=<16;16,1>    DstRegion=<1>    Type=ub
+.declare ubBUFFER_2     Base=r96.0     ElementSize=1    SrcRegion=<16;16,1>    DstRegion=<1>    Type=ub
+.declare ubBUFFER_3     Base=r112.0    ElementSize=1    SrcRegion=<16;16,1>    DstRegion=<1>    Type=ub
+.declare ubBUFFER_4     Base=r28.0     ElementSize=1    SrcRegion=<16;16,1>    DstRegion=<1>    Type=ub
+.declare ubBUFFER_5     Base=r46.0     ElementSize=1    SrcRegion=<16;16,1>    DstRegion=<1>    Type=ub
+
+.declare ub4BUFFER_0    Base=r64.0     ElementSize=1    SrcRegion=<32;8,4>     DstRegion=<4>    Type=ub    // byte views with a stride of 4 (src <32;8,4>, dst <4>)
+.declare ub4BUFFER_1    Base=r80.0     ElementSize=1    SrcRegion=<32;8,4>     DstRegion=<4>    Type=ub
+.declare ub4BUFFER_2    Base=r96.0     ElementSize=1    SrcRegion=<32;8,4>     DstRegion=<4>    Type=ub
+.declare ub4BUFFER_3    Base=r112.0    ElementSize=1    SrcRegion=<32;8,4>     DstRegion=<4>    Type=ub
+.declare ub4BUFFER_4    Base=r28.0     ElementSize=1    SrcRegion=<32;8,4>     DstRegion=<4>    Type=ub
+.declare ub4BUFFER_5    Base=r46.0     ElementSize=1    SrcRegion=<32;8,4>     DstRegion=<4>    Type=ub
+
+//Pointer to mask reg
+
+
+//r18
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+.declare udCSC_COEFF_0          Base=r18.0  ElementSize=4                       Type=ud    // 1 GRF: dword view of r18
+
+//r19
+
+
+.declare udCSC_COEFF_1          Base=r19.0  ElementSize=4                       Type=ud    // 1 GRF: dword view of r19
+
+
+//r20
+
+.declare uwALPHA_MASK_REG_TEMP  Base=r20.0  ElementSize=2  SrcRegion=<16;16,1>  Type=uw    // 1 GRF: word view of r20
+
+//r21
+
+.declare uwALPHA_MASK_REG       Base=r21.0  ElementSize=2  SrcRegion=<16;16,1>  Type=uw    // 1 GRF: word view of r21
+
+//r22
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as
+//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT
+
+//r23
+
+
+//Lumakey
+
+
+//r24
+
+
+//r25
+
+
+//r26
+
+
+//defines to generate LABELS during compile time.
+
+
+        // Message Header
+        // m0.7         31:0    Debug
+        // m0.6         31:0    Debug
+        // m0.5         31:0    Ignored
+        // m0.4         31:0    Ignored
+        // m0.3         31:0    Ignored
+        // m0.2         31:16   Ignored
+        //              15      Alpha Write Channel Mask        enable=0, disable=1
+        //              14      Blue Write Channel Mask  (U)
+        //              13      Green Write Channel Mask (Y)
+        //              12      Red Write Channel Mask   (V)
+        //              11:0    Ignored
+        // m0.1                 Ignored
+        // m0.0                 Ignored
+
+
+        // AVS payload
+        // m1.7                 Group ID Number
+        // m1.6                 U 2nd Derivative        ---> NLAS dx
+        // m1.5                 Delta V                 ---> Step Y
+        // m1.4                 Delta U                 ---> Step X
+        // m1.3                 Pixel 0 V Address       ---> ORIY (Y0)
+        // m1.2                 Pixel 0 U Address       ---> ORIX (X0)
+        // m1.1                 Vertical Block Number
+        // m1.0                 Reserved
+
+        // Sampler Message Descriptor
+        // 31:29        Reserved                        000
+        // 28:25        Message length                  0010
+        // 24:20        Response length                 xxxxx   ---> 4GRFs for each enabled channel (AVS), 2GRFs for each enabled channel (sample unorm)
+        // 19           Header Present                  1
+        // 18:17        SIMD Mode                       11      ---> SIMD32/64
+        // 16:12        Message Type                    xxxxx   ---> 01011 sample_8x8, 01100 (sample_unorm), 01010 (sample_unorm+killpix)
+        // 11:8         Sampler Index                   xxxx
+        // 7:0          Binding Table Index             xxxxxxxx
+
+
+        // Msg Header M0.2
+        // 15:15        Alpha Write Channel Mask, 0: written back, 1: not written back
+        // 14:14        Blue  Write Channel Mask
+        // 13:13        Green Write Channel Mask
+        // 12:12        Red   Write Channel Mask
+
+
+//By design, Buffer 0,1,2,3 always have Layer 0 and Buffer 4,5 always have L1-L7
+
+
+//used to generate LABELS at compile time.
+
+
+        // 18:17        SIMD Mode                       10      ---> SIMD16
+        // 16:12        Message Type                    xxxxx   ---> 00000 (SIMD16)
+
+
+//r10-17  - 8 GRFs to load SIMD16 data (up to 4 channels)
+//r18-19  - 2 GRFs to store sampler ramp.
+
+    .declare mfSCALING_0X_34X_PAYLOAD     Base=r14.0    ElementSize=4    SrcRegion=<8;8,1>      DstRegion=<1>    Type=f     // float view of the scaling payload at r14
+    .declare muwSCALING_0X_34X_PAYLOAD    Base=r14.0    ElementSize=2    SrcRegion=<16;16,1>    DstRegion=<1>    Type=uw    // word view, same base
+    .declare mudCALING_0X_34X_PAYLOAD     Base=r14.0    ElementSize=4    SrcRegion=<8;8,1>      DstRegion=<1>    Type=ud    // dword view; NOTE(review): name lacks the "S" of its siblings, kept as-is
+    .declare mubCALING_0X_34X_PAYLOAD     Base=r14.0    ElementSize=1    SrcRegion=<32;32,1>    DstRegion=<1>    Type=ub    // byte view; NOTE(review): same naming quirk, kept as-is
+
+
+    .declare fSCALING_0X_34X_TEMP         Base=r9.0     ElementSize=4    SrcRegion=<8;8,1>      DstRegion=<1>    Type=f     // float view of the scaling temp space at r9
+    .declare udSCALING_0X_34X_TEMP        Base=r9.0     ElementSize=4    SrcRegion=<8;8,1>      DstRegion=<1>    Type=ud    // dword view, same base
+    .declare ub4SCALING_0X_34X_TEMP       Base=r9.0     ElementSize=1    SrcRegion=<32;8,4>     DstRegion=<1>    Type=ub    // strided byte source view (<32;8,4>), dst stride 1
+    .declare uwSCALING_0X_34X_TEMP        Base=r9.0     ElementSize=2    SrcRegion=<16;16,1>    DstRegion=<1>    Type=uw    // word view, same base
+
+    // Sampler ramp is used for Scaling 0X_0.34X
+    .declare fSAMPLER_RAMP                Base=r9.0     ElementSize=4    SrcRegion=<8;8,1>      Type=f                      // 1 GRF, 8 elements; shares r9 with the temp views above
+
+
+	//#define rMSGDSC_UV    		r23.0
+
+
+//End of _SCALING_
+
+
+    //Check if layer is to be skipped
+
+
+        // f0.1 pre-computed in Set_Layer_0
+        (-f0.1)  jmpi  (1)  SKIP_AVS_LOAD_L0_1_                // f0.1 clear -> skip the workaround and the send
+
+    // Build the sampler message: r16 = header (m0), r17 = AVS payload (m1), a0.0 = descriptor.
+    //AVS_PAYLOAD already has all the data loaded at this point
+    add (1)     a0.0:ud     r23.5<0;1,0>:ud      0x50EB400:ud      //msg desc: mlen 2, rlen 16, header, SIMD32/64, sample_8x8, sampler 4
+
+    mov (1)     r16.2:ud      0x00000000:ud                 // Enable ARGB channels (M0.2 write masks all 0 = written back)
+
+
+    // set the vertical block number
+
+        add (1)   r25.1<1>:ud    r7.12:uw  1:ud                // payload .1 (m1.1) = r7.12:uw + 1
+
+
+    mov (8)     r17.0:ud      r25.0<8;8,1>:ud                // copy all 8 payload dwords from r25 into m1 (r17)
+
+    // Gen7 AVS WA Only for YUV packed surfaces, NV12 and Y-channel only for Planar surfaces
+    //   if ((int)(u_left*width + 5.0/256) > (int)(u_left*width))
+    //   {
+    //       modified_u_coord = u_coord - 5.0/(256*width); //floating point
+    //   }
+    //   else if ((int)(u_left*width + 255.0/256) == (int)(u_left*width))
+    //   {
+    //       modified_u_coord = u_coord + 1.0/(256*width); //floating point
+    //   }
+    //   else{
+    //       modified_u_coord = u_coord;
+    //   }
+    //   Where u_left = u - 2*du + 3*ddu for IEF On
+    //   And u_left = u for IEF Off case
+    //
+
+    // check whether Gen7 AVS WA is enabled,
+    mov  (1)    r14.8:uw            f0.0:uw           // save f0.0 (restored at GEN7_AVS_WA_DONE_L0_1_)
+
+    and.nz.f0.0  (1)     null<1>:uw     r2.3:uw    0x2:uw          // bit 1 of r2.3:uw = WA enable
+    (-f0.0)jmpi  (1)     GEN7_AVS_WA_DONE_L0_1_
+
+
+    // Gen7 AVS WA, check if IEF is ON for choosing Gen7 AVS WA formula
+
+    and.nz.f0.0  (8)     null<1>:uw     r2.3<0;1,0>:uw             0x4:uw   // bit 2 of r2.3:uw = IEF on
+    (f0.0)mov (8)    acc0.0:f          r17.2<0;1,0>:f                      // acc  = u        (m1.2)
+    (f0.0)mac (8)    acc0.0:f          r17.4<0;1,0>:f    -2.0:f            // acc += -2 * du  (m1.4)
+    (f0.0)mac (8)    acc0.0:f          r17.6<0;1,0>:f     3.0:f            // acc +=  3 * ddu (m1.6)
+    (f0.0)mov (1)    r14.2:f        acc0:f                                 // IEF ON,  rTEMP3.2 = u_left
+    (-f0.0)mov (1)   r14.2:f        r17.2<0;1,0>:f                // IEF OFF, rTEMP3.2 = u_left
+
+    and (1)         r14.1:ud     r2.3:uw    0xFFF8:uw                       // keep bits 15:3 of r2.3:uw
+    asr (1)         r14.1:ud     r14.1:ud    3:d                            // shift down: the integer used as "width" below
+    mov (1)         r14.1:f      r14.1:ud                                   // rTEMP3.1 = width as float
+
+    // Gen7 AVS WA, if (int)(u_left*width + 5.0/256) > (int)(u_left*width)
+    mul (1)          r14.0:f        r14.2:f                  r14.1:f    // rTEMP3.0 = u_left*width
+    add (1)          r14.2:f        r14.0:f                  0.01953125:f  // rTEMP3.2 = u_left*width + 5.0/256
+    add (1)          r14.3:f        r14.0:f                  0.99609375:f  // rTEMP3.3 = u_left*width + 255.0/256
+
+    //Check if the values are < 0 and account for (int) cast of negative numbers
+
+    //(int)(u_left*width)
+    cmp.l.f0.0 (1)   null<1>:f          r14.0:f                  0.00000000:f          // f0.0 = (float value < 0)
+    mov (1)          r14.0:d        r14.0:f                                            // float -> int, in place
+    (f0.0)add (1)    r14.0:d        r14.0<0;1,0>:d           -1:d                      // -1 when the float was negative
+
+    //(int)(u_left*width + 5.0/256)
+    cmp.l.f0.0 (1)   null<1>:f          r14.2:f                  0.00000000:f          
+    mov (1)          r14.2:d        r14.2:f        
+    (f0.0)add (1)    r14.2:d        r14.2<0;1,0>:d           -1:d        
+
+    //(int)(u_left*width + 255.0/256)
+    cmp.l.f0.0 (1)   null<1>:f          r14.3:f                  0.00000000:f          
+    mov (1)          r14.3:d        r14.3:f
+    (f0.0)add (1)    r14.3:d        r14.3<0;1,0>:d           -1:d        
+
+    mov  (1)    f0.0:uw                0:uw                   // clear flag so the "+1/256" add stays off unless cmp.e below sets it
+    //if ((int)(u_left*width + 5.0/256) > (int)(u_left*width))
+    cmp.g.f1.0  (1)     null<1>:d        r14.2:d    r14.0:d       // f1.0 = first branch of the pseudo-code above
+    // modified_u_coord = u_coord - 5.0/(256*width); //floating point
+    (f1.0) add (1)     r17.2:f       r17.2<0;1,0>:f   -r2.3:f     // r2.3:f presumably holds 5.0/(256*width) - confirm against the CURBE layout
+    //else if ((int)(u_left*width + 255.0/256) == (int)(u_left*width))
+    (-f1.0) cmp.e.f0.0  (1) null<1>:d    r14.3:d    r14.0:d       // only evaluated when the first branch was not taken
+    // modified_u_coord = u_coord + 1.0/(256*width); //floating point
+    (f0.0) add (1)     r17.2:f       r17.2<0;1,0>:f   r2.2:f      // r2.2:f presumably holds 1.0/(256*width) - confirm against the CURBE layout
+
+
+GEN7_AVS_WA_DONE_L0_1_:
+    mov  (1)    f0.0:uw                r14.8:uw                   // restore f0.0
+
+    send (1)    uwBUFFER_1(0)<1>   r16    0x2    a0.0:ud          // message starts at r16; response lands in BUFFER_1
+    // Returns packed data in 16 GRFs in scrambled order
+
+SKIP_AVS_LOAD_L0_1_:
+    nop
+
+
diff --git a/src/shaders/post_processing/gen7/PA_AVS_Buf_2.g4a b/src/shaders/post_processing/gen7/PA_AVS_Buf_2.g4a
new file mode 100644
index 0000000..f9ac1a7
--- /dev/null
+++ b/src/shaders/post_processing/gen7/PA_AVS_Buf_2.g4a
@@ -0,0 +1,532 @@
+/*
+ *  Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+//   37    // Total instruction count
+//    1    // Total kernel count
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16)
+.default_register_type  :ub
+
+.reg_count_total        128
+.reg_count_payload      7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+                                    //  e.g.            byte0   byte1  byte2
+                                    // YUYV               0       1      3
+                                    // YVYU               0       3      1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+                                    //  e.g.              byte0           byte1           byte2
+                                    // YUYV                 0               1               3
+                                    // YVYU                 0               3               1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+//                = 000 0001 (min message len 1 ) 0,0000 (resp len 0   -add later)
+//                  0000,0000,0000
+//                  0001(Spawn a root thread),0001 (Root thread spawn thread)
+//                = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+//                = 000 0110 (min message len 6 ) 0,0000 (resp len 0   -add later)
+//                  1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+//                  0000,0000 (Binding table index, added later)
+//                = 0x0C098700
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+        // Message Descriptors
+                //                = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+                //                  1 (header present 1) 0 1010 (media block write) 000000
+                //                  00000000 (binding table index - set later)
+                //                = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+                             // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT  r43    // Dummy Message Register for EOT
+
+// Four typed aliases (ub, uw, ud, f) of the same storage, all based at r30.
+.declare    mubMSGPAYLOAD  Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare    muwMSGPAYLOAD  Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare    mudMSGPAYLOAD  Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare    mfMSGPAYLOAD   Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+// Five typed views of the same registers, every one based at r9.0.
+.declare fROBUF		  Base=r9.0		ElementSize=4		SrcRegion=<8;8,1>		  DstRegion=<1>		Type=f
+
+.declare udROBUF		Base=r9.0		ElementSize=4		SrcRegion=<8;8,1>		  DstRegion=<1>		Type=ud
+
+.declare uwROBUF		Base=r9.0		ElementSize=2		SrcRegion=<16;16,1>		DstRegion=<1>		Type=uw
+
+.declare ubROBUF		Base=r9.0		ElementSize=1		SrcRegion=<16;16,1>		DstRegion=<1>		Type=ub
+// ub4 view: byte elements with a stride of 4 (<32;8,4> source region, <4> destination stride).
+.declare ub4ROBUF 	Base=r9.0		ElementSize=1		SrcRegion=<32;8,4>		DstRegion=<4>		Type=ub
+
+
+// End of common.inc
+
+
+// FileName:		PA_AVS_Buf_2.asm 
+// Author:			Vivek Kumar
+// Description:	Loads 8x8 AVS/IEF Packed data into Buffer 2
+
+
+
+// FileName     :   PA_AVS_Buf.asm
+// Author       :   Tatiya, Rupesh
+// Description  :   Loads 8x8 AVS/IEF Packed data into Buffer N
+
+//On IVB, for AVS module - set buffer pointers offset according to AVS Layout.
+//Change it to Sample Unorm layout in Shuffle modules.
+
+
+
+// Module name: Scaling.inc
+
+
+
+
+// Description: Includes all definitions explicit to Fast Composite.
+
+
+
+
+// End of common.inc
+
+
+//========== GRF partition ==========
+     // r0 header            :   r0          (1 GRF)
+     // Static parameters    :   r1 - r6     (6 GRFS)
+     // Inline parameters    :   r7 - r8     (2 GRFs)
+     // MSGSRC               :   r27         (1 GRF)
+//===================================
+
+//Interface:
+//========== Static Parameters (Explicit To Fast Composite) ==========
+//r1
+//CSC Set 0
+
+// Unsigned-dword alias starting at r1.0 (r1 is the first static-parameter register per the GRF partition above).
+.declare udCSC_CURBE    Base=r1.0      ElementSize=4       Type=ud
+
+//Constant alpha
+
+
+//r2
+
+
+// Gen7 AVS WA
+// (the kernel body in this file reads r2.3:uw, r2.2:f and r2.3:f for this workaround)
+
+// WiDi Definitions
+
+
+//Colorfill
+
+// NOTE(review): the rotation-angle comment below looks orphaned from a definition removed by preprocessing - confirm.
+                                      // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise.
+// Byte alias at r2.20 with a scalar <0;1,0> source region.
+.declare ubCOLOR_PIXEL_VAL      Base=r2.20      ElementSize=1       SrcRegion=<0;1,0>       DstRegion=<1>       Type=ub
+
+//r3
+//Normalised Ratio of Horizontal step size with main video for all layers
+
+
+    //Normalised Ratio of Horizontal step size with main video for all layers becomes
+    //Normalised Horizontal step size for all layers in VP_Setup.asm
+
+
+//r4
+//Normalised Vertical step size for all layers
+
+
+//r5
+//Normalised Vertical Frame Origin for all layers
+
+
+//r6
+//Normalised Horizontal Frame Origin for all layers
+
+
+//========== Inline Parameters (Explicit To Fast Composite) ==========
+
+
+//Main video Step X
+
+
+//====================== Binding table (Explicit To Fast Composite)=========================================
+
+
+//Used by Interlaced Scaling Kernels
+
+
+//========== Sampler State Table Index (Explicit To Fast Composite)==========
+//Sampler Index for AVS/IEF messages
+
+
+//Sampler Index for SIMD16 sampler messages
+
+
+//=============================================================================
+// Six buffers, each declared in five typed views (f, ud, uw, ub, ub4): BUFFER_0..3 at r64/r80/r96/r112, BUFFER_4 at r28, BUFFER_5 at r46.
+.declare fBUFFER_0      Base=r64.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+.declare fBUFFER_1      Base=r80.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+.declare fBUFFER_2      Base=r96.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+.declare fBUFFER_3      Base=r112.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+.declare fBUFFER_4      Base=r28.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+.declare fBUFFER_5      Base=r46.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+// Unsigned-dword views of the same six bases.
+.declare udBUFFER_0     Base=r64.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_1     Base=r80.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_2     Base=r96.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_3     Base=r112.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_4     Base=r28.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_5     Base=r46.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+// Unsigned-word views (uwBUFFER_2 is the destination of this file's send).
+.declare uwBUFFER_0     Base=r64.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_1     Base=r80.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_2     Base=r96.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_3     Base=r112.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_4     Base=r28.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_5     Base=r46.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+// Unsigned-byte views.
+.declare ubBUFFER_0     Base=r64.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_1     Base=r80.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_2     Base=r96.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_3     Base=r112.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_4     Base=r28.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_5     Base=r46.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+// Strided byte views: byte elements with a stride of 4 (<32;8,4> source region, <4> destination stride).
+.declare ub4BUFFER_0    Base=r64.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_1    Base=r80.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_2    Base=r96.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_3    Base=r112.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_4    Base=r28.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_5    Base=r46.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+
+//Pointer to mask reg
+
+
+//r18
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+.declare udCSC_COEFF_0  Base=r18.0    ElementSize=4 Type=ud       // 1 GRF
+// (None of the CSC-coefficient or alpha-mask aliases declared here is referenced by this file's instructions.)
+//r19
+
+
+.declare udCSC_COEFF_1  Base=r19.0    ElementSize=4 Type=ud       // 1 GRF
+
+
+//r20
+// Word view of r20; the non-TEMP alpha mask alias below is the same view of r21.
+.declare uwALPHA_MASK_REG_TEMP  Base=r20.0    ElementSize=2 SrcRegion=<16;16,1> Type=uw        // 1 GRF
+
+//r21
+
+.declare uwALPHA_MASK_REG       Base=r21.0         ElementSize=2 SrcRegion=<16;16,1> Type=uw        // 1 GRF
+
+//r22
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as
+//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT
+
+//r23
+
+
+//Lumakey
+
+
+//r24
+
+
+//r25
+
+
+//r26
+
+
+//defines to generate LABELS during compile time.
+
+
+        // Message Header
+        // m0.7         31:0    Debug
+        // m0.6         31:0    Debug
+        // m0.5         31:0    Ignored
+        // m0.4         31:0    Ignored
+        // m0.3         31:0    Ignored
+        // m0.2         31:16   Ignored
+        //              15      Alpha Write Channel Mask        enable=0, disable=1
+        //              14      Blue Write Channel Mask  (U)
+        //              13      Green Write Channel Mask (Y)
+        //              12      Red Write Channel Mask   (V)
+        //              11:0    Ignored
+        // m0.1                 Ignored
+        // m0.0                 Ignored
+
+
+        // AVS payload
+        // m1.7                 Group ID Number
+        // m1.6                 U 2nd Derivative        ---> NLAS dx
+        // m1.5                 Delta V                 ---> Step Y
+        // m1.4                 Delta U                 ---> Step X
+        // m1.3                 Pixel 0 V Address       ---> ORIY (Y0)
+        // m1.2                 Pixel 0 U Address       ---> ORIX (X0)
+        // m1.1                 Vertical Block Number
+        // m1.0                 Reserved
+
+        // Sampler Message Descriptor
+        // 31:29        Reserved                        000
+        // 28:25        Message length                  0010
+        // 24:20        Response length                 xxxxx   ---> 4GRFs for each enabled channel (AVS), 2GRFs for each enabled channel (sample unorm)
+        // 19           Header Present                  1
+        // 18:17        SIMD Mode                       11      ---> SIMD32/64
+        // 16:12        Message Type                    xxxxx   ---> 01011 sample_8x8, 01100 (sample_unorm), 01010 (sample_unorm+killpix)
+        // 11:8         Sampler Index                   xxxx
+        // 7:0          Binding Table Index             xxxxxxxx
+
+
+        // Msg Header M0.2
+        // 15:15        Alpha Write Channel Mask, 0: written back, 1: not written back
+        // 14:14        Blue  Write Channel Mask
+        // 13:13        Green Write Channel Mask
+        // 12:12        Red   Write Channel Mask
+
+
+//By design, Buffer 0,1,2,3 always have Layer 0 and Buffer 4,5 always have L1-L7
+
+
+//used to generate LABELS at compile time.
+
+
+        // 18:17        SIMD Mode                       10      ---> SIMD16
+        // 16:12        Message Type                    xxxxx   ---> 00000 (SIMD16)
+
+
+//r10-17  - 8 GRFs to load SIMD16 data (up to 4 channels)
+//r18-19  - 2 GRFs to store sampler ramp.
+// NOTE(review): the aliases below are based at r14 and r9, not at the r10 / r18 ranges named above - confirm which is current.
+    .declare mfSCALING_0X_34X_PAYLOAD	Base=r14.0	ElementSize=4		SrcRegion=<8;8,1>		DstRegion=<1>		Type=f
+    .declare muwSCALING_0X_34X_PAYLOAD	Base=r14.0	ElementSize=2		SrcRegion=<16;16,1>		DstRegion=<1>		Type=uw
+    .declare mudCALING_0X_34X_PAYLOAD	Base=r14.0	ElementSize=4		SrcRegion=<8;8,1>		DstRegion=<1>		Type=ud
+    .declare mubCALING_0X_34X_PAYLOAD	Base=r14.0	ElementSize=1		SrcRegion=<32;32,1>		DstRegion=<1>		Type=ub
+// NOTE(review): the ud/ub payload names read "CALING" (no leading S) unlike the f/uw ones; likely a typo, names left unchanged.
+// TEMP views based at r9.0 (the same base as the ROBUF aliases); r14.0-r14.3 and r14.8:uw are also used directly by this file's kernel body.
+	.declare fSCALING_0X_34X_TEMP		Base=r9.0	ElementSize=4		SrcRegion=<8;8,1>		DstRegion=<1>		Type=f
+	.declare udSCALING_0X_34X_TEMP		Base=r9.0	ElementSize=4		SrcRegion=<8;8,1>		DstRegion=<1>		Type=ud
+	.declare ub4SCALING_0X_34X_TEMP		Base=r9.0	ElementSize=1		SrcRegion=<32;8,4>		DstRegion=<1>		Type=ub
+	.declare uwSCALING_0X_34X_TEMP		Base=r9.0	ElementSize=2		SrcRegion=<16;16,1>		DstRegion=<1>		Type=uw
+	// NOTE(review): the ub4 TEMP view above declares DstRegion=<1>, whereas ub4ROBUF / ub4BUFFER_n use <4> - confirm intended.
+	// Sampler ramp is used for Scaling 0X_0.34X
+	.declare	fSAMPLER_RAMP  		Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> Type=f		// 1 GRFs, 8 elements
+
+
+	//#define rMSGDSC_UV    		r23.0
+
+
+//End of _SCALING_
+
+
+    // Kernel body: unless the layer is skipped, issue one `send` with r16 as its source
+    // register and a0.0 as its descriptor; the response is written starting at uwBUFFER_2.
+    //Check if layer is to be skipped
+        // f0.1 pre-computed in Set_Layer_0
+        (-f0.1)  jmpi  (1)  SKIP_AVS_LOAD_L0_2_
+        // (inverted predicate: the jump is taken when f0.1 is clear)
+
+    //AVS_PAYLOAD already has all the data loaded at this point
+    add (1)     a0.0:ud     r23.5<0;1,0>:ud      0x50EB400:ud      //msg desc
+    // 0x50EB400 read against the Sampler Message Descriptor table earlier in this file: message length 2, response length 16, header present, SIMD mode 11, message type 01011 (sample_8x8), sampler index 4, binding table index 0; r23.5 is added on top.
+    mov (1)     r16.2:ud      0x00000000:ud                 // Enable ARGB channels
+    // (dword 2 of the header register; per the header table a channel-mask bit of 0 means "written back")
+
+    // set the vertical block number
+
+        // r25.1 = r7.12:uw + 2
+        add (1)   r25.1<1>:ud    r7.12:uw  2:ud
+
+    // Copy all 8 dwords of r25 into r17, the register that follows the r16 header.
+    mov (8)     r17.0:ud      r25.0<8;8,1>:ud
+
+    // Gen7 AVS WA Only for YUV packed surfaces, NV12 and Y-channel only for Planar surfaces
+    //   if ((int)(u_left*width + 5.0/256) > (int)(u_left*width))
+    //   {
+    //       modified_u_coord = u_coord - 5.0/(256*width); //floating point
+    //   }
+    //   else if ((int)(u_left*width + 255.0/256) == (int)(u_left*width))
+    //   {
+    //       modified_u_coord = u_coord + 1.0/(256*width); //floating point
+    //   }
+    //   else{
+    //       modified_u_coord = u_coord;
+    //   }
+    //   Where u_left = u - 2*du + 3*ddu for IEF On
+    //   And u_left = u for IEF Off case
+    //   In the code below u / du / ddu are read from r17.2 / r17.4 / r17.6 and the modified coordinate is written back to r17.2.
+
+    // check whether Gen7 AVS WA is enabled,
+    mov  (1)    r14.8:uw            f0.0:uw           // save f0.0
+    // Bit 1 (mask 0x2) of r2.3:uw enables the workaround; when it is clear, jump straight to the flag restore and send.
+    and.nz.f0.0  (1)     null<1>:uw     r2.3:uw    0x2:uw
+    (-f0.0)jmpi  (1)     GEN7_AVS_WA_DONE_L0_2_
+
+
+    // Gen7 AVS WA, check if IEF is ON for choosing Gen7 AVS WA formula
+    // Bit 2 (mask 0x4) of r2.3:uw selects the IEF-on formula; the scalar source gives all 8 flag channels the same result.
+    and.nz.f0.0  (8)     null<1>:uw     r2.3<0;1,0>:uw             0x4:uw
+    (f0.0)mov (8)    acc0.0:f          r17.2<0;1,0>:f                      // acc0  = u
+    (f0.0)mac (8)    acc0.0:f          r17.4<0;1,0>:f    -2.0:f            // acc0 += du  * -2.0
+    (f0.0)mac (8)    acc0.0:f          r17.6<0;1,0>:f     3.0:f            // acc0 += ddu *  3.0
+    (f0.0)mov (1)    r14.2:f        acc0:f                                 // IEF ON,  rTEMP3.2 = u_left
+    (-f0.0)mov (1)   r14.2:f        r17.2<0;1,0>:f                // IEF OFF, rTEMP3.2 = u_left
+    // width = (r2.3:uw & 0xFFF8) >> 3, then converted to float in place in r14.1.
+    and (1)         r14.1:ud     r2.3:uw    0xFFF8:uw
+    asr (1)         r14.1:ud     r14.1:ud    3:d
+    mov (1)         r14.1:f      r14.1:ud
+
+    // Gen7 AVS WA, if (int)(u_left*width + 5.0/256) > (int)(u_left*width)
+    mul (1)          r14.0:f        r14.2:f                  r14.1:f    // rTEMP3.0 = u_left*width
+    add (1)          r14.2:f        r14.0:f                  0.01953125:f  // rTEMP3.2 = u_left*width + 5.0/256
+    add (1)          r14.3:f        r14.0:f                  0.99609375:f  // rTEMP3.3 = u_left*width + 255.0/256
+
+    //Check if the values are < 0 and account for (int) cast of negative numbers
+    // Each triple below: f0.0 = (value < 0.0); convert float -> signed dword in place; subtract 1 when f0.0 is set.
+    //(int)(u_left*width)
+    cmp.l.f0.0 (1)   null<1>:f          r14.0:f                  0.00000000:f          
+    mov (1)          r14.0:d        r14.0:f
+    (f0.0)add (1)    r14.0:d        r14.0<0;1,0>:d           -1:d        
+    // NOTE(review): the -1 adjustment depends on how the f->d mov rounds negative values - confirm against the ISA reference.
+    //(int)(u_left*width + 5.0/256)
+    cmp.l.f0.0 (1)   null<1>:f          r14.2:f                  0.00000000:f          
+    mov (1)          r14.2:d        r14.2:f        
+    (f0.0)add (1)    r14.2:d        r14.2<0;1,0>:d           -1:d        
+
+    //(int)(u_left*width + 255.0/256)
+    cmp.l.f0.0 (1)   null<1>:f          r14.3:f                  0.00000000:f          
+    mov (1)          r14.3:d        r14.3:f
+    (f0.0)add (1)    r14.3:d        r14.3<0;1,0>:d           -1:d        
+
+    mov  (1)    f0.0:uw                0:uw                   // clear flag, so f0.0 stays false if the cmp.e below is predicated off
+    //if ((int)(u_left*width + 5.0/256) > (int)(u_left*width))
+    cmp.g.f1.0  (1)     null<1>:d        r14.2:d    r14.0:d
+    // modified_u_coord = u_coord - 5.0/(256*width); //floating point
+    (f1.0) add (1)     r17.2:f       r17.2<0;1,0>:f   -r2.3:f
+    //else if ((int)(u_left*width + 255.0/256) == (int)(u_left*width))
+    (-f1.0) cmp.e.f0.0  (1) null<1>:d    r14.3:d    r14.0:d
+    // modified_u_coord = u_coord + 1.0/(256*width); //floating point
+    (f0.0) add (1)     r17.2:f       r17.2<0;1,0>:f   r2.2:f
+    // NOTE(review): r2.3:f and r2.2:f presumably hold 5.0/(256*width) and 1.0/(256*width) as in the pseudo-code - confirm against the host-side setup.
+    // f1.0 is overwritten above and, unlike f0.0, is not saved or restored here.
+GEN7_AVS_WA_DONE_L0_2_:
+    mov  (1)    f0.0:uw                r14.8:uw                   // restore f0.0
+    // Message source starts at r16 (header), followed by r17; descriptor comes from a0.0; response is written from uwBUFFER_2 (declared at r96).
+    send (1)    uwBUFFER_2(0)<1>   r16    0x2    a0.0:ud
+    // Returns packed data in 16 GRFs in scrambled order
+
+SKIP_AVS_LOAD_L0_2_:
+    nop
+
+
diff --git a/src/shaders/post_processing/gen7/PA_AVS_Buf_3.g4a b/src/shaders/post_processing/gen7/PA_AVS_Buf_3.g4a
new file mode 100644
index 0000000..b4aec0a
--- /dev/null
+++ b/src/shaders/post_processing/gen7/PA_AVS_Buf_3.g4a
@@ -0,0 +1,532 @@
+/*
+ *  Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+//   37    // Total instruction count
+//    1    // Total kernel count
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16)
+.default_register_type  :ub
+
+.reg_count_total        128
+.reg_count_payload      7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+                                    //  e.g.            byte0   byte1  byte2
+                                    // YUYV               0       1      3
+                                    // YVYU               0       3      1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+                                    //  e.g.              byte0           byte1           byte2
+                                    // YUYV                 0               1               3
+                                    // YVYU                 0               3               1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+//                = 000 0001 (min message len 1 ) 0,0000 (resp len 0   -add later)
+//                  0000,0000,0000
+//                  0001(Spawn a root thread),0001 (Root thread spawn thread)
+//                = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+//                = 000 0110 (min message len 6 ) 0,0000 (resp len 0   -add later)
+//                  1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+//                  0000,0000 (Binding table index, added later)
+//                = 0x0C098700
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+        // Message Descriptors
+                //                = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+                //                  1 (header present 1) 0 1010 (media block write) 000000
+                //                  00000000 (binding table index - set later)
+                //                = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+                             // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT  r43    // Dummy Message Register for EOT
+
+
+.declare    mubMSGPAYLOAD  Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare    muwMSGPAYLOAD  Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare    mudMSGPAYLOAD  Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare    mfMSGPAYLOAD   Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+
+.declare fROBUF		  Base=r9.0		ElementSize=4		SrcRegion=<8;8,1>		  DstRegion=<1>		Type=f
+
+.declare udROBUF		Base=r9.0		ElementSize=4		SrcRegion=<8;8,1>		  DstRegion=<1>		Type=ud
+
+.declare uwROBUF		Base=r9.0		ElementSize=2		SrcRegion=<16;16,1>		DstRegion=<1>		Type=uw
+
+.declare ubROBUF		Base=r9.0		ElementSize=1		SrcRegion=<16;16,1>		DstRegion=<1>		Type=ub
+
+.declare ub4ROBUF 	Base=r9.0		ElementSize=1		SrcRegion=<32;8,4>		DstRegion=<4>		Type=ub
+
+
+// End of common.inc
+
+
+// FileName:		PA_AVS_Buf_3.asm 
+// Author:			Vivek Kumar
+// Description:	Loads 8x8 AVS/IEF Packed data into Buffer 3
+
+
+
+// FileName     :   PA_AVS_Buf.asm
+// Author       :   Tatiya, Rupesh
+// Description  :   Loads 8x8 AVS/IEF Packed data into Buffer N
+
+//On IVB, for AVS module - set buffer pointers offset according to AVS Layout.
+//Change it to Sample Unorm layout in Shuffle modules.
+
+
+
+// Module name: Scaling.inc
+
+
+
+
+// Description: Includes all definitions explicit to Fast Composite.
+
+
+
+
+// End of common.inc
+
+
+//========== GRF partition ==========
+     // r0 header            :   r0          (1 GRF)
+     // Static parameters    :   r1 - r6     (6 GRFS)
+     // Inline parameters    :   r7 - r8     (2 GRFs)
+     // MSGSRC               :   r27         (1 GRF)
+//===================================
+
+//Interface:
+//========== Static Parameters (Explicit To Fast Composite) ==========
+//r1
+//CSC Set 0
+
+
+.declare udCSC_CURBE    Base=r1.0      ElementSize=4       Type=ud
+
+//Constant alpha
+
+
+//r2
+
+
+// Gen7 AVS WA
+
+
+// WiDi Definitions
+
+
+//Colorfill
+
+
+                                      // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise.	    
+
+.declare ubCOLOR_PIXEL_VAL      Base=r2.20      ElementSize=1       SrcRegion=<0;1,0>       DstRegion=<1>       Type=ub
+
+//r3
+//Normalised Ratio of Horizontal step size with main video for all layers
+
+
+    //Normalised Ratio of Horizontal step size with main video for all layers becomes
+    //Normalised Horizontal step size for all layers in VP_Setup.asm
+
+
+//r4
+//Normalised Vertical step size for all layers
+
+
+//r5
+//Normalised Vertical Frame Origin for all layers
+
+
+//r6
+//Normalised Horizontal Frame Origin for all layers
+
+
+//========== Inline Parameters (Explicit To Fast Composite) ==========
+
+
+//Main video Step X
+
+
+//====================== Binding table (Explicit To Fast Composite)=========================================
+
+
+//Used by Interlaced Scaling Kernels
+
+
+//========== Sampler State Table Index (Explicit To Fast Composite)==========
+//Sampler Index for AVS/IEF messages
+
+
+//Sampler Index for SIMD16 sampler messages
+
+
+//=============================================================================
+
+.declare fBUFFER_0      Base=r64.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+.declare fBUFFER_1      Base=r80.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+.declare fBUFFER_2      Base=r96.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+.declare fBUFFER_3      Base=r112.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+.declare fBUFFER_4      Base=r28.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+.declare fBUFFER_5      Base=r46.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+
+.declare udBUFFER_0     Base=r64.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_1     Base=r80.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_2     Base=r96.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_3     Base=r112.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_4     Base=r28.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_5     Base=r46.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+
+.declare uwBUFFER_0     Base=r64.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_1     Base=r80.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_2     Base=r96.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_3     Base=r112.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_4     Base=r28.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_5     Base=r46.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+
+.declare ubBUFFER_0     Base=r64.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_1     Base=r80.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_2     Base=r96.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_3     Base=r112.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_4     Base=r28.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_5     Base=r46.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+
+.declare ub4BUFFER_0    Base=r64.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_1    Base=r80.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_2    Base=r96.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_3    Base=r112.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_4    Base=r28.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_5    Base=r46.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+
+//Pointer to mask reg
+
+
+//r18
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+.declare udCSC_COEFF_0  Base=r18.0    ElementSize=4 Type=ud       // 1 GRF
+
+//r19
+
+
+.declare udCSC_COEFF_1  Base=r19.0    ElementSize=4 Type=ud       // 1 GRF
+
+
+//r20
+
+.declare uwALPHA_MASK_REG_TEMP  Base=r20.0    ElementSize=2 SrcRegion=<16;16,1> Type=uw        // 1 GRF
+
+//r21
+
+.declare uwALPHA_MASK_REG       Base=r21.0         ElementSize=2 SrcRegion=<16;16,1> Type=uw        // 1 GRF
+
+//r22
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as
+//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT
+
+//r23
+
+
+//Lumakey
+
+
+//r24
+
+
+//r25
+
+
+//r26
+
+
+//defines to generate LABELS during compile time.
+
+
+        // Message Header
+        // m0.7         31:0    Debug
+        // m0.6         31:0    Debug
+        // m0.5         31:0    Ignored
+        // m0.4         31:0    Ignored
+        // m0.3         31:0    Ignored
+        // m0.2         31:16   Ignored
+        //              15      Alpha Write Channel Mask        enable=0, disable=1
+        //              14      Blue Write Channel Mask  (U)
+        //              13      Green Write Channel Mask (Y)
+        //              12      Red Write Channel Mask   (V)
+        //              11:0    Ignored
+        // m0.1                 Ignored
+        // m0.0                 Ignored
+
+
+        // AVS payload
+        // m1.7                 Group ID Number
+        // m1.6                 U 2nd Derivative        ---> NLAS dx
+        // m1.5                 Delta V                 ---> Step Y
+        // m1.4                 Delta U                 ---> Step X
+        // m1.3                 Pixel 0 V Address       ---> ORIY (Y0)
+        // m1.2                 Pixel 0 U Address       ---> ORIX (X0)
+        // m1.1                 Vertical Block Number
+        // m1.0                 Reserved
+
+        // Sampler Message Descriptor
+        // 31:29        Reserved                        000
+        // 28:25        Message length                  0010
+        // 24:20        Response length                 xxxxx   ---> 4GRFs for each enabled channel (AVS), 2GRFs for each enabled channel (sample unorm)
+        // 19           Header Present                  1
+        // 18:17        SIMD Mode                       11      ---> SIMD32/64
+        // 16:12        Message Type                    xxxxx   ---> 01011 sample_8x8, 01100 (sample_unorm), 01010 (sample_unorm+killpix)
+        // 11:8         Sampler Index                   xxxx
+        // 7:0          Binding Table Index             xxxxxxxx
+
+
+        // Msg Header M0.2
+        // 15:15        Alpha Write Channel Mask, 0: written back, 1: not written back
+        // 14:14        Blue  Write Channel Mask
+        // 13:13        Green Write Channel Mask
+        // 12:12        Red   Write Channel Mask
+
+
+//By design, Buffer 0,1,2,3 always have Layer 0 and Buffer 4,5 always have L1-L7
+
+
+//used to generate LABELS at compile time.
+
+
+        // 18:17        SIMD Mode                       10      ---> SIMD16
+        // 16:12        Message Type                    xxxxx   ---> 00000 (SIMD16)
+
+
+//r10-17  - 8 GRFs to load SIMD16 data (upto 4 channels)
+//r18-19  - 2 GRFs to store sampler ramp.
+
+    .declare mfSCALING_0X_34X_PAYLOAD	Base=r14.0	ElementSize=4		SrcRegion=<8;8,1>		DstRegion=<1>		Type=f
+    .declare muwSCALING_0X_34X_PAYLOAD	Base=r14.0	ElementSize=2		SrcRegion=<16;16,1>		DstRegion=<1>		Type=uw
+    .declare mudCALING_0X_34X_PAYLOAD	Base=r14.0	ElementSize=4		SrcRegion=<8;8,1>		DstRegion=<1>		Type=ud
+    .declare mubCALING_0X_34X_PAYLOAD	Base=r14.0	ElementSize=1		SrcRegion=<32;32,1>		DstRegion=<1>		Type=ub
+
+
+	.declare fSCALING_0X_34X_TEMP		Base=r9.0	ElementSize=4		SrcRegion=<8;8,1>		DstRegion=<1>		Type=f
+	.declare udSCALING_0X_34X_TEMP		Base=r9.0	ElementSize=4		SrcRegion=<8;8,1>		DstRegion=<1>		Type=ud
+	.declare ub4SCALING_0X_34X_TEMP		Base=r9.0	ElementSize=1		SrcRegion=<32;8,4>		DstRegion=<1>		Type=ub
+	.declare uwSCALING_0X_34X_TEMP		Base=r9.0	ElementSize=2		SrcRegion=<16;16,1>		DstRegion=<1>		Type=uw
+
+	// Sampler ramp is used for Scaling 0X_0.34X
+	.declare	fSAMPLER_RAMP  		Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> Type=f		// 1 GRFs, 8 elements
+
+
+	//#define rMSGDSC_UV    		r23.0
+
+
+//End of _SCALING_
+
+
+    //Check if layer is to be skipped
+
+
+        // f0.1 pre-computed in Set_Layer_0
+        (-f0.1)  jmpi  (1)  SKIP_AVS_LOAD_L0_3_
+
+
+    //AVS_PAYLOAD already has all the data loaded at this point
+    add (1)     a0.0:ud     r23.5<0;1,0>:ud      0x50EB400:ud      //msg desc
+
+    mov (1)     r16.2:ud      0x00000000:ud                 // Enable ARGB channels
+
+
+    // set the vertical block number
+
+
+        add (1)   r25.1<1>:ud    r7.12:uw  3:ud
+
+
+    mov (8)     r17.0:ud      r25.0<8;8,1>:ud
+
+    // Gen7 AVS WA Only for YUV packed surfaces, NV12 and Y-channel only for Planar surfaces
+    //   if ((int)(u_left*width + 5.0/256) > (int)(u_left*width))
+    //   {
+    //       modified_u_coord = u_coord - 5.0/(256*width); //floating point
+    //   }
+    //   else if ((int)(u_left*width + 255.0/256) == (int)(u_left*width))
+    //   {
+    //       modified_u_coord = u_coord + 1.0/(256*width); //floating point
+    //   }
+    //   else {
+    //       modified_u_coord = u_coord;
+    //   }
+    //   where u_left = u - 2*du + 3*ddu for the IEF On case,
+    //   and   u_left = u for the IEF Off case.
+    //   
+
+    // check whether Gen7 AVS WA is enabled,
+    mov  (1)    r14.8:uw            f0.0:uw           // save f0.0
+
+    and.nz.f0.0  (1)     null<1>:uw     r2.3:uw    0x2:uw
+    (-f0.0)jmpi  (1)     GEN7_AVS_WA_DONE_L0_3_
+
+
+    // Gen7 AVS WA, check if IEF is ON for choosing Gen7 AVS WA formula
+
+    and.nz.f0.0  (8)     null<1>:uw     r2.3<0;1,0>:uw             0x4:uw
+    (f0.0)mov (8)    acc0.0:f          r17.2<0;1,0>:f
+    (f0.0)mac (8)    acc0.0:f          r17.4<0;1,0>:f    -2.0:f
+    (f0.0)mac (8)    acc0.0:f          r17.6<0;1,0>:f     3.0:f
+    (f0.0)mov (1)    r14.2:f        acc0:f                                 // IEF ON,  rTEMP3.2 = u_left
+    (-f0.0)mov (1)   r14.2:f        r17.2<0;1,0>:f                // IEF OFF, rTEMP3.2 = u_left
+
+    and (1)         r14.1:ud     r2.3:uw    0xFFF8:uw
+    asr (1)         r14.1:ud     r14.1:ud    3:d
+    mov (1)         r14.1:f      r14.1:ud
+
+    // Gen7 AVS WA, if (int)(u_left*width + 5.0/256) > (int)(u_left*width)
+    mul (1)          r14.0:f        r14.2:f                  r14.1:f    // rTEMP3.0 = u_left*width
+    add (1)          r14.2:f        r14.0:f                  0.01953125:f  // rTEMP3.2 = u_left*width + 5.0/256
+    add (1)          r14.3:f        r14.0:f                  0.99609375:f  // rTEMP3.3 = u_left*width + 255.0/256
+
+    //Check if the values are < 0 and account for (int) cast of negative numbers
+
+    //(int)(u_left*width)
+    cmp.l.f0.0 (1)   null<1>:f          r14.0:f                  0.00000000:f          
+    mov (1)          r14.0:d        r14.0:f
+    (f0.0)add (1)    r14.0:d        r14.0<0;1,0>:d           -1:d        
+
+    //(int)(u_left*width + 5.0/256)
+    cmp.l.f0.0 (1)   null<1>:f          r14.2:f                  0.00000000:f          
+    mov (1)          r14.2:d        r14.2:f        
+    (f0.0)add (1)    r14.2:d        r14.2<0;1,0>:d           -1:d        
+
+    //(int)(u_left*width + 255.0/256)
+    cmp.l.f0.0 (1)   null<1>:f          r14.3:f                  0.00000000:f          
+    mov (1)          r14.3:d        r14.3:f
+    (f0.0)add (1)    r14.3:d        r14.3<0;1,0>:d           -1:d        
+
+    mov  (1)    f0.0:uw                0:uw                   // clear flag
+    //if (((int)(u_left*width + 5.0/256) > (int)(u_left*width))
+    cmp.g.f1.0  (1)     null<1>:d        r14.2:d    r14.0:d
+    // modified_u_coord = u_coord – 5.0/(256*width); //floating point
+    (f1.0) add (1)     r17.2:f       r17.2<0;1,0>:f   -r2.3:f
+    //else if(((int)(u_left*width + 255.0/256) == (int)(u_left*width)) 
+    (-f1.0) cmp.e.f0.0  (1) null<1>:d    r14.3:d    r14.0:d
+    // modified_u_coord = u_coord + 1.0/(256*width); //floating point
+    (f0.0) add (1)     r17.2:f       r17.2<0;1,0>:f   r2.2:f
+
+
+GEN7_AVS_WA_DONE_L0_3_:
+    mov  (1)    f0.0:uw                r14.8:uw                   // restore f0.0
+
+    send (1)    uwBUFFER_3(0)<1>   r16    0x2    a0.0:ud
+    // Returns packed data in 16 GRFs in scrambled order
+
+SKIP_AVS_LOAD_L0_3_:
+    nop
+
+
diff --git a/src/shaders/post_processing/gen7/PA_DI_422CP.g4a b/src/shaders/post_processing/gen7/PA_DI_422CP.g4a
new file mode 100644
index 0000000..9f9bed0
--- /dev/null
+++ b/src/shaders/post_processing/gen7/PA_DI_422CP.g4a
@@ -0,0 +1,461 @@
+/*
+ *  Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+//   87    // Total instruction count
+//    1    // Total kernel count
+
+.kernel PA_DI_422CP
+.code
+
+
+
+// FileName:	DI.asm 
+// Author:		Vivek Kumar
+// Description:	Tasks for DI only case (16x4 block)
+
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16)
+.default_register_type  :ub
+
+.reg_count_total        128
+.reg_count_payload      7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+                                    //  e.g.            byte0   byte1  byte2
+                                    // YUYV               0       1      3
+                                    // YVYU               0       3      1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+                                    //  e.g.              byte0           byte1           byte2
+                                    // YUYV                 0               1               3
+                                    // YVYU                 0               3               1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+//                = 000 0001 (min message len 1 ) 0,0000 (resp len 0   -add later)
+//                  0000,0000,0000
+//                  0001(Spawn a root thread),0001 (Root thread spawn thread)
+//                = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+//                = 000 0110 (min message len 6 ) 0,0000 (resp len 0   -add later)
+//                  1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+//                  0000,0000 (Binding table index, added later)
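+//                = 0x02000011   (NOTE(review): same value as the thread-spawner descriptor above and inconsistent with "message len 6" - looks copy-pasted; confirm)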
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+        // Message Descriptors
+                //                = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+                //                  1 (header present 1) 0 1010 (media block write) 000000
+                //                  00000000 (binding table index - set later)
+                //                = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+                             // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT  r43    // Dummy Message Register for EOT
+
+
+.declare    mubMSGPAYLOAD  Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare    muwMSGPAYLOAD  Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare    mudMSGPAYLOAD  Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare    mfMSGPAYLOAD   Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+
+.declare fROBUF		  Base=r9.0		ElementSize=4		SrcRegion=<8;8,1>		  DstRegion=<1>		Type=f
+
+.declare udROBUF		Base=r9.0		ElementSize=4		SrcRegion=<8;8,1>		  DstRegion=<1>		Type=ud
+
+.declare uwROBUF		Base=r9.0		ElementSize=2		SrcRegion=<16;16,1>		DstRegion=<1>		Type=uw
+
+.declare ubROBUF		Base=r9.0		ElementSize=1		SrcRegion=<16;16,1>		DstRegion=<1>		Type=ub
+
+.declare ub4ROBUF 	Base=r9.0		ElementSize=1		SrcRegion=<32;8,4>		DstRegion=<4>		Type=ub
+
+
+// End of common.inc
+
+
+// FileName:    DNDI.inc
+// Author:      Vivek Kumar
+// Description: Include file for DN, DI and DNDI
+// Inputs:      DI_ENABLE, DN_ENABLE, DN_PLANAR, DN_PACKED
+
+
+
+
+// End of common.inc
+
+
+//Interface:
+//Static Parameters:
+//r1
+
+
+//====================== Binding table (Explicit To DNDI)=========================================
+
+
+.declare mudMSGHDR_DNDI     Base=r18      ElementSize=4    Type=ud	// DNDI request: header r18, origin words written into r19
+.declare mdMSGHDR_DNDI      Base=r18      ElementSize=4    Type=d	// signed-dword view of the same GRFs
+.declare mwMSGHDR_DNDI      Base=r18      ElementSize=2    Type=w	// word view of the same GRFs
+
+
+.declare mudMSGHDR_STMM     Base=r20      ElementSize=4    Type=ud	// STMM write: header r20, payload r21
+
+
+.declare mudMSGHDR_HIST     Base=r22      ElementSize=4    Type=ud	// not referenced by the instructions of this kernel
+
+
+.declare mudMSGHDR_ENC_STATS Base=r24 ElementSize=4   Type=ud	// encoder-statistics write: header r24, payload r25
+.declare muwMSGHDR_ENC_STATS Base=r24 ElementSize=2   Type=uw	// word view of the same GRFs
+.declare mubMSGHDR_ENC_STATS Base=r24 ElementSize=1   Type=ub	// byte view of the same GRFs
+
+
+.declare mudMSGHDR_DN_OUT   Base=r31.0      ElementSize=4  Type=ud	// not referenced by the instructions of this kernel
+.declare mdMSGHDR_DN_OUT    Base=r31.0      ElementSize=4  Type=d
+.declare mubMSGHDR_DN_OUT   Base=r31.0      ElementSize=1  Type=ub
+
+
+.declare mudMSGHDR_UVCOPY   Base=r36      ElementSize=4  Type=ud	// not referenced by the instructions of this kernel
+.declare mdMSGHDR_UVCOPY    Base=r36      ElementSize=4  Type=d
+.declare mudMSGHDR_UCOPY    Base=r36       ElementSize=4  Type=ud	// same base GRF as mudMSGHDR_UVCOPY
+.declare mudMSGHDR_VCOPY    Base=r38       ElementSize=4  Type=ud
+
+
+.declare mudMSGHDR_DI_OUT1  Base=r18.0      ElementSize=4     Type=ud	// same base GRF (r18) as the DNDI request header above
+.declare mubMSGHDR_DI_OUT1  Base=r18.0      ElementSize=1     Type=ub
+
+
+.declare mudMSGHDR_DI_OUT2  Base=r23.0      ElementSize=4     Type=ud	// not referenced by the instructions of this kernel
+.declare mubMSGHDR_DI_OUT2  Base=r23.0      ElementSize=1     Type=ub
+
+//r45
+//Use r45 as message header, so no need to "mov" the data.
+
+.declare mudDN_Y_OUT        Base=r45.0 ElementSize=4 SrcRegion=<8;8,1>   DstRegion=<1> Type=ud
+
+// Message response (denoised & DI-ed pixels & statistics); use buffer 5. The DNDI send in this kernel requests 10 GRFs, i.e. r46-r55.
+.declare udDNDI_RESP        Base=r46.0 ElementSize=4 SrcRegion=<8;8,1>   DstRegion=<1> Type=ud
+.declare uwDNDI_RESP        Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare ubDNDI_RESP        Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+// Message response (UV copy); use buffer 5. Starts at r58; not referenced by the instructions of this kernel.
+.declare udDNDI_UV_RESP     Base=r58.0 ElementSize=4 SrcRegion=<8;8,1>  DstRegion=<1> Type=ud
+.declare ubDNDI_UV_RESP     Base=r58.0 ElementSize=1 SrcRegion=<16;16,1>    DstRegion=<1> Type=ub
+
+//Temp GRFs: for 42X to 422 conversion (not referenced by the instructions of this kernel)
+.declare uwDNDI_UVCOPY_TEMP Base=r10.0 ElementSize=2 SrcRegion=<16;16,1>    DstRegion=<1> Type=uw       //8 GRFs
+.declare ubDNDI_UVCOPY_TEMP Base=r10.0 ElementSize=1 SrcRegion=<16;16,1>    DstRegion=<1> Type=ub       //8 GRFs
+//---------------------------------------------------------------------------
+// Message descriptors
+//---------------------------------------------------------------------------
+// Extended message descriptor
+    // Message descriptor   for sampler read
+    //                    = 000 0010 (message len 2) 00000 (resp len - set later, 12 or 5 or 11)
+    //                      1 (header present 1) 0 11 (SIMD32/64 mode)
+    //                      1000 (message type) 0000 (DI state index)
+    //                      00000000 (binding table index - set later)
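+    //                    = 0x040b8000   (NOTE(review): the DNDI send in this kernel uses 0x4AE8003, i.e. 0xE8 rather than 0xB8 in bits 19:12 - confirm which descriptor layout this comment describes)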
+
+
+// Attention: The Message Length is The Number of GRFs with Data Only, without the Header
+
+
+//---------------------------------------------------------------------------
+// VDI Return Data format
+//---------------------------------------------------------------------------
+// Defines for DI enabled
+
+
+// Defines for DI disabled
+
+
+
+// FileName:	DNDI_Command.asm 
+// Author:		Vivek Kumar
+// Description:	Sends a message to the VDI to process one DN (16x8) or DNDI (16x4) block
+
+// Prepare the DNDI send command: header r18 is a copy of r0, the block origin comes from r7.0/r7.1 and goes into r19
+mov (8)		mudMSGHDR_DNDI(0)<1>			r0.0<8;8,1>:ud					// message header
+mov (1)		mwMSGHDR_DNDI(1,4)<1>			r7.0<0;1,0>:w		{ NoDDClr }		// horizontal origin -> word 4 of r19	// Do we need to add offset here? -vK
+mov (1)		mwMSGHDR_DNDI(1,12)<1>			r7.1<0;1,0>:w		{ NoDDChk }		// vertical origin -> word 12 of r19	// Can these 2 be combined? - vK
+
+send (8)	udDNDI_RESP(0)<1>	r18	0x2	0x4AE8003:ud	// descriptor 0x4AE8003: message length 2 (r18-r19), response length 10 GRFs (r46-r55), binding table index 3
+
+// On Gen6, with VDI walker, use the XY pair returned rather than the one programmed above
+// VDI_RETURNED_XY is ordered XY when the walker is enabled, and is the same as programmed when the walker is disabled
+mov (2) 	r7.0<1>:w     uwDNDI_RESP(9,14)<2;2,1>	// horizontal/vertical origin: words 14 and 15 of response GRF 9 (r55) -> r7.0, r7.1
+
+
+
+// FileName:	DI_STMM_Save.asm 
+// Author:		Vivek Kumar
+// Description:	Saves DI STMM Data to statistics surface in case of DI enabled (for 16x4 block)
+
+// Write STMM to memory: header r20 + one payload GRF r21
+mov (8)     mudMSGHDR_STMM(0)<1>	r0.0<8;8,1>:ud               							// message header (copy of r0)
+mov (8)     mudMSGHDR_STMM(1)<1>   	udDNDI_RESP(8,0)         					// copy response GRF 8 (r54, STMM data) into payload GRF r21
+
+shr (1)     mudMSGHDR_STMM(0,0)<1>	r7.0<0;1,0>:w            1:w     { NoDDClr } 		// X origin / 2
+mov (1)     mudMSGHDR_STMM(0,1)<1>	r7.1<0;1,0>:w                    { NoDDClr, NoDDChk }  	// Y origin
+mov (1)     mudMSGHDR_STMM(0,2)<1>	0x30007:ud           { NoDDChk } 		// block width and height (8x4), encoded as width-1 / height-1
+
+send (8)	null<1>:d	r20	0x5		0x40A8021:ud      	// media block write: message length 2 (r20-r21), no response, binding table index 0x21
+
+
+
+// FileName:	DNDI_Enc_Stats_Save.asm
+// Author:		Vivek Kumar
+// Description:	Saves Encoder Statistics data to statistics surface in case of DI enabled (for 16x4 block)
+
+// Write encoder statistics to memory: header r24 + one payload GRF r25
+//Currently enable this only on Gen6 validation
+mov (8)		mudMSGHDR_ENC_STATS(1)<1>	0x0:ud						// zero the payload GRF (r25)
+mov (8)		mudMSGHDR_ENC_STATS(0)<1>	r0.0<8;8,1>:ud				// message header (copy of r0)
+
+shr (1)		mudMSGHDR_ENC_STATS(0,0)<1>		r7.0<0;1,0>:w            	1:w  	{ NoDDClr }			// X origin / 2  (original note on the flag: "enable the flag after testing on si")
+mul (1)		acc0.1<1>:ud					r7.1<0;1,0>:w				3:w																							// Y origin * 3
+shr (1)		mudMSGHDR_ENC_STATS(0,1)<1>		acc0.1<0;1,0>:ud			2:w		{ NoDDClr, NoDDChk }		// Y origin * 3/4  (original note on the flags: "enable the flag after testing on si")
+mov (1)		mudMSGHDR_ENC_STATS(0,2)<1>		0x20007:ud				{ NoDDChk }			// block width and height (8x3), encoded as width-1 / height-1  (original note on the flag: "enable the flag after testing on si")
+add (2)		mudMSGHDR_ENC_STATS(0,0)<1>		mudMSGHDR_ENC_STATS(0,0)<2;2,1>       	r1.12<2;2,1>:uw					// Add pitch to X,Y origin (adds words r1.12 and r1.13 to header dwords 0 and 1)
+
+
+	//Data block for Encoder Statistics
+	//----------------------------------------------------
+	//|  0  |   1  |   2   |  3  |  4  |  5  |  6  |  7  | Bytes
+	//----------------------------------------------------
+	//| BNE | MCNT | FCNT | TCNT |  X  |  X  |  X  |  X  |
+	//----------------------------------------------------
+	//|   DcTpT    |     SVCM    |   DcBpT   |   DcTpB   |
+	//----------------------------------------------------
+	//|   SHCM     |     STAD    |   DcTcB   |   DcBpB   |
+	//----------------------------------------------------
+	mov (1)		mudMSGHDR_ENC_STATS(1,0)<1>		udDNDI_RESP(9,1)<0;1,0>    		{ NoDDClr }			// response GRF 9 dword 1 -> payload dword 0
+	mov (2)		mudMSGHDR_ENC_STATS(1,3)<2>		udDNDI_RESP(9,3)<2;2,1>    		{ NoDDClr, NoDDChk }		// response GRF 9 dwords 3,4 -> payload dwords 3,5
+	mov (2)		mudMSGHDR_ENC_STATS(1,2)<2>		udDNDI_RESP(9,5)<2;2,1>    		{ NoDDChk }			// response GRF 9 dwords 5,6 -> payload dwords 2,4
+
+
+send (8)   null<1>:d    r24    0x5    0x40A8021:ud	// media block write: message length 2 (r24-r25), no response, binding table index 0x21
+
+
+
+// FileName:    DI_Save_422CP_16x4.asm
+// Author:      Vivek Kumar
+// Description: Save two 16x4 blocks of DI output in 422 format to Color Pipe (IECP)
+
+
+.declare mubMSGHDR_DI_OUT1_1  Base=r18.0      ElementSize=1  Type=ub	// 2nd field, first 8x4 block: header r18, payload r19-r20
+
+
+.declare mubMSGHDR_DI_OUT1_2  Base=r21.0      ElementSize=1  Type=ub	// 2nd field, second 8x4 block: header r21, payload r22-r23
+
+
+.declare mubMSGHDR_DI_OUT2_1  Base=r24.0      ElementSize=1  Type=ub	// 1st field, first 8x4 block: header r24, payload r25-r26
+
+
+.declare mubMSGHDR_DI_OUT2_2  Base=r27.0      ElementSize=1  Type=ub	// 1st field, second 8x4 block: header r27, payload r28-r29
+
+
+mov (8) r27.0<1>:ud     r0.0<8;8,1>:ud		// build the common write header in r27, starting from a copy of r0
+shl (1) r27.0<1>:ud     r7.0<0;1,0>:w            1:w  { NoDDClr }          // horizontal block origin is doubled (X origin * 2)
+mov (1) r27.1<1>:ud     r7.1<0;1,0>:w                 { NoDDClr, NoDDChk }       // vertical block origin
+mov (1) r27.2<1>:ud     0x3000F:ud        { NoDDClr, NoDDChk }       // Block width and height (16x4), encoded as width-1 = 0xF / height-1 = 3
+
+//M0.3  - 0 - CP Enable, 1 - Area of Interest, 3:2 Message Format(TBD), 4:3 - Ignored, 31:5 CP state pointer
+//Compose area-of-interest bit + color pipe state pointer (r2.4 dword OR'ed with byte r7.26)
+or (1)  r27.3<1>:ud     r2.4<0;1,0>:ud     r7.26<0;1,0>:b     { NoDDChk }
+
+//prepare the message headers: r18 (2nd field) and r24 (1st field) both start as copies of r27
+mov (8) r18.0<1>:ud       r27<8;8,1>:ud
+mov (8) r24.0<1>:ud       r27<8;8,1>:ud
+
+
+// Pack 2nd field Y; First 8x4 block. Destination stride <2> puts each Y byte on an even byte of the payload (r19-r20, two 16-byte rows per GRF)
+	mov (8)		mubMSGHDR_DI_OUT1_1(1)<2>			ubDNDI_RESP(0,0)<8;8,1>			{ NoDDClr } 	// Y row 0: response bytes 0-7 -> even bytes of r19[0..15]
+	mov (8)		mubMSGHDR_DI_OUT1_1(1,16)<2>		ubDNDI_RESP(0,16)<8;8,1>			{ NoDDClr, NoDDChk } 	// Y row 1: response bytes 16-23 -> even bytes of r19[16..31]
+	mov (8)		mubMSGHDR_DI_OUT1_1(2)<2>			ubDNDI_RESP(0,32)<8;8,1>			{ NoDDClr } 	// Y row 2: response bytes 32-39 -> even bytes of r20[0..15]
+	mov (8)		mubMSGHDR_DI_OUT1_1(2,16)<2>		ubDNDI_RESP(0,48)<8;8,1>			{ NoDDClr, NoDDChk } 	// Y row 3: response bytes 48-55 -> even bytes of r20[16..31]
+
+// Pack 2nd field U, V; First 8x4 block. U is read from the odd bytes and V from the even bytes of response GRFs 2-3; destination stride <4>
+	mov (4)     mubMSGHDR_DI_OUT1_1(1,1)<4>   		ubDNDI_RESP(2,1)<8;4,2>		{ NoDDClr, NoDDChk } 	// U row 0 -> r19 bytes 1,5,9,13
+	mov (4)     mubMSGHDR_DI_OUT1_1(1,17)<4>   	ubDNDI_RESP(2,17)<8;4,2>		{ NoDDClr, NoDDChk } 	// U row 1 -> r19 bytes 17,21,25,29
+
+	mov (4)     mubMSGHDR_DI_OUT1_1(1,3)<4>	  	ubDNDI_RESP(2,0)<8;4,2>			{ NoDDChk }    	// V row 0 -> r19 bytes 3,7,11,15
+	mov (4)     mubMSGHDR_DI_OUT1_1(1,19)<4>   	ubDNDI_RESP(2,16)<8;4,2>		{ NoDDChk }    	// V row 1 -> r19 bytes 19,23,27,31
+	mov (4)     mubMSGHDR_DI_OUT1_1(2,1)<4>   		ubDNDI_RESP(2,33)<8;4,2>		{ NoDDClr, NoDDChk } 	// U row 2 -> r20 bytes 1,5,9,13
+	mov (4)     mubMSGHDR_DI_OUT1_1(2,17)<4>   	ubDNDI_RESP(2,49)<8;4,2>		{ NoDDClr, NoDDChk } 	// U row 3 -> r20 bytes 17,21,25,29
+
+	mov (4)     mubMSGHDR_DI_OUT1_1(2,3)<4>	  	ubDNDI_RESP(2,32)<8;4,2>			{ NoDDChk }    	// V row 2 -> r20 bytes 3,7,11,15
+	mov (4)     mubMSGHDR_DI_OUT1_1(2,19)<4>   	ubDNDI_RESP(2,48)<8;4,2>		{ NoDDChk }    	// V row 3 -> r20 bytes 19,23,27,31
+
+
+// Pack 2nd field Y; Second 8x4 block. Header r21 is a copy of r18 with the X origin advanced by 0x10
+mov	(8)	r21.0<1>:ud		r18.0<8;8,1>:ud
+add	(1)	r21.0<1>:ud		r21.0<0;1,0>:w		0x10:w
+
+	mov (8)		mubMSGHDR_DI_OUT1_2(1)<2>			ubDNDI_RESP(0,8)<8;8,1>			{ NoDDClr } 	// Y row 0: response bytes 8-15 -> even bytes of r22[0..15]
+	mov (8)		mubMSGHDR_DI_OUT1_2(1,16)<2>		ubDNDI_RESP(0,24)<8;8,1>		{ NoDDClr, NoDDChk } 	// Y row 1: response bytes 24-31 -> even bytes of r22[16..31]
+	mov (8)		mubMSGHDR_DI_OUT1_2(2)<2>			ubDNDI_RESP(0,40)<8;8,1>			{ NoDDClr } 	// Y row 2: response bytes 40-47 -> even bytes of r23[0..15]
+	mov (8)		mubMSGHDR_DI_OUT1_2(2,16)<2>		ubDNDI_RESP(0,56)<8;8,1>		{ NoDDClr, NoDDChk } 	// Y row 3: response bytes 56-63 -> even bytes of r23[16..31]
+
+// Pack 2nd field U, V; Second 8x4 block (same layout as the first block, source offset by 8 bytes per row)
+	mov (4)     mubMSGHDR_DI_OUT1_2(1,1)<4>   		ubDNDI_RESP(2,9)<8;4,2>		{ NoDDClr, NoDDChk } 	// U row 0 -> r22 bytes 1,5,9,13
+	mov (4)     mubMSGHDR_DI_OUT1_2(1,17)<4>		ubDNDI_RESP(2,25)<8;4,2>	{ NoDDClr, NoDDChk } 	// U row 1 -> r22 bytes 17,21,25,29
+
+	mov (4)     mubMSGHDR_DI_OUT1_2(1,3)<4>   		ubDNDI_RESP(2,8)<8;4,2>		{ NoDDChk }    	// V row 0 -> r22 bytes 3,7,11,15
+	mov (4)     mubMSGHDR_DI_OUT1_2(1,19)<4>		ubDNDI_RESP(2,24)<8;4,2>		{ NoDDChk }    	// V row 1 -> r22 bytes 19,23,27,31
+	mov (4)     mubMSGHDR_DI_OUT1_2(2,1)<4>   		ubDNDI_RESP(2,41)<8;4,2>		{ NoDDClr, NoDDChk } 	// U row 2 -> r23 bytes 1,5,9,13
+	mov (4)     mubMSGHDR_DI_OUT1_2(2,17)<4>		ubDNDI_RESP(2,57)<8;4,2>	{ NoDDClr, NoDDChk } 	// U row 3 -> r23 bytes 17,21,25,29
+
+	mov (4)     mubMSGHDR_DI_OUT1_2(2,3)<4>   		ubDNDI_RESP(2,40)<8;4,2>		{ NoDDChk }    	// V row 2 -> r23 bytes 3,7,11,15
+	mov (4)     mubMSGHDR_DI_OUT1_2(2,19)<4>		ubDNDI_RESP(2,56)<8;4,2>		{ NoDDChk }    	// V row 3 -> r23 bytes 19,23,27,31
+
+send (8)    null<1>:d    r18.0   0x5     0x60A801B:ud	// write first block: message length 3 (r18-r20), no response, binding table index 0x1B
+send (8)    null<1>:d    r21.0   0x5     0x60A801B:ud	// write second block: message length 3 (r21-r23), same binding table index
+
+// Pack 1st field Y; 1st 8x4 block. Source is response GRFs 4-5; destination stride <2> puts each Y byte on an even byte of the payload (r25-r26)
+	mov (8)		mubMSGHDR_DI_OUT2_1(1)<2>			ubDNDI_RESP(4,0)<8;8,1>			{ NoDDClr } 	// Y row 0: bytes 0-7 -> even bytes of r25[0..15]
+	mov (8)		mubMSGHDR_DI_OUT2_1(1,16)<2>		ubDNDI_RESP(4,16)<8;8,1>			{ NoDDClr, NoDDChk } 	// Y row 1: bytes 16-23 -> even bytes of r25[16..31]
+	mov (8)		mubMSGHDR_DI_OUT2_1(2)<2>			ubDNDI_RESP(4,32)<8;8,1>			{ NoDDClr } 	// Y row 2: bytes 32-39 -> even bytes of r26[0..15]
+	mov (8)		mubMSGHDR_DI_OUT2_1(2,16)<2>		ubDNDI_RESP(4,48)<8;8,1>			{ NoDDClr, NoDDChk } 	// Y row 3: bytes 48-55 -> even bytes of r26[16..31]
+
+// Pack 1st field U,V; 1st 8x4 block. U is read from the odd bytes and V from the even bytes of response GRFs 6-7; destination stride <4>
+	mov (4)     mubMSGHDR_DI_OUT2_1(1,1)<4>   		ubDNDI_RESP(6,1)<8;4,2>		{ NoDDClr, NoDDChk } 	// U row 0 -> r25 bytes 1,5,9,13
+	mov (4)     mubMSGHDR_DI_OUT2_1(1,17)<4>   	ubDNDI_RESP(6,17)<8;4,2>		{ NoDDClr, NoDDChk } 	// U row 1 -> r25 bytes 17,21,25,29
+
+	mov (4)     mubMSGHDR_DI_OUT2_1(1,3)<4>	  	ubDNDI_RESP(6,0)<8;4,2>		    { NoDDChk }    	// V row 0 -> r25 bytes 3,7,11,15
+	mov (4)     mubMSGHDR_DI_OUT2_1(1,19)<4>   	ubDNDI_RESP(6,16)<8;4,2>	    { NoDDChk }    	// V row 1 -> r25 bytes 19,23,27,31
+	mov (4)     mubMSGHDR_DI_OUT2_1(2,1)<4>   		ubDNDI_RESP(6,33)<8;4,2>		{ NoDDClr, NoDDChk } 	// U row 2 -> r26 bytes 1,5,9,13
+	mov (4)     mubMSGHDR_DI_OUT2_1(2,17)<4>   	ubDNDI_RESP(6,49)<8;4,2>		{ NoDDClr, NoDDChk } 	// U row 3 -> r26 bytes 17,21,25,29
+
+	mov (4)     mubMSGHDR_DI_OUT2_1(2,3)<4>	  	ubDNDI_RESP(6,32)<8;4,2>		    { NoDDChk }    	// V row 2 -> r26 bytes 3,7,11,15
+	mov (4)     mubMSGHDR_DI_OUT2_1(2,19)<4>   	ubDNDI_RESP(6,48)<8;4,2>	    { NoDDChk }    	// V row 3 -> r26 bytes 19,23,27,31
+
+// Pack 1st field Y; 2nd 8x4 block. Header r27 is overwritten with a copy of r24 and its X origin advanced by 0x10
+mov	(8)	r27.0<1>:ud		r24.0<8;8,1>:ud
+add	(1)	r27.0<1>:ud		r27.0<0;1,0>:w		0x10:w
+
+	mov (8)		mubMSGHDR_DI_OUT2_2(1)<2>			ubDNDI_RESP(4,8)<8;8,1>			{ NoDDClr } 	// Y row 0: bytes 8-15 -> even bytes of r28[0..15]
+	mov (8)		mubMSGHDR_DI_OUT2_2(1,16)<2>		ubDNDI_RESP(4,24)<8;8,1>		{ NoDDClr, NoDDChk } 	// Y row 1: bytes 24-31 -> even bytes of r28[16..31]
+	mov (8)		mubMSGHDR_DI_OUT2_2(2)<2>			ubDNDI_RESP(4,40)<8;8,1>			{ NoDDClr } 	// Y row 2: bytes 40-47 -> even bytes of r29[0..15]
+	mov (8)		mubMSGHDR_DI_OUT2_2(2,16)<2>		ubDNDI_RESP(4,56)<8;8,1>		{ NoDDClr, NoDDChk } 	// Y row 3: bytes 56-63 -> even bytes of r29[16..31]
+
+// Pack 1st field U, V; 2nd 8x4 block (same layout as the 1st block, source offset by 8 bytes per row)
+	mov (4)     mubMSGHDR_DI_OUT2_2(1,1)<4>   		ubDNDI_RESP(6,9)<8;4,2>		{ NoDDClr, NoDDChk } 	// U row 0 -> r28 bytes 1,5,9,13
+	mov (4)     mubMSGHDR_DI_OUT2_2(1,17)<4>		ubDNDI_RESP(6,25)<8;4,2>	{ NoDDClr, NoDDChk } 	// U row 1 -> r28 bytes 17,21,25,29
+
+	mov (4)     mubMSGHDR_DI_OUT2_2(1,3)<4>   		ubDNDI_RESP(6,8)<8;4,2>		{ NoDDChk }    	// V row 0 -> r28 bytes 3,7,11,15
+	mov (4)     mubMSGHDR_DI_OUT2_2(1,19)<4>		ubDNDI_RESP(6,24)<8;4,2>		{ NoDDChk }    	// V row 1 -> r28 bytes 19,23,27,31
+	mov (4)     mubMSGHDR_DI_OUT2_2(2,1)<4>   		ubDNDI_RESP(6,41)<8;4,2>		{ NoDDClr, NoDDChk } 	// U row 2 -> r29 bytes 1,5,9,13
+	mov (4)     mubMSGHDR_DI_OUT2_2(2,17)<4>		ubDNDI_RESP(6,57)<8;4,2>	{ NoDDClr, NoDDChk } 	// U row 3 -> r29 bytes 17,21,25,29
+
+	mov (4)     mubMSGHDR_DI_OUT2_2(2,3)<4>   		ubDNDI_RESP(6,40)<8;4,2>		{ NoDDChk }    	// V row 2 -> r29 bytes 3,7,11,15
+	mov (4)     mubMSGHDR_DI_OUT2_2(2,19)<4>		ubDNDI_RESP(6,56)<8;4,2>		{ NoDDChk }    	// V row 3 -> r29 bytes 19,23,27,31
+
+send (8)    null<1>:d    r24.0     0x5     0x60A801E:ud	// write 1st block: message length 3 (r24-r26), no response, binding table index 0x1E
+send (8)    null<1>:d    r27.0     0x5     0x60A801E:ud	// write 2nd block: message length 3 (r27-r29), same binding table index
+
+
+
+//End of Thread message: r127 carries a copy of r0 as the one-GRF message header
+
+mov (8) r127<1>:ud r0.0<8;8,1>:ud 
+ send (1) null<1>:d r127 0x27 0x02000010	// message length 1, no response; NOTE(review): presumably the 0x27 extended descriptor carries the end-of-thread indication - confirm against the PRM
+
+
+.end_code 
+.end_kernel
diff --git a/src/shaders/post_processing/gen7/PA_DI_PA.g4a b/src/shaders/post_processing/gen7/PA_DI_PA.g4a
new file mode 100644
index 0000000..f7a70f4
--- /dev/null
+++ b/src/shaders/post_processing/gen7/PA_DI_PA.g4a
@@ -0,0 +1,399 @@
+/*
+ *  Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+//   57    // Total instruction count
+//    1    // Total kernel count
+
+.kernel PA_DI_PA
+.code
+
+
+
+// FileName:	DI.asm 
+// Author:		Vivek Kumar
+// Description:	Tasks for DI only case (16x4 block)
+
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16)
+.default_register_type  :ub
+
+.reg_count_total        128
+.reg_count_payload      7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+                                    //  e.g.            byte0   byte1  byte2
+                                    // YUYV               0       1      3
+                                    // YVYU               0       3      1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+                                    //  e.g.              byte0           byte1           byte2
+                                    // YUYV                 0               1               3
+                                    // YVYU                 0               3               1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+//                = 000 0001 (min message len 1 ) 0,0000 (resp len 0   -add later)
+//                  0000,0000,0000
+//                  0001(Spawn a root thread),0001 (Root thread spawn thread)
+//                = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+//                = 000 0110 (min message len 6 ) 0,0000 (resp len 0   -add later)
+//                  1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+//                  0000,0000 (Binding table index, added later)
+//                = 0x02000011
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+        // Message Descriptors
+                //                = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+                //                  1 (header present 1) 0 1010 (media block write) 000000
+                //                  00000000 (binding table index - set later)
+                //                = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+                             // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT  r43    // Dummy Message Register for EOT
+
+
+.declare    mubMSGPAYLOAD  Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare    muwMSGPAYLOAD  Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare    mudMSGPAYLOAD  Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare    mfMSGPAYLOAD   Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+
+.declare fROBUF		  Base=r9.0		ElementSize=4		SrcRegion=<8;8,1>		  DstRegion=<1>		Type=f
+
+.declare udROBUF		Base=r9.0		ElementSize=4		SrcRegion=<8;8,1>		  DstRegion=<1>		Type=ud
+
+.declare uwROBUF		Base=r9.0		ElementSize=2		SrcRegion=<16;16,1>		DstRegion=<1>		Type=uw
+
+.declare ubROBUF		Base=r9.0		ElementSize=1		SrcRegion=<16;16,1>		DstRegion=<1>		Type=ub
+
+.declare ub4ROBUF 	Base=r9.0		ElementSize=1		SrcRegion=<32;8,4>		DstRegion=<4>		Type=ub
+
+
+// End of common.inc
+
+
+// FileName:    DNDI.inc
+// Author:      Vivek Kumar
+// Description: Include file for DN, DI and DNDI
+// Inputs:      DI_ENABLE, DN_ENABLE, DN_PLANAR, DN_PACKED
+
+
+
+
+// End of common.inc
+
+
+//Interface:
+//Static Parameters:
+//r1
+
+
+//====================== Binding table (Explicit To DNDI)=========================================
+
+
+.declare mudMSGHDR_DNDI     Base=r18      ElementSize=4    Type=ud
+.declare mdMSGHDR_DNDI      Base=r18      ElementSize=4    Type=d
+.declare mwMSGHDR_DNDI      Base=r18      ElementSize=2    Type=w
+
+
+.declare mudMSGHDR_STMM     Base=r20      ElementSize=4    Type=ud
+
+
+.declare mudMSGHDR_HIST     Base=r22      ElementSize=4    Type=ud
+
+
+.declare mudMSGHDR_ENC_STATS Base=r24 ElementSize=4   Type=ud
+.declare muwMSGHDR_ENC_STATS Base=r24 ElementSize=2   Type=uw
+.declare mubMSGHDR_ENC_STATS Base=r24 ElementSize=1   Type=ub
+
+
+.declare mudMSGHDR_DN_OUT   Base=r31.0      ElementSize=4  Type=ud
+.declare mdMSGHDR_DN_OUT    Base=r31.0      ElementSize=4  Type=d
+.declare mubMSGHDR_DN_OUT   Base=r31.0      ElementSize=1  Type=ub
+
+
+.declare mudMSGHDR_UVCOPY   Base=r36      ElementSize=4  Type=ud
+.declare mdMSGHDR_UVCOPY    Base=r36      ElementSize=4  Type=d
+.declare mudMSGHDR_UCOPY    Base=r36       ElementSize=4  Type=ud
+.declare mudMSGHDR_VCOPY    Base=r38       ElementSize=4  Type=ud
+
+
+.declare mudMSGHDR_DI_OUT1  Base=r18.0      ElementSize=4     Type=ud
+.declare mubMSGHDR_DI_OUT1  Base=r18.0      ElementSize=1     Type=ub
+
+
+.declare mudMSGHDR_DI_OUT2  Base=r23.0      ElementSize=4     Type=ud
+.declare mubMSGHDR_DI_OUT2  Base=r23.0      ElementSize=1     Type=ub
+
+//r45
+//Use r45 as message header, so no need to "mov" the data.
+
+.declare mudDN_Y_OUT        Base=r45.0 ElementSize=4 SrcRegion=<8;8,1>   DstRegion=<1> Type=ud
+
+// Message response (Denoised & DI-ed pixels & statistics); Use buffer 5
+.declare udDNDI_RESP        Base=r46.0 ElementSize=4 SrcRegion=<8;8,1>   DstRegion=<1> Type=ud
+.declare uwDNDI_RESP        Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare ubDNDI_RESP        Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+// Message response (UV Copy); Use buffer 5
+.declare udDNDI_UV_RESP     Base=r58.0 ElementSize=4 SrcRegion=<8;8,1>  DstRegion=<1> Type=ud
+.declare ubDNDI_UV_RESP     Base=r58.0 ElementSize=1 SrcRegion=<16;16,1>    DstRegion=<1> Type=ub
+
+//Temp GRFs: For 42X to 422 Conversion
+.declare uwDNDI_UVCOPY_TEMP Base=r10.0 ElementSize=2 SrcRegion=<16;16,1>    DstRegion=<1> Type=uw       //8 GRFs
+.declare ubDNDI_UVCOPY_TEMP Base=r10.0 ElementSize=1 SrcRegion=<16;16,1>    DstRegion=<1> Type=ub       //8 GRFs
+//---------------------------------------------------------------------------
+// Message descriptors
+//---------------------------------------------------------------------------
+// Extended message descriptor
+    // Message descriptor   for sampler read
+    //                    = 000 0010 (message len 2) 00000 (resp len - set later, 12 or 5 or 11)
+    //                      1 (header present 1) 0 11 (SIMD32/64 mode)
+    //                      1000 (message type) 0000 (DI state index)
+    //                      00000000 (binding table index - set later)
+    //                    = 0x040b8000
+
+
+// Attention: The Message Length is The Number of GRFs with Data Only, without the Header
+
+
+//---------------------------------------------------------------------------
+// VDI Return Data format
+//---------------------------------------------------------------------------
+// Defines for DI enabled
+
+
+// Defines for DI disabled
+
+
+
+// FileName:	DNDI_Command.asm 
+// Author:		Vivek Kumar
+// Description:	Sends a message to the VDI to process one DN (16x8) or DNDI (16x4) block
+
+// Prepare the DNDI send command
+mov (8)		mudMSGHDR_DNDI(0)<1>			r0.0<8;8,1>:ud					// message header (copy of r0 into r18)
+mov (1)		mwMSGHDR_DNDI(1,4)<1>			r7.0<0;1,0>:w		{ NoDDClr }		// horizontal origin	// Do we need to add offset here? -vK
+mov (1)		mwMSGHDR_DNDI(1,12)<1>			r7.1<0;1,0>:w		{ NoDDChk }		// vertical origin		// Can these 2 be combined? - vK
+
+send (8)	udDNDI_RESP(0)<1>	r18	0x2	0x4AE8003:ud	// request built at r18, reply written from udDNDI_RESP(0) (Base=r46); decoding 0x4AE8003 with the sampler-read layout documented above gives message len 2, response len 10, binding table index 3
+
+// On Gen6, with VDI walker, use the XY pair returned rather than programmed above
+// VDI_RETURNED_XY is ordered XY in case of walker enabled and the same as programmed in case of walker disabled
+mov (2) 	r7.0<1>:w     uwDNDI_RESP(9,14)<2;2,1>	// horizontal/vertical origin are in W.14 and W.15 of response row 9; they replace r7.0/r7.1
+
+
+
+// FileName:	DI_STMM_Save.asm 
+// Author:		Vivek Kumar
+// Description:	Saves DI STMM Data to statistics surface in case of DI enabled (for 16x4 block)
+
+// Write STMM to memory
+mov (8)     mudMSGHDR_STMM(0)<1>	r0.0<8;8,1>:ud               							// message header   
+mov (8)     mudMSGHDR_STMM(1)<1>   	udDNDI_RESP(8,0)         					// Move STMM to MRF 
+
+shr (1)     mudMSGHDR_STMM(0,0)<1>	r7.0<0;1,0>:w            1:w     { NoDDClr } 		// X origin / 2
+mov (1)     mudMSGHDR_STMM(0,1)<1>	r7.1<0;1,0>:w                    { NoDDClr, NoDDChk }  	// Y origin
+mov (1)     mudMSGHDR_STMM(0,2)<1>	0x30007:ud           { NoDDChk } 		// block width and height (8x4)
+
+send (8)	null<1>:d	r20	0x5		0x40A8021:ud      
+
+
+
+// FileName:	DNDI_Enc_Stats_Save.asm
+// Author:		Vivek Kumar
+// Description:	Saves Encoder Statistics data to statistics surface in case of DI enabled (for 16x4 block)
+
+// Write encoder statistics to memory
+//Currently enable this only on Gen6 validation
+mov (8)		mudMSGHDR_ENC_STATS(1)<1>	0x0:ud						// Init payload MRF
+mov (8)		mudMSGHDR_ENC_STATS(0)<1>	r0.0<8;8,1>:ud				// message header
+
+shr (1)		mudMSGHDR_ENC_STATS(0,0)<1>		r7.0<0;1,0>:w            	1:w  	{ NoDDClr }			//enable the flag after testing on si           			{ NoDDClr }	// X origin / 2
+mul (1)		acc0.1<1>:ud					r7.1<0;1,0>:w				3:w																							// Y origin * 3
+shr (1)		mudMSGHDR_ENC_STATS(0,1)<1>		acc0.1<0;1,0>:ud			2:w		{ NoDDClr, NoDDChk }		//enable the flag after testing on si			   { NoDDClr, NoDDChk } // Y origin * 3/4
+mov (1)		mudMSGHDR_ENC_STATS(0,2)<1>		0x20007:ud				{ NoDDChk }			//enable the flag after testing on si						{ NoDDChk } // block width and height (8x3)
+add (2)		mudMSGHDR_ENC_STATS(0,0)<1>		mudMSGHDR_ENC_STATS(0,0)<2;2,1>       	r1.12<2;2,1>:uw					// Add pitch to X,Y origin
+
+
+	//Data block for Encoder Statistics
+	//----------------------------------------------------
+	//|  0  |   1  |   2   |  3  |  4  |  5  |  6  |  7  | Bytes
+	//----------------------------------------------------
+	//| BNE | MCNT | FCNT | TCNT |  X  |  X  |  X  |  X  |
+	//----------------------------------------------------
+	//|   DcTpT    |     SVCM    |   DcBpT   |   DcTpB   |
+	//----------------------------------------------------
+	//|   SHCM     |     STAD    |   DcTcB   |   DcBpB   |
+	//----------------------------------------------------
+	mov (1)		mudMSGHDR_ENC_STATS(1,0)<1>		udDNDI_RESP(9,1)<0;1,0>    		{ NoDDClr }			// Move encoder statistics to MRF
+	mov (2)		mudMSGHDR_ENC_STATS(1,3)<2>		udDNDI_RESP(9,3)<2;2,1>    		{ NoDDClr, NoDDChk }		// Move encoder statistics to MRF
+	mov (2)		mudMSGHDR_ENC_STATS(1,2)<2>		udDNDI_RESP(9,5)<2;2,1>    		{ NoDDChk }			// Move encoder statistics to MRF
+
+
+send (8)   null<1>:d    r24    0x5    0x40A8021:ud
+
+
+
+// FileName:    DI_Save_PA_16x4.asm
+// Author:      Vivek Kumar
+// Description: Save two 16x4 blocks of DI output in Packed format
+
+
+add (4) a0.4<1>:uw   r2.28<4;4,1>:ub   608:w               // Initial Y,U,V offset in YUV422 block: a0.4/a0.5/a0.6 = bytes r2.28/r2.29/r2.30 + 608. 608 = 19*32, i.e. r19 (the row after the r18 header) assuming 32-byte GRFs; the former "m20" note looks like stale MRF numbering - TODO confirm
+
+mov (8) r27.0<1>:ud     r0.0<8;8,1>:ud                                  // build the common header in r27, starting from r0
+shl (1) r27.0<1>:d      r7.0<0;1,0>:w            1:w  { NoDDClr }          // H. block origin needs to be doubled (r7.0 << 1)
+mov (1) r27.1<1>:d      r7.1<0;1,0>:w                 { NoDDClr, NoDDChk }       // V. block origin (r7.1), unchanged
+mov (1) r27.2<1>:ud     0x3001F:ud          { NoDDChk }          // Block width and height (32x4): 0x1F = width-1, 0x3 = height-1, same encoding as 0x30007 = 8x4 in the STMM write
+
+//prepare the message headers: r18 heads the first write, r23 the second
+mov (8) r18.0<1>:ud       r27<8;8,1>:ud
+mov (8) r23.0<1>:ud       r27<8;8,1>:ud
+
+// Pack 2nd field Y: response rows 0-1, 16 bytes per line, written to every 2nd byte (dst stride 2) at offsets 0/32/64/96
+    mov (16)    r[a0.4, 0]<2>      ubDNDI_RESP(0,0)               { NoDDClr }
+    mov (16)    r[a0.4, 32]<2>      ubDNDI_RESP(0,16)               { NoDDClr }
+    mov (16)    r[a0.4, 64]<2>      ubDNDI_RESP(0,32)               { NoDDClr }
+    mov (16)    r[a0.4, 96]<2>      ubDNDI_RESP(0,48)               { NoDDClr }
+// Pack 2nd field U: odd source bytes of response rows 2-3, written to every 4th byte (dst stride 4)
+    mov (8)     r[a0.5, 0]<4>      ubDNDI_RESP(2,1)<16;8,2>   { NoDDClr, NoDDChk }  //U pixels
+    mov (8)     r[a0.5, 32]<4>      ubDNDI_RESP(2,17)<16;8,2>   { NoDDClr, NoDDChk }  //U pixels
+    mov (8)     r[a0.5, 64]<4>      ubDNDI_RESP(2,33)<16;8,2>   { NoDDClr, NoDDChk }  //U pixels
+    mov (8)     r[a0.5, 96]<4>      ubDNDI_RESP(2,49)<16;8,2>   { NoDDClr, NoDDChk }  //U pixels
+// Pack 2nd field V: even source bytes of response rows 2-3, written to every 4th byte (dst stride 4)
+    mov (8)     r[a0.6, 0]<4>      ubDNDI_RESP(2,0)<16;8,2>     { NoDDChk }     //V pixels
+    mov (8)     r[a0.6, 32]<4>      ubDNDI_RESP(2,16)<16;8,2>     { NoDDChk }     //V pixels
+    mov (8)     r[a0.6, 64]<4>      ubDNDI_RESP(2,32)<16;8,2>     { NoDDChk }     //V pixels
+    mov (8)     r[a0.6, 96]<4>      ubDNDI_RESP(2,48)<16;8,2>     { NoDDChk }     //V pixels
+
+// Pack 1st field Y: response rows 4-5; offsets 160..256 skip one 32-byte row (the r23 header) after the first payload
+    mov (16)    r[a0.4, 160]<2>    ubDNDI_RESP(4,0)               { NoDDClr }
+    mov (16)    r[a0.4, 192]<2>    ubDNDI_RESP(4,16)               { NoDDClr }
+    mov (16)    r[a0.4, 224]<2>    ubDNDI_RESP(4,32)               { NoDDClr }
+    mov (16)    r[a0.4, 256]<2>    ubDNDI_RESP(4,48)               { NoDDClr }
+// Pack 1st field U: odd source bytes of response rows 6-7
+    mov (8)     r[a0.5, 160]<4>    ubDNDI_RESP(6,1)<16;8,2>   { NoDDClr, NoDDChk }  //U pixels
+    mov (8)     r[a0.5, 192]<4>    ubDNDI_RESP(6,17)<16;8,2>   { NoDDClr, NoDDChk }  //U pixels
+    mov (8)     r[a0.5, 224]<4>    ubDNDI_RESP(6,33)<16;8,2>   { NoDDClr, NoDDChk }  //U pixels
+    mov (8)     r[a0.5, 256]<4>    ubDNDI_RESP(6,49)<16;8,2>   { NoDDClr, NoDDChk }  //U pixels
+// Pack 1st field V: even source bytes of response rows 6-7
+    mov (8)     r[a0.6, 160]<4>    ubDNDI_RESP(6,0)<16;8,2>     { NoDDChk }     //V pixels
+    mov (8)     r[a0.6, 192]<4>    ubDNDI_RESP(6,16)<16;8,2>     { NoDDChk }     //V pixels
+    mov (8)     r[a0.6, 224]<4>    ubDNDI_RESP(6,32)<16;8,2>     { NoDDChk }     //V pixels
+    mov (8)     r[a0.6, 256]<4>    ubDNDI_RESP(6,48)<16;8,2>     { NoDDChk }     //V pixels
+
+//save the previous frame (message headed by r18, holding the packed 2nd-field rows)
+send (8)    null<1>:d    r18.0     0x5     0xA0A801B:ud
+
+//save the current frame (message headed by r23, holding the packed 1st-field rows)
+send (8)    null<1>:d    r23.0     0x5     0xA0A801E:ud
+
+
+
+//End of Thread message
+
+mov (8) r127<1>:ud r0.0<8;8,1>:ud 
+ send (1) null<1>:d r127 0x27 0x02000010
+
+
+.end_code 
+.end_kernel
diff --git a/src/shaders/post_processing/gen7/PA_DNDI_422CP.g4a b/src/shaders/post_processing/gen7/PA_DNDI_422CP.g4a
new file mode 100644
index 0000000..13302e8
--- /dev/null
+++ b/src/shaders/post_processing/gen7/PA_DNDI_422CP.g4a
@@ -0,0 +1,537 @@
+/*
+ *  Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+//  127    // Total instruction count
+//    1    // Total kernel count
+
+.kernel PA_DNDI_422CP
+.code
+
+
+
+// FileName:	DNDI_PL_Core.asm
+// Author:		Tatiya, Rupesh
+
+
+
+// FileName:	DNDI_Core.asm 
+// Author:		Vivek Kumar
+// Description:	Tasks for DN+DI case (16x4 block)
+
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16)
+.default_register_type  :ub
+
+.reg_count_total        128
+.reg_count_payload      7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+                                    //  e.g.            byte0   byte1  byte2
+                                    // YUYV               0       1      3
+                                    // YVYU               0       3      1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+                                    //  e.g.              byte0           byte1           byte2
+                                    // YUYV                 0               1               3
+                                    // YVYU                 0               3               1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+//                = 000 0001 (min message len 1 ) 0,0000 (resp len 0   -add later)
+//                  0000,0000,0000
+//                  0001(Spawn a root thread),0001 (Root thread spawn thread)
+//                = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+//                = 000 0110 (min message len 6 ) 0,0000 (resp len 0   -add later)
+//                  1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+//                  0000,0000 (Binding table index, added later)
+//                = 0x02000011
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+        // Message Descriptors
+                //                = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+                //                  1 (header present 1) 0 1010 (media block write) 000000
+                //                  00000000 (binding table index - set later)
+                //                = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+                             // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT  r43    // Dummy Message Register for EOT
+
+
+.declare    mubMSGPAYLOAD  Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare    muwMSGPAYLOAD  Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare    mudMSGPAYLOAD  Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare    mfMSGPAYLOAD   Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+
+.declare fROBUF		  Base=r9.0		ElementSize=4		SrcRegion=<8;8,1>		  DstRegion=<1>		Type=f
+
+.declare udROBUF		Base=r9.0		ElementSize=4		SrcRegion=<8;8,1>		  DstRegion=<1>		Type=ud
+
+.declare uwROBUF		Base=r9.0		ElementSize=2		SrcRegion=<16;16,1>		DstRegion=<1>		Type=uw
+
+.declare ubROBUF		Base=r9.0		ElementSize=1		SrcRegion=<16;16,1>		DstRegion=<1>		Type=ub
+
+.declare ub4ROBUF 	Base=r9.0		ElementSize=1		SrcRegion=<32;8,4>		DstRegion=<4>		Type=ub
+
+
+// End of common.inc
+
+
+// FileName:    DNDI.inc
+// Author:      Vivek Kumar
+// Description: Include file for DN, DI and DNDI
+// Inputs:      DI_ENABLE, DN_ENABLE, DN_PLANAR, DN_PACKED
+
+
+
+
+// End of common.inc
+
+
+//Interface:
+//Static Parameters:
+//r1
+
+
+//====================== Binding table (Explicit To DNDI)=========================================
+
+
+.declare mudMSGHDR_DNDI     Base=r18      ElementSize=4    Type=ud
+.declare mdMSGHDR_DNDI      Base=r18      ElementSize=4    Type=d
+.declare mwMSGHDR_DNDI      Base=r18      ElementSize=2    Type=w
+
+
+.declare mudMSGHDR_STMM     Base=r20      ElementSize=4    Type=ud
+
+
+.declare mudMSGHDR_HIST     Base=r22      ElementSize=4    Type=ud
+
+
+.declare mudMSGHDR_ENC_STATS Base=r24 ElementSize=4   Type=ud
+.declare muwMSGHDR_ENC_STATS Base=r24 ElementSize=2   Type=uw
+.declare mubMSGHDR_ENC_STATS Base=r24 ElementSize=1   Type=ub
+
+
+.declare mudMSGHDR_DN_OUT   Base=r31.0      ElementSize=4  Type=ud
+.declare mdMSGHDR_DN_OUT    Base=r31.0      ElementSize=4  Type=d
+.declare mubMSGHDR_DN_OUT   Base=r31.0      ElementSize=1  Type=ub
+
+
+.declare mudMSGHDR_UVCOPY   Base=r36      ElementSize=4  Type=ud
+.declare mdMSGHDR_UVCOPY    Base=r36      ElementSize=4  Type=d
+.declare mudMSGHDR_UCOPY    Base=r36       ElementSize=4  Type=ud
+.declare mudMSGHDR_VCOPY    Base=r38       ElementSize=4  Type=ud
+
+
+.declare mudMSGHDR_DI_OUT1  Base=r18.0      ElementSize=4     Type=ud
+.declare mubMSGHDR_DI_OUT1  Base=r18.0      ElementSize=1     Type=ub
+
+
+.declare mudMSGHDR_DI_OUT2  Base=r23.0      ElementSize=4     Type=ud
+.declare mubMSGHDR_DI_OUT2  Base=r23.0      ElementSize=1     Type=ub
+
+//r45
+//Use r45 as message header, so no need to "mov" the data.
+
+.declare mudDN_Y_OUT        Base=r45.0 ElementSize=4 SrcRegion=<8;8,1>   DstRegion=<1> Type=ud
+
+// Message response (Denoised & DI-ed pixels & statistics); Use buffer 5
+.declare udDNDI_RESP        Base=r46.0 ElementSize=4 SrcRegion=<8;8,1>   DstRegion=<1> Type=ud
+.declare uwDNDI_RESP        Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare ubDNDI_RESP        Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+// Message response (UV Copy); Use buffer 5
+.declare udDNDI_UV_RESP     Base=r58.0 ElementSize=4 SrcRegion=<8;8,1>  DstRegion=<1> Type=ud
+.declare ubDNDI_UV_RESP     Base=r58.0 ElementSize=1 SrcRegion=<16;16,1>    DstRegion=<1> Type=ub
+
+//Temp GRFs: For 42X to 422 Conversion
+.declare uwDNDI_UVCOPY_TEMP Base=r10.0 ElementSize=2 SrcRegion=<16;16,1>    DstRegion=<1> Type=uw       //8 GRFs
+.declare ubDNDI_UVCOPY_TEMP Base=r10.0 ElementSize=1 SrcRegion=<16;16,1>    DstRegion=<1> Type=ub       //8 GRFs
+//---------------------------------------------------------------------------
+// Message descriptors
+//---------------------------------------------------------------------------
+// Extended message descriptor
+    // Message descriptor   for sampler read
+    //                    = 000 0010 (message len 2) 00000 (resp len - set later, 12 or 5 or 11)
+    //                      1 (header present 1) 0 11 (SIMD32/64 mode)
+    //                      1000 (message type) 0000 (DI state index)
+    //                      00000000 (binding table index - set later)
+    //                    = 0x040b8000
+
+
+// Attention: The Message Length is The Number of GRFs with Data Only, without the Header
+
+
+//---------------------------------------------------------------------------
+// VDI Return Data format
+//---------------------------------------------------------------------------
+// Defines for DI enabled
+
+
+// Defines for DI disabled
+
+
+
+// FileName:	DNDI_Command.asm 
+// Author:		Vivek Kumar
+// Description:	Sends a message to the VDI to process one DN (16x8) or DNDI (16x4) block
+
+// Prepare the DNDI send command
+mov (8)		mudMSGHDR_DNDI(0)<1>			r0.0<8;8,1>:ud					// message header (copy of r0 into r18)
+mov (1)		mwMSGHDR_DNDI(1,4)<1>			r7.0<0;1,0>:w		{ NoDDClr }		// horizontal origin	// Do we need to add offset here? -vK
+mov (1)		mwMSGHDR_DNDI(1,12)<1>			r7.1<0;1,0>:w		{ NoDDChk }		// vertical origin		// Can these 2 be combined? - vK
+
+send (8)	udDNDI_RESP(0)<1>	r18	0x2	0x4CE8003:ud	// request built at r18, reply written from udDNDI_RESP(0) (Base=r46); decoding 0x4CE8003 with the sampler-read layout documented above gives message len 2, response len 12, binding table index 3
+
+// On Gen6, with VDI walker, use the XY pair returned rather than programmed above
+// VDI_RETURNED_XY is ordered XY in case of walker enabled and the same as programmed in case of walker disabled
+mov (2) 	r7.0<1>:w     uwDNDI_RESP(9,14)<2;2,1>	// horizontal/vertical origin are in W.14 and W.15 of response row 9; they replace r7.0/r7.1
+
+
+
+// FileName:	DI_STMM_Save.asm 
+// Author:		Vivek Kumar
+// Description:	Saves DI STMM Data to statistics surface in case of DI enabled (for 16x4 block)
+
+// Write STMM to memory
+mov (8)     mudMSGHDR_STMM(0)<1>	r0.0<8;8,1>:ud               							// message header   
+mov (8)     mudMSGHDR_STMM(1)<1>   	udDNDI_RESP(8,0)         					// Move STMM to MRF 
+
+shr (1)     mudMSGHDR_STMM(0,0)<1>	r7.0<0;1,0>:w            1:w     { NoDDClr } 		// X origin / 2
+mov (1)     mudMSGHDR_STMM(0,1)<1>	r7.1<0;1,0>:w                    { NoDDClr, NoDDChk }  	// Y origin
+mov (1)     mudMSGHDR_STMM(0,2)<1>	0x30007:ud           { NoDDChk } 		// block width and height (8x4)
+
+send (8)	null<1>:d	r20	0x5		0x40A8021:ud      
+
+
+
+// FileName:	DN_Hist_Save.asm 
+// Author:		Vivek Kumar
+// Description:	Saves DN history data to statistics surface
+
+// Write denoise history to memory
+mov (8)    r27<1>:ud				r0.0<8;8,1>:ud                   			// message header   
+
+	mov (1)    mudMSGHDR_HIST(1)<1>		udDNDI_RESP(9,0)<0;1,0>		// Move denoise history to MRF (4x1)
+
+
+shr (2)    r27.0<1>:ud		r7.0<2;2,1>:w				2:w                                 	// X,Y origin / 4
+add (1)    r27.0<1>:ud		r27.0<0;1,0>:ud			r1.12<0;1,0>:uw		{ NoDDClr }  	// Add pitch to X origin
+mov (1)    r27.2<1>:ud		0x3:ud									{ NoDDChk }  	// block width and height
+
+mov (8)		mudMSGHDR_HIST(0)<1>		r27.0<8;8,1>:ud
+send (8)	null<1>:d	r22	0x5		0x40A8021:ud
+
+
+
+// FileName:	DNDI_Enc_Stats_Save.asm
+// Author:		Vivek Kumar
+// Description:	Saves Encoder Statistics data to statistics surface in case of DI enabled (for 16x4 block)
+
+// Write encoder statistics to memory
+//Currently enable this only on Gen6 validation
+mov (8)		mudMSGHDR_ENC_STATS(1)<1>	0x0:ud						// Init payload MRF
+mov (8)		mudMSGHDR_ENC_STATS(0)<1>	r0.0<8;8,1>:ud				// message header
+
+shr (1)		mudMSGHDR_ENC_STATS(0,0)<1>		r7.0<0;1,0>:w            	1:w  	{ NoDDClr }			//enable the flag after testing on si           			{ NoDDClr }	// X origin / 2
+mul (1)		acc0.1<1>:ud					r7.1<0;1,0>:w				3:w																							// Y origin * 3
+shr (1)		mudMSGHDR_ENC_STATS(0,1)<1>		acc0.1<0;1,0>:ud			2:w		{ NoDDClr, NoDDChk }		//enable the flag after testing on si			   { NoDDClr, NoDDChk } // Y origin * 3/4
+mov (1)		mudMSGHDR_ENC_STATS(0,2)<1>		0x20007:ud				{ NoDDChk }			//enable the flag after testing on si						{ NoDDChk } // block width and height (8x3)
+add (2)		mudMSGHDR_ENC_STATS(0,0)<1>		mudMSGHDR_ENC_STATS(0,0)<2;2,1>       	r1.12<2;2,1>:uw					// Add pitch to X,Y origin
+
+
+	//Data block for Encoder Statistics
+	//----------------------------------------------------
+	//|  0  |   1  |   2   |  3  |  4  |  5  |  6  |  7  | Bytes
+	//----------------------------------------------------
+	//| BNE | MCNT | FCNT | TCNT |  X  |  X  |  X  |  X  |
+	//----------------------------------------------------
+	//|   DcTpT    |     SVCM    |   DcBpT   |   DcTpB   |
+	//----------------------------------------------------
+	//|   SHCM     |     STAD    |   DcTcB   |   DcBpB   |
+	//----------------------------------------------------
+	mov (1)		mudMSGHDR_ENC_STATS(1,0)<1>		udDNDI_RESP(9,1)<0;1,0>    		{ NoDDClr }			// Move encoder statistics to MRF
+	mov (2)		mudMSGHDR_ENC_STATS(1,3)<2>		udDNDI_RESP(9,3)<2;2,1>    		{ NoDDClr, NoDDChk }		// Move encoder statistics to MRF
+	mov (2)		mudMSGHDR_ENC_STATS(1,2)<2>		udDNDI_RESP(9,5)<2;2,1>    		{ NoDDChk }			// Move encoder statistics to MRF
+
+
+send (8)   null<1>:d    r24    0x5    0x40A8021:ud
+
+
+
+// FileName:	DN_Save_PA_16x4.asm
+// Author:		Vivek Kumar
+// Description:	Save one 16x4 blocks of DN output in Packed format for reference
+
+
+ // check top/bottom field first
+cmp.e.f0.0 (1)  null<1>:w               r1.28<0;1,0>:ub     1:w    // f0.0 is set when byte r1.28 == 1; it selects the TOP_FIELD_FIRST path below
+
+add (4)		a0.4<1>:uw   r4.0<4;4,1>:ub   1024:w    // Initial Y,U,V offset in YUV422 block: a0.4/a0.5/a0.6 = bytes r4.0/r4.1/r4.2 + 1024. 1024 = 32*32, i.e. r32 (the row after the r31 header) assuming 32-byte GRFs; the former "m14" note looks like stale MRF numbering - TODO confirm
+
+mov (8)		mudMSGHDR_DN_OUT(0)<1>		r0<8;8,1>:ud            			// message header
+shl (1)     mdMSGHDR_DN_OUT(0,0)<1>		r7.0<0;1,0>:w     1:w  		{ NoDDClr }            // X origin * 2 (422 output)
+mov (1)     mdMSGHDR_DN_OUT(0,1)<1>		r7.1<0;1,0>:w          		{ NoDDClr, NoDDChk }   // Y origin
+mov (1)     mudMSGHDR_DN_OUT(0,2)<1>	0x3001F:ud	{ NoDDChk }            // block width and height (32x4): 0x1F = width-1, 0x3 = height-1, same encoding as 0x30007 = 8x4 in the STMM write
+
+(f0.0) jmpi (1) TOP_FIELD_FIRST
+
+BOTTOM_FIELD_FIRST:
+	mov (16)    r[a0.4,  0]<2>:ub     ubDNDI_RESP(10,0) 	{ NoDDClr }	// 2nd field luma from current frame (line 0,2)
+	mov (16)    r[a0.4,  32]<2>:ub   ubDNDI_RESP(4,16) 		{ NoDDClr }	// 1st field luma from current frame (line 1,3)
+	mov (16)    r[a0.4,  64]<2>:ub     ubDNDI_RESP(10,16) 	{ NoDDClr }	// 2nd field luma from current frame (line 0,2)
+	mov (16)    r[a0.4,  96]<2>:ub   ubDNDI_RESP(5,16) 		{ NoDDClr }	// 1st field luma from current frame (line 1,3)
+	mov (8)     r[a0.5,  0]<4>:ub     ubDNDI_RESP(11,1)<16;8,2> { NoDDClr, NoDDChk }	// 2nd field U from current frame (line 0,2)
+	mov (8)     r[a0.5,  32]<4>:ub   ubDNDI_RESP(6,17)<16;8,2> 	{ NoDDClr, NoDDChk }	// 1st field U from current frame (line 1,3)
+	mov (8)     r[a0.5,  64]<4>:ub     ubDNDI_RESP(11,17)<16;8,2> { NoDDClr, NoDDChk }	// 2nd field U from current frame (line 0,2)
+	mov (8)     r[a0.5,  96]<4>:ub   ubDNDI_RESP(7,17)<16;8,2> 	{ NoDDClr, NoDDChk }	// 1st field U from current frame (line 1,3)
+	mov (8)     r[a0.6,  0]<4>:ub     ubDNDI_RESP(11,0)<16;8,2> 	{ NoDDChk }		// 2nd field V from current frame (line 0,2)
+	mov (8)     r[a0.6,  32]<4>:ub   ubDNDI_RESP(6,16)<16;8,2> 		{ NoDDChk }		// 1st field V from current frame (line 1,3)
+	mov (8)     r[a0.6,  64]<4>:ub     ubDNDI_RESP(11,16)<16;8,2> 	{ NoDDChk }		// 2nd field V from current frame (line 0,2)
+	mov (8)     r[a0.6,  96]<4>:ub   ubDNDI_RESP(7,16)<16;8,2> 		{ NoDDChk }		// 1st field V from current frame (line 1,3)
+jmpi (1) SAVE_DN_CURR
+
+TOP_FIELD_FIRST:
+	mov (16)    r[a0.4,  0]<2>:ub       ubDNDI_RESP(4,0) 		{ NoDDClr }	// 1st field luma from current frame (line 0,2)
+	mov (16)    r[a0.4,  32]<2>:ub     ubDNDI_RESP(10,0) 	{ NoDDClr }	// 2nd field luma from current frame (line 1,3)
+	mov (16)    r[a0.4,  64]<2>:ub       ubDNDI_RESP(5,0) 		{ NoDDClr }	// 1st field luma from current frame (line 0,2)
+	mov (16)    r[a0.4,  96]<2>:ub     ubDNDI_RESP(10,16) 	{ NoDDClr }	// 2nd field luma from current frame (line 1,3)
+	mov (8)     r[a0.5,  0]<4>:ub       ubDNDI_RESP(6,1)<16;8,2> 			{ NoDDClr, NoDDChk }	// 1st field U from current frame (line 0,2)
+	mov (8)     r[a0.5,  32]<4>:ub     ubDNDI_RESP(11,1)<16;8,2> 	{ NoDDClr, NoDDChk }	// 2nd field U from current frame (line 1,3)
+	mov (8)     r[a0.5,  64]<4>:ub       ubDNDI_RESP(7,1)<16;8,2> 			{ NoDDClr, NoDDChk }	// 1st field U from current frame (line 0,2)
+	mov (8)     r[a0.5,  96]<4>:ub     ubDNDI_RESP(11,17)<16;8,2> 	{ NoDDClr, NoDDChk }	// 2nd field U from current frame (line 1,3)
+	mov (8)     r[a0.6,  0]<4>:ub       ubDNDI_RESP(6,0)<16;8,2> 			{ NoDDChk }	// 1st field V from current frame (line 0,2)
+	mov (8)     r[a0.6,  32]<4>:ub     ubDNDI_RESP(11,0)<16;8,2> 	{ NoDDChk }	// 2nd field V from current frame (line 1,3)
+	mov (8)     r[a0.6,  64]<4>:ub       ubDNDI_RESP(7,0)<16;8,2> 			{ NoDDChk }	// 1st field V from current frame (line 0,2)
+	mov (8)     r[a0.6,  96]<4>:ub     ubDNDI_RESP(11,16)<16;8,2> 	{ NoDDChk }	// 2nd field V from current frame (line 1,3)
+
+SAVE_DN_CURR:
+//send out data through data port (message headed by r31, built above)
+send (8)    null<1>:d    r31.0		0x5    0xA0A8018:ud
+
+
+
+// FileName:    DI_Save_422CP_16x4.asm
+// Author:      Vivek Kumar
+// Description: Save two 16x4 blocks of DI output in 422 format to Color Pipe (IECP)
+
+
+.declare mubMSGHDR_DI_OUT1_1  Base=r18.0      ElementSize=1  Type=ub
+
+
+.declare mubMSGHDR_DI_OUT1_2  Base=r21.0      ElementSize=1  Type=ub
+
+
+.declare mubMSGHDR_DI_OUT2_1  Base=r24.0      ElementSize=1  Type=ub
+
+
+.declare mubMSGHDR_DI_OUT2_2  Base=r27.0      ElementSize=1  Type=ub
+
+
+mov (8) r27.0<1>:ud     r0.0<8;8,1>:ud     // Build the write-message header in r27, starting from a copy of r0
+shl (1) r27.0<1>:ud     r7.0<0;1,0>:w            1:w  { NoDDClr }          // Horizontal block origin = r7.0 * 2 (doubled for the 422 output)
+mov (1) r27.1<1>:ud     r7.1<0;1,0>:w                 { NoDDClr, NoDDChk }       // Vertical block origin = r7.1
+mov (1) r27.2<1>:ud     0x3000F:ud        { NoDDClr, NoDDChk }       // Block width and height (16x4): 0x000F = width-1, 0x0003 = height-1
+
+//M0.3  - 0 - CP Enable, 1 - Area of Interest, 3:2 Message Format(TBD), 4:3 - Ignored, 31:5 CP state pointer
+//Compose area-of-interest bit + color pipe state pointer
+or (1)  r27.3<1>:ud     r2.4<0;1,0>:ud     r7.26<0;1,0>:b     { NoDDChk }
+
+//prepare the message headers
+mov (8) r18.0<1>:ud       r27<8;8,1>:ud
+mov (8) r24.0<1>:ud       r27<8;8,1>:ud
+
+
+// Pack 2nd field Y; First 8x4 block
+	mov (8)		mubMSGHDR_DI_OUT1_1(1)<2>			ubDNDI_RESP(0,0)<8;8,1>			{ NoDDClr } 	// copy line of Y directly to memory as optimization
+	mov (8)		mubMSGHDR_DI_OUT1_1(1,16)<2>		ubDNDI_RESP(0,16)<8;8,1>			{ NoDDClr, NoDDChk } 	// copy line of Y directly to memory as optimization
+	mov (8)		mubMSGHDR_DI_OUT1_1(2)<2>			ubDNDI_RESP(0,32)<8;8,1>			{ NoDDClr } 	// copy line of Y directly to memory as optimization
+	mov (8)		mubMSGHDR_DI_OUT1_1(2,16)<2>		ubDNDI_RESP(0,48)<8;8,1>			{ NoDDClr, NoDDChk } 	// copy line of Y directly to memory as optimization
+
+// Pack 2nd field U, V; First 8x4 block
+	mov (4)     mubMSGHDR_DI_OUT1_1(1,1)<4>   		ubDNDI_RESP(2,1)<8;4,2>		{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
+	mov (4)     mubMSGHDR_DI_OUT1_1(1,17)<4>   	ubDNDI_RESP(2,17)<8;4,2>		{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
+
+	mov (4)     mubMSGHDR_DI_OUT1_1(1,3)<4>	  	ubDNDI_RESP(2,0)<8;4,2>			{ NoDDChk }    	// copy line of V directly to memory as optimization
+	mov (4)     mubMSGHDR_DI_OUT1_1(1,19)<4>   	ubDNDI_RESP(2,16)<8;4,2>		{ NoDDChk }    	// copy line of V directly to memory as optimization
+	mov (4)     mubMSGHDR_DI_OUT1_1(2,1)<4>   		ubDNDI_RESP(2,33)<8;4,2>		{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
+	mov (4)     mubMSGHDR_DI_OUT1_1(2,17)<4>   	ubDNDI_RESP(2,49)<8;4,2>		{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
+
+	mov (4)     mubMSGHDR_DI_OUT1_1(2,3)<4>	  	ubDNDI_RESP(2,32)<8;4,2>			{ NoDDChk }    	// copy line of V directly to memory as optimization
+	mov (4)     mubMSGHDR_DI_OUT1_1(2,19)<4>   	ubDNDI_RESP(2,48)<8;4,2>		{ NoDDChk }    	// copy line of V directly to memory as optimization
+
+
+// Pack 2nd field Y; Second 8x4 block
+mov	(8)	r21.0<1>:ud		r18.0<8;8,1>:ud
+add	(1)	r21.0<1>:ud		r21.0<0;1,0>:w		0x10:w
+
+	mov (8)		mubMSGHDR_DI_OUT1_2(1)<2>			ubDNDI_RESP(0,8)<8;8,1>			{ NoDDClr } 	// copy line of Y directly to memory as optimization
+	mov (8)		mubMSGHDR_DI_OUT1_2(1,16)<2>		ubDNDI_RESP(0,24)<8;8,1>		{ NoDDClr, NoDDChk } 	// copy line of Y directly to memory as optimization
+	mov (8)		mubMSGHDR_DI_OUT1_2(2)<2>			ubDNDI_RESP(0,40)<8;8,1>			{ NoDDClr } 	// copy line of Y directly to memory as optimization
+	mov (8)		mubMSGHDR_DI_OUT1_2(2,16)<2>		ubDNDI_RESP(0,56)<8;8,1>		{ NoDDClr, NoDDChk } 	// copy line of Y directly to memory as optimization
+
+// Pack 2nd field U, V; Second 8x4 block
+	mov (4)     mubMSGHDR_DI_OUT1_2(1,1)<4>   		ubDNDI_RESP(2,9)<8;4,2>		{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
+	mov (4)     mubMSGHDR_DI_OUT1_2(1,17)<4>		ubDNDI_RESP(2,25)<8;4,2>	{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
+
+	mov (4)     mubMSGHDR_DI_OUT1_2(1,3)<4>   		ubDNDI_RESP(2,8)<8;4,2>		{ NoDDChk }    	// copy line of V directly to memory as optimization
+	mov (4)     mubMSGHDR_DI_OUT1_2(1,19)<4>		ubDNDI_RESP(2,24)<8;4,2>		{ NoDDChk }    	// copy line of V directly to memory as optimization
+	mov (4)     mubMSGHDR_DI_OUT1_2(2,1)<4>   		ubDNDI_RESP(2,41)<8;4,2>		{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
+	mov (4)     mubMSGHDR_DI_OUT1_2(2,17)<4>		ubDNDI_RESP(2,57)<8;4,2>	{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
+
+	mov (4)     mubMSGHDR_DI_OUT1_2(2,3)<4>   		ubDNDI_RESP(2,40)<8;4,2>		{ NoDDChk }    	// copy line of V directly to memory as optimization
+	mov (4)     mubMSGHDR_DI_OUT1_2(2,19)<4>		ubDNDI_RESP(2,56)<8;4,2>		{ NoDDChk }    	// copy line of V directly to memory as optimization
+
+send (8)    null<1>:d    r18.0   0x5     0x60A801B:ud
+send (8)    null<1>:d    r21.0   0x5     0x60A801B:ud
+
+// Pack 1st field Y; 1st 8x4 block
+	mov (8)		mubMSGHDR_DI_OUT2_1(1)<2>			ubDNDI_RESP(4,0)<8;8,1>			{ NoDDClr } 	// copy line of Y directly to memory as optimization
+	mov (8)		mubMSGHDR_DI_OUT2_1(1,16)<2>		ubDNDI_RESP(4,16)<8;8,1>			{ NoDDClr, NoDDChk } 	// copy line of Y directly to memory as optimization
+	mov (8)		mubMSGHDR_DI_OUT2_1(2)<2>			ubDNDI_RESP(4,32)<8;8,1>			{ NoDDClr } 	// copy line of Y directly to memory as optimization
+	mov (8)		mubMSGHDR_DI_OUT2_1(2,16)<2>		ubDNDI_RESP(4,48)<8;8,1>			{ NoDDClr, NoDDChk } 	// copy line of Y directly to memory as optimization
+
+// Pack 1st field U,V; 1st 8x4 block
+	mov (4)     mubMSGHDR_DI_OUT2_1(1,1)<4>   		ubDNDI_RESP(6,1)<8;4,2>		{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
+	mov (4)     mubMSGHDR_DI_OUT2_1(1,17)<4>   	ubDNDI_RESP(6,17)<8;4,2>		{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
+
+	mov (4)     mubMSGHDR_DI_OUT2_1(1,3)<4>	  	ubDNDI_RESP(6,0)<8;4,2>		    { NoDDChk }    	// copy line of V directly to memory as optimization
+	mov (4)     mubMSGHDR_DI_OUT2_1(1,19)<4>   	ubDNDI_RESP(6,16)<8;4,2>	    { NoDDChk }    	// copy line of V directly to memory as optimization
+	mov (4)     mubMSGHDR_DI_OUT2_1(2,1)<4>   		ubDNDI_RESP(6,33)<8;4,2>		{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
+	mov (4)     mubMSGHDR_DI_OUT2_1(2,17)<4>   	ubDNDI_RESP(6,49)<8;4,2>		{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
+
+	mov (4)     mubMSGHDR_DI_OUT2_1(2,3)<4>	  	ubDNDI_RESP(6,32)<8;4,2>		    { NoDDChk }    	// copy line of V directly to memory as optimization
+	mov (4)     mubMSGHDR_DI_OUT2_1(2,19)<4>   	ubDNDI_RESP(6,48)<8;4,2>	    { NoDDChk }    	// copy line of V directly to memory as optimization
+
+// Pack 1st field Y; 2nd 8x4 block
+mov	(8)	r27.0<1>:ud		r24.0<8;8,1>:ud
+add	(1)	r27.0<1>:ud		r27.0<0;1,0>:w		0x10:w
+
+	mov (8)		mubMSGHDR_DI_OUT2_2(1)<2>			ubDNDI_RESP(4,8)<8;8,1>			{ NoDDClr } 	// copy line of Y directly to memory as optimization
+	mov (8)		mubMSGHDR_DI_OUT2_2(1,16)<2>		ubDNDI_RESP(4,24)<8;8,1>		{ NoDDClr, NoDDChk } 	// copy line of Y directly to memory as optimization
+	mov (8)		mubMSGHDR_DI_OUT2_2(2)<2>			ubDNDI_RESP(4,40)<8;8,1>			{ NoDDClr } 	// copy line of Y directly to memory as optimization
+	mov (8)		mubMSGHDR_DI_OUT2_2(2,16)<2>		ubDNDI_RESP(4,56)<8;8,1>		{ NoDDClr, NoDDChk } 	// copy line of Y directly to memory as optimization
+
+// Pack 1st field U, V; 2nd 8x4 block
+	mov (4)     mubMSGHDR_DI_OUT2_2(1,1)<4>   		ubDNDI_RESP(6,9)<8;4,2>		{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
+	mov (4)     mubMSGHDR_DI_OUT2_2(1,17)<4>		ubDNDI_RESP(6,25)<8;4,2>	{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
+
+	mov (4)     mubMSGHDR_DI_OUT2_2(1,3)<4>   		ubDNDI_RESP(6,8)<8;4,2>		{ NoDDChk }    	// copy line of V directly to memory as optimization
+	mov (4)     mubMSGHDR_DI_OUT2_2(1,19)<4>		ubDNDI_RESP(6,24)<8;4,2>		{ NoDDChk }    	// copy line of V directly to memory as optimization
+	mov (4)     mubMSGHDR_DI_OUT2_2(2,1)<4>   		ubDNDI_RESP(6,41)<8;4,2>		{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
+	mov (4)     mubMSGHDR_DI_OUT2_2(2,17)<4>		ubDNDI_RESP(6,57)<8;4,2>	{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
+
+	mov (4)     mubMSGHDR_DI_OUT2_2(2,3)<4>   		ubDNDI_RESP(6,40)<8;4,2>		{ NoDDChk }    	// copy line of V directly to memory as optimization
+	mov (4)     mubMSGHDR_DI_OUT2_2(2,19)<4>		ubDNDI_RESP(6,56)<8;4,2>		{ NoDDChk }    	// copy line of V directly to memory as optimization
+
+send (8)    null<1>:d    r24.0     0x5     0x60A801E:ud
+send (8)    null<1>:d    r27.0     0x5     0x60A801E:ud
+
+
+
+//End of Thread message
+
+mov (8) r127<1>:ud r0.0<8;8,1>:ud 
+ send (1) null<1>:d r127 0x27 0x02000010
+
+
+.end_code 
+.end_kernel
diff --git a/src/shaders/post_processing/gen7/PA_DNDI_PA.g4a b/src/shaders/post_processing/gen7/PA_DNDI_PA.g4a
new file mode 100644
index 0000000..b42149c
--- /dev/null
+++ b/src/shaders/post_processing/gen7/PA_DNDI_PA.g4a
@@ -0,0 +1,475 @@
+/*
+ *  Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+//   97    // Total instruction count
+//    1    // Total kernel count
+
+.kernel PA_DNDI_PA
+.code
+
+
+
+// FileName:	DNDI_PL_Core.asm
+// Author:		Tatiya, Rupesh
+
+
+
+// FileName:	DNDI_Core.asm 
+// Author:		Vivek Kumar
+// Description:	Tasks for DN+DI case (16x4 block)
+
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16)
+.default_register_type  :ub
+
+.reg_count_total        128
+.reg_count_payload      7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+                                    //  e.g.            byte0   byte1  byte2
+                                    // YUYV               0       1      3
+                                    // YVYU               0       3      1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+                                    //  e.g.              byte0           byte1           byte2
+                                    // YUYV                 0               1               3
+                                    // YVYU                 0               3               1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+//                = 000 0001 (min message len 1 ) 0,0000 (resp len 0   -add later)
+//                  0000,0000,0000
+//                  0001(Spawn a root thread),0001 (Root thread spawn thread)
+//                = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+//                = 000 0110 (min message len 6 ) 0,0000 (resp len 0   -add later)
+//                  1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+//                  0000,0000 (Binding table index, added later)
+//                = 0x0C098700
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+        // Message Descriptors
+                //                = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+                //                  1 (header present 1) 0 1010 (media block write) 000000
+                //                  00000000 (binding table index - set later)
+                //                = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+                             // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT  r43    // Dummy Message Register for EOT
+
+
+.declare    mubMSGPAYLOAD  Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare    muwMSGPAYLOAD  Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare    mudMSGPAYLOAD  Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare    mfMSGPAYLOAD   Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+
+.declare fROBUF		  Base=r9.0		ElementSize=4		SrcRegion=<8;8,1>		  DstRegion=<1>		Type=f
+
+.declare udROBUF		Base=r9.0		ElementSize=4		SrcRegion=<8;8,1>		  DstRegion=<1>		Type=ud
+
+.declare uwROBUF		Base=r9.0		ElementSize=2		SrcRegion=<16;16,1>		DstRegion=<1>		Type=uw
+
+.declare ubROBUF		Base=r9.0		ElementSize=1		SrcRegion=<16;16,1>		DstRegion=<1>		Type=ub
+
+.declare ub4ROBUF 	Base=r9.0		ElementSize=1		SrcRegion=<32;8,4>		DstRegion=<4>		Type=ub
+
+
+// End of common.inc
+
+
+// FileName:    DNDI.inc
+// Author:      Vivek Kumar
+// Description: Include file for DN, DI and DNDI
+// Inputs:      DI_ENABLE, DN_ENABLE, DN_PLANAR, DN_PACKED
+
+
+
+
+// End of common.inc
+
+
+//Interface:
+//Static Parameters:
+//r1
+
+
+//====================== Binding table (Explicit To DNDI)=========================================
+
+
+.declare mudMSGHDR_DNDI     Base=r18      ElementSize=4    Type=ud
+.declare mdMSGHDR_DNDI      Base=r18      ElementSize=4    Type=d
+.declare mwMSGHDR_DNDI      Base=r18      ElementSize=2    Type=w
+
+
+.declare mudMSGHDR_STMM     Base=r20      ElementSize=4    Type=ud
+
+
+.declare mudMSGHDR_HIST     Base=r22      ElementSize=4    Type=ud
+
+
+.declare mudMSGHDR_ENC_STATS Base=r24 ElementSize=4   Type=ud
+.declare muwMSGHDR_ENC_STATS Base=r24 ElementSize=2   Type=uw
+.declare mubMSGHDR_ENC_STATS Base=r24 ElementSize=1   Type=ub
+
+
+.declare mudMSGHDR_DN_OUT   Base=r31.0      ElementSize=4  Type=ud
+.declare mdMSGHDR_DN_OUT    Base=r31.0      ElementSize=4  Type=d
+.declare mubMSGHDR_DN_OUT   Base=r31.0      ElementSize=1  Type=ub
+
+
+.declare mudMSGHDR_UVCOPY   Base=r36      ElementSize=4  Type=ud
+.declare mdMSGHDR_UVCOPY    Base=r36      ElementSize=4  Type=d
+.declare mudMSGHDR_UCOPY    Base=r36       ElementSize=4  Type=ud
+.declare mudMSGHDR_VCOPY    Base=r38       ElementSize=4  Type=ud
+
+
+.declare mudMSGHDR_DI_OUT1  Base=r18.0      ElementSize=4     Type=ud
+.declare mubMSGHDR_DI_OUT1  Base=r18.0      ElementSize=1     Type=ub
+
+
+.declare mudMSGHDR_DI_OUT2  Base=r23.0      ElementSize=4     Type=ud
+.declare mubMSGHDR_DI_OUT2  Base=r23.0      ElementSize=1     Type=ub
+
+//r45
+//Use r45 as message header, so no need to "mov" the data.
+
+.declare mudDN_Y_OUT        Base=r45.0 ElementSize=4 SrcRegion=<8;8,1>   DstRegion=<1> Type=ud
+
+// Message response (Denoised & DI-ed pixels & statistics); Use buffer 5
+.declare udDNDI_RESP        Base=r46.0 ElementSize=4 SrcRegion=<8;8,1>   DstRegion=<1> Type=ud
+.declare uwDNDI_RESP        Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare ubDNDI_RESP        Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+// Message response (UV Copy); Use buffer 5
+.declare udDNDI_UV_RESP     Base=r58.0 ElementSize=4 SrcRegion=<8;8,1>  DstRegion=<1> Type=ud
+.declare ubDNDI_UV_RESP     Base=r58.0 ElementSize=1 SrcRegion=<16;16,1>    DstRegion=<1> Type=ub
+
+//Temp GRFs: For 42X to 422 Conversion
+.declare uwDNDI_UVCOPY_TEMP Base=r10.0 ElementSize=2 SrcRegion=<16;16,1>    DstRegion=<1> Type=uw       //8 GRFs
+.declare ubDNDI_UVCOPY_TEMP Base=r10.0 ElementSize=1 SrcRegion=<16;16,1>    DstRegion=<1> Type=ub       //8 GRFs
+//---------------------------------------------------------------------------
+// Message descriptors
+//---------------------------------------------------------------------------
+// Extended message descriptor
+    // Message descriptor   for sampler read
+    //                    = 000 0010 (message len 2) 00000 (resp len - set later, 12 or 5 or 11)
+    //                      1 (header present 1) 0 11 (SIMD32/64 mode)
+    //                      1000 (message type) 0000 (DI state index)
+    //                      00000000 (binding table index - set later)
+    //                    = 0x040b8000
+
+
+// Attention: The Message Length is The Number of GRFs with Data Only, without the Header
+
+
+//---------------------------------------------------------------------------
+// VDI Return Data format
+//---------------------------------------------------------------------------
+// Defines for DI enabled
+
+
+// Defines for DI disabled
+
+
+
+// FileName:	DNDI_Command.asm 
+// Author:		Vivek Kumar
+// Description:	Sends a message to the VDI to process one DN (16x8) or DNDI (16x4) block
+
+// Prepare the DNDI send command
+mov (8)		mudMSGHDR_DNDI(0)<1>			r0.0<8;8,1>:ud					// message header (r18) starts as a copy of r0
+mov (1)		mwMSGHDR_DNDI(1,4)<1>			r7.0<0;1,0>:w		{ NoDDClr }		// horizontal origin -> word 4 of r19	// Open question from author: do we need to add an offset here? -vK
+mov (1)		mwMSGHDR_DNDI(1,12)<1>			r7.1<0;1,0>:w		{ NoDDChk }		// vertical origin -> word 12 of r19	// Open question from author: can these 2 be combined? - vK
+
+send (8)	udDNDI_RESP(0)<1>	r18	0x2	0x4CE8003:ud	// descriptor 0x4CE8003: message length 2 (r18-r19), response length 12 written from r46 (udDNDI_RESP)
+
+// On Gen6, with the VDI walker, use the XY pair returned in the response rather than the one programmed above.
+// VDI_RETURNED_XY is ordered X,Y when the walker is enabled, and equals the programmed values when the walker is disabled.
+mov (2) 	r7.0<1>:w     uwDNDI_RESP(9,14)<2;2,1>	// horizontal/vertical origin are returned in W.14 and W.15 of response register 9
+
+
+
+// FileName:	DI_STMM_Save.asm 
+// Author:		Vivek Kumar
+// Description:	Saves DI STMM Data to statistics surface in case of DI enabled (for 16x4 block)
+
+// Write STMM to memory
+mov (8)     mudMSGHDR_STMM(0)<1>	r0.0<8;8,1>:ud               							// message header   
+mov (8)     mudMSGHDR_STMM(1)<1>   	udDNDI_RESP(8,0)         					// Move STMM to MRF 
+
+shr (1)     mudMSGHDR_STMM(0,0)<1>	r7.0<0;1,0>:w            1:w     { NoDDClr } 		// X origin / 2
+mov (1)     mudMSGHDR_STMM(0,1)<1>	r7.1<0;1,0>:w                    { NoDDClr, NoDDChk }  	// Y origin
+mov (1)     mudMSGHDR_STMM(0,2)<1>	0x30007:ud           { NoDDChk } 		// block width and height (8x4)
+
+send (8)	null<1>:d	r20	0x5		0x40A8021:ud      
+
+
+
+// FileName:	DN_Hist_Save.asm 
+// Author:		Vivek Kumar
+// Description:	Saves DN history data to statistics surface
+
+// Write denoise history to memory
+mov (8)    r27<1>:ud				r0.0<8;8,1>:ud                   			// build the message header in r27 first, starting from a copy of r0
+
+	mov (1)    mudMSGHDR_HIST(1)<1>		udDNDI_RESP(9,0)<0;1,0>		// Move denoise history (one dword = 4x1 bytes) into the payload register r23
+
+
+shr (2)    r27.0<1>:ud		r7.0<2;2,1>:w				2:w                                 	// X,Y origin / 4
+add (1)    r27.0<1>:ud		r27.0<0;1,0>:ud			r1.12<0;1,0>:uw		{ NoDDClr }  	// Add r1.12 (pitch, per the original comment) to X origin
+mov (1)    r27.2<1>:ud		0x3:ud									{ NoDDChk }  	// block width and height (4x1): width-1 = 3, height-1 = 0
+
+mov (8)		mudMSGHDR_HIST(0)<1>		r27.0<8;8,1>:ud	// copy the finished header from r27 into r22
+send (8)	null<1>:d	r22	0x5		0x40A8021:ud	// descriptor 0x40A8021: message length 2 (header r22 + payload r23), no response
+
+
+
+// FileName:	DNDI_Enc_Stats_Save.asm
+// Author:		Vivek Kumar
+// Description:	Saves Encoder Statistics data to statistics surface in case of DI enabled (for 16x4 block)
+
+// Write encoder statistics to memory
+//Currently enable this only on Gen6 validation
+mov (8)		mudMSGHDR_ENC_STATS(1)<1>	0x0:ud						// Zero the payload register (r25)
+mov (8)		mudMSGHDR_ENC_STATS(0)<1>	r0.0<8;8,1>:ud				// message header (r24) starts as a copy of r0
+
+shr (1)		mudMSGHDR_ENC_STATS(0,0)<1>		r7.0<0;1,0>:w            	1:w  	{ NoDDClr }			// X origin / 2 (original note: "enable the flag after testing on si")
+mul (1)		acc0.1<1>:ud					r7.1<0;1,0>:w				3:w																							// acc0.1 = Y origin * 3
+shr (1)		mudMSGHDR_ENC_STATS(0,1)<1>		acc0.1<0;1,0>:ud			2:w		{ NoDDClr, NoDDChk }		// Y origin * 3/4 (original note: "enable the flag after testing on si")
+mov (1)		mudMSGHDR_ENC_STATS(0,2)<1>		0x20007:ud				{ NoDDChk }			// block width and height (8x3) (original note: "enable the flag after testing on si")
+add (2)		mudMSGHDR_ENC_STATS(0,0)<1>		mudMSGHDR_ENC_STATS(0,0)<2;2,1>       	r1.12<2;2,1>:uw					// Add r1.12 and r1.13 (pitch, per the original comment) to X,Y origin
+
+
+	//Data block for Encoder Statistics
+	//----------------------------------------------------
+	//|  0  |   1  |   2   |  3  |  4  |  5  |  6  |  7  | Bytes
+	//----------------------------------------------------
+	//| BNE | MCNT | FCNT | TCNT |  X  |  X  |  X  |  X  |
+	//----------------------------------------------------
+	//|   DcTpT    |     SVCM    |   DcBpT   |   DcTpB   |
+	//----------------------------------------------------
+	//|   SHCM     |     STAD    |   DcTcB   |   DcBpB   |
+	//----------------------------------------------------
+	mov (1)		mudMSGHDR_ENC_STATS(1,0)<1>		udDNDI_RESP(9,1)<0;1,0>    		{ NoDDClr }			// Move encoder statistics to MRF
+	mov (2)		mudMSGHDR_ENC_STATS(1,3)<2>		udDNDI_RESP(9,3)<2;2,1>    		{ NoDDClr, NoDDChk }		// Move encoder statistics to MRF
+	mov (2)		mudMSGHDR_ENC_STATS(1,2)<2>		udDNDI_RESP(9,5)<2;2,1>    		{ NoDDChk }			// Move encoder statistics to MRF
+
+
+send (8)   null<1>:d    r24    0x5    0x40A8021:ud
+
+
+
+// FileName:	DN_Save_PA_16x4.asm
+// Author:		Vivek Kumar
+// Description:	Save one 16x4 blocks of DN output in Packed format for reference
+
+
+ // check top/bottom field first
+cmp.e.f0.0 (1)  null<1>:w               r1.28<0;1,0>:ub     1:w     // f0.0 is set when byte r1.28 equals 1; consumed by the jmpi below
+
+add (4)		a0.4<1>:uw   r4.0<4;4,1>:ub   1024:w    // Initial Y,U,V offsets in the YUV422 block: bytes r4.0-r4.3 plus 1024 (= 32*32, byte offset of r32, the GRF after the r31 header); "m14" in the original note looks like a stale MRF name - TODO confirm
+
+mov (8)		mudMSGHDR_DN_OUT(0)<1>		r0<8;8,1>:ud            			// message header (r31) starts as a copy of r0
+shl (1)     mdMSGHDR_DN_OUT(0,0)<1>		r7.0<0;1,0>:w     1:w  		{ NoDDClr }            // X origin * 2 (422 output)
+mov (1)     mdMSGHDR_DN_OUT(0,1)<1>		r7.1<0;1,0>:w          		{ NoDDClr, NoDDChk }   // Y origin
+mov (1)     mudMSGHDR_DN_OUT(0,2)<1>	0x3001F:ud	{ NoDDChk }            // block width and height (32x4): 0x001F = width-1, 0x0003 = height-1
+
+(f0.0) jmpi (1) TOP_FIELD_FIRST     // r1.28 == 1: use the TOP_FIELD_FIRST packing; otherwise fall through to BOTTOM_FIELD_FIRST
+
+BOTTOM_FIELD_FIRST:
+	mov (16)    r[a0.4,  0]<2>:ub     ubDNDI_RESP(10,0) 	{ NoDDClr }	// 2nd field luma from current frame (line 0,2)
+	mov (16)    r[a0.4,  32]<2>:ub   ubDNDI_RESP(4,16) 		{ NoDDClr }	// 1st field luma from current frame (line 1,3)
+	mov (16)    r[a0.4,  64]<2>:ub     ubDNDI_RESP(10,16) 	{ NoDDClr }	// 2nd field luma from current frame (line 0,2)
+	mov (16)    r[a0.4,  96]<2>:ub   ubDNDI_RESP(5,16) 		{ NoDDClr }	// 1st field luma from current frame (line 1,3)
+	mov (8)     r[a0.5,  0]<4>:ub     ubDNDI_RESP(11,1)<16;8,2> { NoDDClr, NoDDChk }	// 2nd field U from current frame (line 0,2)
+	mov (8)     r[a0.5,  32]<4>:ub   ubDNDI_RESP(6,17)<16;8,2> 	{ NoDDClr, NoDDChk }	// 1st field U from current frame (line 1,3)
+	mov (8)     r[a0.5,  64]<4>:ub     ubDNDI_RESP(11,17)<16;8,2> { NoDDClr, NoDDChk }	// 2nd field U from current frame (line 0,2)
+	mov (8)     r[a0.5,  96]<4>:ub   ubDNDI_RESP(7,17)<16;8,2> 	{ NoDDClr, NoDDChk }	// 1st field U from current frame (line 1,3)
+	mov (8)     r[a0.6,  0]<4>:ub     ubDNDI_RESP(11,0)<16;8,2> 	{ NoDDChk }		// 2nd field V from current frame (line 0,2)
+	mov (8)     r[a0.6,  32]<4>:ub   ubDNDI_RESP(6,16)<16;8,2> 		{ NoDDChk }		// 1st field V from current frame (line 1,3)
+	mov (8)     r[a0.6,  64]<4>:ub     ubDNDI_RESP(11,16)<16;8,2> 	{ NoDDChk }		// 2nd field V from current frame (line 0,2)
+	mov (8)     r[a0.6,  96]<4>:ub   ubDNDI_RESP(7,16)<16;8,2> 		{ NoDDChk }		// 1st field V from current frame (line 1,3)
+jmpi (1) SAVE_DN_CURR	// skip the TOP_FIELD_FIRST variant of the same packing
+
+TOP_FIELD_FIRST:
+	mov (16)    r[a0.4,  0]<2>:ub       ubDNDI_RESP(4,0) 		{ NoDDClr }	// 1st field luma from current frame (line 0,2)
+	mov (16)    r[a0.4,  32]<2>:ub     ubDNDI_RESP(10,0) 	{ NoDDClr }	// 2nd field luma from current frame (line 1,3)
+	mov (16)    r[a0.4,  64]<2>:ub       ubDNDI_RESP(5,0) 		{ NoDDClr }	// 1st field luma from current frame (line 0,2)
+	mov (16)    r[a0.4,  96]<2>:ub     ubDNDI_RESP(10,16) 	{ NoDDClr }	// 2nd field luma from current frame (line 1,3)
+	mov (8)     r[a0.5,  0]<4>:ub       ubDNDI_RESP(6,1)<16;8,2> 			{ NoDDClr, NoDDChk }	// 1st field U from current frame (line 0,2)
+	mov (8)     r[a0.5,  32]<4>:ub     ubDNDI_RESP(11,1)<16;8,2> 	{ NoDDClr, NoDDChk }	// 2nd field U from current frame (line 1,3)
+	mov (8)     r[a0.5,  64]<4>:ub       ubDNDI_RESP(7,1)<16;8,2> 			{ NoDDClr, NoDDChk }	// 1st field U from current frame (line 0,2)
+	mov (8)     r[a0.5,  96]<4>:ub     ubDNDI_RESP(11,17)<16;8,2> 	{ NoDDClr, NoDDChk }	// 2nd field U from current frame (line 1,3)
+	mov (8)     r[a0.6,  0]<4>:ub       ubDNDI_RESP(6,0)<16;8,2> 			{ NoDDChk }	// 1st field V from current frame (line 0,2)
+	mov (8)     r[a0.6,  32]<4>:ub     ubDNDI_RESP(11,0)<16;8,2> 	{ NoDDChk }	// 2nd field V from current frame (line 1,3)
+	mov (8)     r[a0.6,  64]<4>:ub       ubDNDI_RESP(7,0)<16;8,2> 			{ NoDDChk }	// 1st field V from current frame (line 0,2)
+	mov (8)     r[a0.6,  96]<4>:ub     ubDNDI_RESP(11,16)<16;8,2> 	{ NoDDChk }	// 2nd field V from current frame (line 1,3)
+
+SAVE_DN_CURR:
+//send out data through data port
+send (8)    null<1>:d    r31.0		0x5    0xA0A8018:ud
+
+
+
+// FileName:    DI_Save_PA_16x4.asm
+// Author:      Vivek Kumar
+// Description: Save two 16x4 blocks of DI output in Packed format
+
+
+add (4) a0.4<1>:uw   r2.28<4;4,1>:ub   608:w               // Initial Y,U,V offsets in the YUV422 block: bytes r2.28-r2.31 plus 608 (= 19*32, byte offset of r19, the GRF after the r18 header); "m20" in the original note looks like a stale MRF name - TODO confirm
+
+mov (8) r27.0<1>:ud     r0.0<8;8,1>:ud     // Build the header template in r27, starting from a copy of r0
+shl (1) r27.0<1>:d      r7.0<0;1,0>:w            1:w  { NoDDClr }          // Horizontal block origin = r7.0 * 2 (doubled for the packed 422 output)
+mov (1) r27.1<1>:d      r7.1<0;1,0>:w                 { NoDDClr, NoDDChk }       // Vertical block origin = r7.1
+mov (1) r27.2<1>:ud     0x3001F:ud          { NoDDChk }          // Block width and height (32x4): 0x001F = width-1, 0x0003 = height-1
+
+// Copy the header template into both message headers (r18 and r23)
+mov (8) r18.0<1>:ud       r27<8;8,1>:ud
+mov (8) r23.0<1>:ud       r27<8;8,1>:ud
+
+// Pack 2nd field Y
+    mov (16)    r[a0.4, 0]<2>      ubDNDI_RESP(0,0)               { NoDDClr }
+    mov (16)    r[a0.4, 32]<2>      ubDNDI_RESP(0,16)               { NoDDClr }
+    mov (16)    r[a0.4, 64]<2>      ubDNDI_RESP(0,32)               { NoDDClr }
+    mov (16)    r[a0.4, 96]<2>      ubDNDI_RESP(0,48)               { NoDDClr }
+// Pack 2nd field U
+    mov (8)     r[a0.5, 0]<4>      ubDNDI_RESP(2,1)<16;8,2>   { NoDDClr, NoDDChk }  //U pixels
+    mov (8)     r[a0.5, 32]<4>      ubDNDI_RESP(2,17)<16;8,2>   { NoDDClr, NoDDChk }  //U pixels
+    mov (8)     r[a0.5, 64]<4>      ubDNDI_RESP(2,33)<16;8,2>   { NoDDClr, NoDDChk }  //U pixels
+    mov (8)     r[a0.5, 96]<4>      ubDNDI_RESP(2,49)<16;8,2>   { NoDDClr, NoDDChk }  //U pixels
+// Pack 2nd field V
+    mov (8)     r[a0.6, 0]<4>      ubDNDI_RESP(2,0)<16;8,2>     { NoDDChk }     //Vpixels
+    mov (8)     r[a0.6, 32]<4>      ubDNDI_RESP(2,16)<16;8,2>     { NoDDChk }     //Vpixels
+    mov (8)     r[a0.6, 64]<4>      ubDNDI_RESP(2,32)<16;8,2>     { NoDDChk }     //Vpixels
+    mov (8)     r[a0.6, 96]<4>      ubDNDI_RESP(2,48)<16;8,2>     { NoDDChk }     //Vpixels
+
+// Pack 1st field Y
+    mov (16)    r[a0.4, 160]<2>    ubDNDI_RESP(4,0)               { NoDDClr }
+    mov (16)    r[a0.4, 192]<2>    ubDNDI_RESP(4,16)               { NoDDClr }
+    mov (16)    r[a0.4, 224]<2>    ubDNDI_RESP(4,32)               { NoDDClr }
+    mov (16)    r[a0.4, 256]<2>    ubDNDI_RESP(4,48)               { NoDDClr }
+// Pack 1st field U
+    mov (8)     r[a0.5, 160]<4>    ubDNDI_RESP(6,1)<16;8,2>   { NoDDClr, NoDDChk }  //U pixels
+    mov (8)     r[a0.5, 192]<4>    ubDNDI_RESP(6,17)<16;8,2>   { NoDDClr, NoDDChk }  //U pixels
+    mov (8)     r[a0.5, 224]<4>    ubDNDI_RESP(6,33)<16;8,2>   { NoDDClr, NoDDChk }  //U pixels
+    mov (8)     r[a0.5, 256]<4>    ubDNDI_RESP(6,49)<16;8,2>   { NoDDClr, NoDDChk }  //U pixels
+// Pack 1st field V
+    mov (8)     r[a0.6, 160]<4>    ubDNDI_RESP(6,0)<16;8,2>     { NoDDChk }     //Vpixels
+    mov (8)     r[a0.6, 192]<4>    ubDNDI_RESP(6,16)<16;8,2>     { NoDDChk }     //Vpixels
+    mov (8)     r[a0.6, 224]<4>    ubDNDI_RESP(6,32)<16;8,2>     { NoDDChk }     //Vpixels
+    mov (8)     r[a0.6, 256]<4>    ubDNDI_RESP(6,48)<16;8,2>     { NoDDChk }     //Vpixels
+
+//save the previous frame
+send (8)    null<1>:d    r18.0     0x5     0xA0A801B:ud
+
+//save the current frame
+send (8)    null<1>:d    r23.0     0x5     0xA0A801E:ud
+
+
+
+//End of Thread message
+
+mov (8) r127<1>:ud r0.0<8;8,1>:ud 
+ send (1) null<1>:d r127 0x27 0x02000010
+
+
+.end_code 
+.end_kernel
diff --git a/src/shaders/post_processing/gen7/PA_DNUV_PA.g4a b/src/shaders/post_processing/gen7/PA_DNUV_PA.g4a
new file mode 100644
index 0000000..cb1fd9c
--- /dev/null
+++ b/src/shaders/post_processing/gen7/PA_DNUV_PA.g4a
@@ -0,0 +1,2704 @@
+/*
+ *  Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+// 1319    // Total instruction count
+//    1    // Total kernel count
+
+
+.kernel YUY2_DNUV_YUY2
+.code
+
+
+
+//Module		: DN_UV_Setup
+//Author		: Tatiya, Rupesh
+//Description	: Initial Set-up for DN_UV
+
+
+
+
+// Module name	: ChromaDenoise.inc
+// Author		: Tatiya, Rupesh
+
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16)
+.default_register_type  :ub
+
+.reg_count_total        128
+.reg_count_payload      7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+                                    //  e.g.            byte0   byte1  byte2
+                                    // YUYV               0       1      3
+                                    // YVYU               0       3      1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+                                    //  e.g.              byte0           byte1           byte2
+                                    // YUYV                 0               1               3
+                                    // YVYU                 0               3               1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+//                = 000 0001 (min message len 1 ) 0,0000 (resp len 0   -add later)
+//                  0000,0000,0000
+//                  0001(Spawn a root thread),0001 (Root thread spawn thread)
+//                = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+//                = 000 0110 (min message len 6 ) 0,0000 (resp len 0   -add later)
+//                  1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+//                  0000,0000 (Binding table index, added later)
+//                = 0x02000011
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+        // Message Descriptors
+                //                = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+                //                  1 (header present 1) 0 1010 (media block write) 000000
+                //                  00000000 (binding table index - set later)
+                //                = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+                             // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT  r43    // Dummy Message Register for EOT
+
+
+.declare    mubMSGPAYLOAD  Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare    muwMSGPAYLOAD  Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare    mudMSGPAYLOAD  Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare    mfMSGPAYLOAD   Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+
+.declare fROBUF		  Base=r9.0		ElementSize=4		SrcRegion=<8;8,1>		  DstRegion=<1>		Type=f
+
+.declare udROBUF		Base=r9.0		ElementSize=4		SrcRegion=<8;8,1>		  DstRegion=<1>		Type=ud
+
+.declare uwROBUF		Base=r9.0		ElementSize=2		SrcRegion=<16;16,1>		DstRegion=<1>		Type=uw
+
+.declare ubROBUF		Base=r9.0		ElementSize=1		SrcRegion=<16;16,1>		DstRegion=<1>		Type=ub
+
+.declare ub4ROBUF 	Base=r9.0		ElementSize=1		SrcRegion=<32;8,4>		DstRegion=<4>		Type=ub
+
+
+// End of common.inc
+
+
+//Interface:
+//Static Parameters:
+//r1
+
+
+//======================================================
+//Interface for serpent mode Chroma Denoise, added by Le
+//======================================================
+//r1
+
+
+//noise history thresholds (low and high)
+
+
+//temporal difference thresholds (high and low)
+
+
+//noise history thresholds (low and high)
+//#define ubNoiseHistMaxHigh  r1.22
+//#define ubNoiseHistMaxLow  r1.23
+//#define ubNoiseHistDeltaHigh  r1.24
+//#define ubNoiseHistDeltaLow  r1.25
+
+//Gaussian thresholds
+
+
+//temporal difference thresholds (default)
+
+
+//r2
+//history thresholds (default)
+
+
+//denoise factor  (0-63)
+
+
+//====================== Binding table (Explicit To DNUV)=========================================
+//Used by DN_UV kernels
+
+
+	//Pointer to Current Frame UV
+
+
+//r1-r6
+	//CURBE GRFs used as TEMP : Used for max computation and storing max temporarily. : r1-r6
+
+
+	.declare	ubCURBE_TEMP	Base=r1.0	ElementSize=1	Type=ub
+	.declare	uwCURBE_TEMP	Base=r1.0	ElementSize=2	Type=uw
+	.declare	wCURBE_TEMP		Base=r1.0	ElementSize=2	Type=w
+	.declare	fCURBE_TEMP		Base=r1.0	ElementSize=4	Type=f
+	.declare	udCURBE_TEMP		Base=r1.0	ElementSize=4	Type=ud
+	.declare	uwMAX_ABS_DIFF	Base=r5.0	ElementSize=2	Type=uw
+
+	//r1
+
+
+	//r3
+
+
+    //r4
+
+//r7
+	//All of the following have to be defined in the same GRF for optimal performance.
+
+
+//r8-24
+    //Previous Frame UV
+
+	.declare	udPREV_UV		Base=r8.0	ElementSize=4	Type=ud
+	.declare	ubPREV_UV		Base=r8.0	ElementSize=1	Type=ub
+
+
+//r25-48
+	//TEMP Space for any Usage.
+
+
+//=========================================================================
+//Definitions and declarations for serpent mode Chroma Denoise, added by Le
+//========================================================================= 	  
+
+
+	.declare	udGNE_UV		Base=r24.0	ElementSize=4	Type=ud
+  .declare	fGNE_UV		Base=r24.0	ElementSize=4	Type=f
+  .declare	ubGNE_UV		Base=r24.0	ElementSize=1	Type=ub
+
+  .declare	udMSGHDR_BNE_SERP	Base=r25.0	ElementSize=4	Type=ud
+  .declare	udMSGSRC_BNE_SERP	Base=r26.0	ElementSize=4	Type=ud
+
+
+  .declare	ubDN_UV_Thresholds Base=r26.0	ElementSize=1	Type=ub
+  .declare	ubDN_UV_Thresholds_Temp  Base=r27.0	ElementSize=1	Type=ub
+  .declare	udDN_UV_Thresholds Base=r26.0	ElementSize=4	Type=ud
+  .declare	udDN_UV_Thresholds_Temp Base=r27.0	ElementSize=4	Type=ud
+  .declare	fDN_UV_Thresholds Base=r26.0	ElementSize=4	Type=f
+  .declare	fDN_UV_Thresholds_Temp Base=r27.0	ElementSize=4	Type=f 	
+
+
+//====================================================================================
+
+
+	//TEMP23: To hold V data for PL3 surfaces
+	.declare	udCURR_V_TEMP	Base=r25.0	ElementSize=4	Type=ud
+	.declare	ubCURR_V_TEMP	Base=r25.0	ElementSize=1	Type=ub
+
+	//GRFs to calculate Median: r25-r42
+	.declare	ubMEDIAN_TEMP	Base=r25.0	ElementSize=1	Type=ub
+
+	//18 GRFs to hold difference : r25-r42
+	.declare	wDIFF			Base=r25.0	ElementSize=2	Type=w
+	.declare	uwDIFF			Base=r25.0	ElementSize=2	Type=uw
+
+	//Temporal Diff
+	.declare	wDIFF_TEMPORAL			Base=r25.0	ElementSize=2	Type=w
+	.declare	ubDIFF_TEMPORAL			Base=r25.0	ElementSize=1	Type=ub
+
+	//4 GRFs to hold Sobel Value : r43-46
+	.declare	wSOBEL_X	Base=r43.0	ElementSize=2	Type=w
+	.declare	uwSOBEL		Base=r43.0	ElementSize=2	Type=uw
+
+
+	//2 GRFs to hold SOAD temporarily: r47-48
+	.declare	uwSOAD			Base=r47.0	ElementSize=2	Type=uw
+
+	//Temp GRFs to hold extra YUYV pixels: r43-r48
+	.declare	ubTEMP5			Base=r43.0	ElementSize=1	Type=ub
+
+	//Temp GRFs in Median Calculation: r47-r48
+	.declare	ubTEMP1			Base=r47.0	ElementSize=1	Type=ub
+
+	.declare	uwTEMP0			Base=r48.0	ElementSize=2	Type=uw
+	.declare	ubTEMP0			Base=r48.0	ElementSize=1	Type=ub
+
+	//Temp Space to store Median : r49-50
+
+	.declare	ubMEDIAN	Base=r49.0	ElementSize=1	Type=ub
+
+//r49
+
+
+//r50
+    //Message Source
+
+
+//r51
+	//DN_UV History Surface
+
+	.declare	udHIST_UV		Base=r51.0	ElementSize=4	Type=ud
+	.declare	ubHIST_UV		Base=r51.0	ElementSize=1	Type=ub
+
+//r52 - r91
+	//r52
+	//Current Frame UV
+
+
+	.declare	udCURR_UV		Base=r52.0	ElementSize=4	Type=ud
+	.declare	ubCURR_UV		Base=r52.0	ElementSize=1	Type=ub
+
+	//r54
+	//CURBE COPY
+
+
+	//r55
+
+
+	.declare 	uwSOAD_MIN_8x4		Base=r56.0	ElementSize=2	Type=uw
+
+	//r61
+
+
+	//r62
+
+
+	//History Surface Temp Origin
+
+
+    //r63
+    //Current Frame Y Temp Origin
+
+
+	//BNE Surface Origin
+
+
+    //r70
+
+	.declare	uwDIFF_TEMPORAL_SUM4x4	Base=r70.0	ElementSize=2	Type=uw  //4 GRFs
+
+	//r74-91 : For Saving Dest UV (PL2/PL3)
+
+
+	.declare	ubMSGPAYLOAD_UV0	Base=r75.0	ElementSize=1	Type=ub
+
+
+	.declare	ubMSGPAYLOAD_U		Base=r75.0	ElementSize=1	Type=ub
+
+
+	.declare	ubMSGPAYLOAD_UV1	Base=r84.0	ElementSize=1	Type=ub
+
+
+	.declare	ubMSGPAYLOAD_V		Base=r84.0	ElementSize=1	Type=ub
+
+	//r90
+
+	.declare	uwDIFF_TEMPORAL_SUM4x4_FINAL	Base=r90.0	ElementSize=2	Type=uw  //2 GRFs
+
+//r92-127
+	//Current Frame Y
+
+
+	//r92
+    .declare	uwDIFF_TEMPORAL_SUM4x4_TEMP_0		Base=r92	ElementSize=2	Type=uw
+	//r101
+    .declare	uwDIFF_TEMPORAL_SUM4x4_TEMP_1		Base=r101	ElementSize=2	Type=uw
+	//r110
+    .declare	uwDIFF_TEMPORAL_SUM4x4_TEMP_2		Base=r110	ElementSize=2	Type=uw
+	//r119
+    .declare	uwDIFF_TEMPORAL_SUM4x4_TEMP_3		Base=r119	ElementSize=2	Type=uw
+
+	.declare	udCURR_Y0		Base=r93.0	ElementSize=4	Type=ud
+    .declare	ubCURR_Y0		Base=r93.0	ElementSize=1	Type=ub
+    .declare	udCURR_Y1		Base=r102.0	ElementSize=4	Type=ud
+	.declare	ubCURR_Y1		Base=r102.0	ElementSize=1	Type=ub
+	.declare	udCURR_Y2		Base=r111.0	ElementSize=4	Type=ud
+	.declare	ubCURR_Y2		Base=r111.0	ElementSize=1	Type=ub
+	.declare	udCURR_Y3		Base=r120.0	ElementSize=4	Type=ud
+	.declare	ubCURR_Y3		Base=r120.0	ElementSize=1	Type=ub
+
+	//r92: To hold U data for PL3 surfaces
+	.declare	udCURR_U_TEMP		Base=r92.0	ElementSize=4	Type=ud
+    .declare	ubCURR_U_TEMP		Base=r92.0	ElementSize=1	Type=ub
+
+    //r112: To hold U data for PL3 surfaces
+	.declare	udPREV_U_TEMP		Base=r112.0	ElementSize=4	Type=ud
+	.declare	ubPREV_U_TEMP		Base=r112.0	ElementSize=1	Type=ub
+
+	//r120: To hold V data for PL3 surfaces
+	.declare	udPREV_V_TEMP		Base=r120.0	ElementSize=4	Type=ud
+	.declare	ubPREV_V_TEMP		Base=r120.0	ElementSize=1	Type=ub
+
+
+	// Initialize the five message-header GRFs used by the loads below with a copy of r0.
+	mov (8)   r50.0<1>:ud		r0.0<8;8,1>:ud	// r50: header reused for the B/C border reads, previous-frame reads and history read
+	mov (8)   r92.0<1>:ud		r0.0<8;8,1>:ud	// r92: header for block A1
+	mov (8)   r101.0<1>:ud		r0.0<8;8,1>:ud	// r101: header for block A2
+	mov (8)   r110.0<1>:ud		r0.0<8;8,1>:ud	// r110: header for block A3
+	mov (8)   r119.0<1>:ud		r0.0<8;8,1>:ud	// r119: header for block A4
+
+
+
+//Module Name 	: 	DN_UV_YUY2_Load_Curr_Frame_YUV
+//Author		:	Tatiya, Rupesh
+//Description	:	Loads Current Frame YUV data for YUY2 input.
+
+
+
+//Module name 	:  DN_UV_Load_Curr_Frame_UV
+//Author		:  Tatiya, Rupesh
+//Description	:  Loads Current Frame (UV only).
+//				   We need 4 extra rows (2 per field) and 2 extra pixel (1 each side) for both U and V each.
+//				   The processing size is 16x16 U and V each. So we need : U size - 18x20, V size - 18x20, UV size - 36x20, YUYV size - 72x20.
+
+
+
+
+//72x20 interleaved YUYV block is partitioned as follows:
+//				<------ 36 --------> <--------36 ------->
+//				------------------------------------------
+//				|   |	32x2 B1		|		32x2 B2	 |	 |
+//				| 4 |--------------------------------| 4 |
+//				| x |				|				 | x |
+//				|20 |     32x8 A1   |		32x8 A3  | 20|
+//				|   |---------------|----------------|	 |
+//				| C1|	  32x8 A2  	|	   32x8  A4	 | C2|
+//				|   |				|		 		 |	 |
+//				|   |--------------------------------|   |
+//				|   |	32x2 B3		|		32x2 B4	 |	 |
+//				------------------------------------------
+//
+// Coordinates: (x, y), (x, y+8), (x+32, y), (x+32, y+8), (x-4, y-2), (x+64, y-2),(x, y-2), (x+32, y-2), (x, y+16), (x+32, y+16)
+
+	//UV surface origin: (2xORIX, ORIY)
+	add  (2)	r7.4<1>:w		r7.0<2;2,1>:w	 	r4.4<2;2,1>:w	 { AccWrEn } // Source Block origin
+	shl  (1)	r7.4<1>:w		acc0.4<0;1,0>:w		1:w	// exec size 1: only the X origin is doubled -> (2xORIX, ORIY)
+
+	//A1: 32x8 block at (x, y)
+	mov  (2)	r92.0<1>:d		r7.4<2;2,1>:w	 	{ AccWrEn }		// Source Block origin; AccWrEn also keeps x in acc0.0 and y in acc0.1 for the reads below
+ 	mov  (1)   	r92.2<1>:ud		0x7001F:ud		// block size word used for every 32x8 "A" block
+ 	send (8)	udCURR_Y0(0)<1>			r92			0x4	0x2890003:ud
+
+	//A2: 32x8 block at (x, y+8)
+	mov  (1)    r101.0<1>:d	acc0.0<0;1,0>:d							
+ 	add  (1)    r101.1<1>:d	acc0.1<0;1,0>:d		8:d					
+ 	mov  (1)   	r101.2<1>:ud	0x7001F:ud		
+ 	send (8)	udCURR_Y1(0)<1>		r101		0x4	0x2890003:ud
+
+ 	//B1: 32x2 block at (x, y-2)
+	mov  (1)    r50.0<1>:d	acc0.0<0;1,0>:d								
+	add  (1)    r50.1<1>:d	acc0.1<0;1,0>:d			-2:d   				
+	mov  (1)   	r50.2<1>:ud	0x1001F:ud			// block size word used for every 32x2 "B" block
+	send (8)	udCURR_UV(0)<1>			r50		0x4	0x2290003:ud
+
+	//B3: 32x2 block at (x, y+16); r50.2 still holds 0x1001F from B1
+	mov  (1)    r50.0<1>:d	acc0.0<0;1,0>:d								
+	add  (1)    r50.1<1>:d	acc0.1<0;1,0>:d			16:d   				
+	send (8)	udCURR_UV(18)<1>		r50		0x4	0x2290003:ud
+
+	//C1: 4x20 block at (x-4, y-2)
+	add  (1)    r50.0<1>:d	acc0.0<0;1,0>:d			-4:d				
+	add  (1)    r50.1<1>:d	acc0.1<0;1,0>:d			-2:d				
+	mov  (1)   	r50.2<1>:ud	0x130003:ud			// block size word used for the 4x20 "C" blocks
+ 	send (8)	ubTEMP5(0)<1>			r50		0x4	0x2390003:ud
+
+ 	//A3: 32x8 block at (x+32, y)
+	add  (1)    r110.0<1>:d	acc0.0<0;1,0>:d		32:d				
+ 	mov  (1)    r110.1<1>:d	acc0.1<0;1,0>:d							
+ 	mov  (1)   	r110.2<1>:ud	0x7001F:ud		
+ 	send (8)	udCURR_Y2(0)<1>		r110		0x4	0x2890003:ud
+
+ 	//A4: 32x8 block at (x+32, y+8)
+	add  (1)    r119.0<1>:d	acc0.0<0;1,0>:d		32:d				
+ 	add  (1)    r119.1<1>:d	acc0.1<0;1,0>:d		 8:d				
+ 	mov  (1)   	r119.2<1>:ud	0x7001F:ud		
+ 	send (8)	udCURR_Y3(0)<1>		r119		0x4	0x2890003:ud
+
+	//B2: 32x2 block at (x+32, y-2)
+	add  (1)    r50.0<1>:d	acc0.0<0;1,0>:d			32:d				
+	add  (1)    r50.1<1>:d	acc0.1<0;1,0>:d			-2:d   				
+	mov  (1)   	r50.2<1>:ud	0x1001F:ud			
+	send (8)	udCURR_UV(20)<1>		r50		0x4	0x2290003:ud
+
+	//B4: 32x2 block at (x+32, y+16); r50.2 still holds 0x1001F from B2
+	add  (1)    r50.0<1>:d	acc0.0<0;1,0>:d			32:d				
+	add  (1)    r50.1<1>:d	acc0.1<0;1,0>:d			16:d   				
+	send (8)	udCURR_UV(38)<1>		r50		0x4	0x2290003:ud
+
+	//C2: 4x20 block at (x+64, y-2)
+ 	add  (1)    r50.0<1>:d	acc0.0<0;1,0>:d			64:d				
+	add  (1)    r50.1<1>:d	acc0.1<0;1,0>:d			-2:d				
+	mov  (1)   	r50.2<1>:ud	0x130003:ud			
+ 	send (8)	ubTEMP5(3)<1>			r50		0x4	0x2390003:ud
+
+	//History Origin, Current Y origin and BNE surface origin - all are in inline GRF. Use , . -rT.
+
+ 	//Calculate Origin For History Surface: (ORIX/4, ORIY/4)
+	shr  (2)	r7.2<1>:w		r7.0<2;2,1>:w		2:w			// stored in r7.2/r7.3
+
+	//Calculate Origin For BNE Surface: (ORIX/8, ORIY/8)
+	shr  (2)	r7.6<1>:w		r7.0<2;2,1>:w		3:w			// stored in r7.6/r7.7
+
+
+
+
+//Module Name 	: 	DN_UV_YUY2_Load_Prev_Frame_YUV.asm
+//Author		:	Tatiya, Rupesh
+//Description	:	Loads Previous Frame YUV data for YUY2 input.
+
+
+
+//Module Name 	: 	DN_UV_Load_Prev_Frame_UV
+//Author		:	Tatiya, Rupesh
+//Description	:   Loads Prev Frame (UV only). U size - 16x16, V size - 16x16, UV size - 32x16, YUYV size - 64x16.
+
+
+
+
+	mov  (2)	r50.0<1>:d	r7.4<2;2,1>:w	 	{ AccWrEn } 	// Source block origin (X, Y); AccWrEn keeps X in acc0.0 and Y in acc0.1
+	mov  (1)	r50.2<1>:ud	0x7001F:ud  					// Same block size word as the 32x8 current-frame reads; four reads cover the 64x16 YUYV area
+	send (8)	udPREV_UV(0)<1>			r50		0x4	0x2890000:ud	// top-left block at (X, Y)
+
+	add  (1)	r50.1<1>:ud 	acc0.1<0;1,0>:d		8:w										// Add 8 to Y origin
+	send (8)	udPREV_UV(8)<1>			r50		0x4	0x2890000:ud	// bottom-left block at (X, Y+8)
+
+	add  (1)	r50.0<1>:ud 	acc0.0<0;1,0>:d		32:w					// Add 32 to X origin
+	mov  (1)	r50.1<1>:ud 	acc0.1<0;1,0>:d								// Restore Y origin
+	send (8)	udPREV_UV(16)<1>		r50		0x4	0x2890000:ud	// top-right block at (X+32, Y)
+
+	add  (1)	r50.1<1>:ud 	acc0.1<0;1,0>:d		 8:w	// Add 8 to Y origin (fixed: previously read acc0.0, i.e. X, giving Y = X+8)
+	send (8)	udPREV_UV(24)<1>		r50		0x4	0x2890000:ud	// bottom-right block at (X+32, Y+8)
+
+
+	//TODO - See if History loading can be combined with Prev Frame Load. - rT
+
+
+//Module name 	:  DN_UV_Load_Hist_UV
+//Author		:  Tatiya, Rupesh
+//Description	:  Load DN History for UV denoise. 4x4 for each U & V.
+
+
+
+
+	mov  (2)	r50.0<1>:d	r7.2<2;2,1>:w			// r7.2/r7.3 = (ORIX/4, ORIY/4), computed by the current-frame loader
+	mov  (1)	r50.2<1>:ud	0x30007:ud  		// block size word; presumably 8 bytes x 4 rows (same encoding as 0x7001F = 32x8) - confirm
+	send (8)	udHIST_UV(0)<1>			r50		0x4	0x2190022:ud	// history lands in r51 (udHIST_UV base)
+
+
+
+//Module Name: DN_UV_YUY2_Extract_Curr_Frame_UV
+//Author	 : Tatiya, Rupesh
+//Description: Extract UV data from current YUY2 frame.
+
+//72x20 interleaved YUYV block is partitioned as follows:
+//				<------ 36 --------> <--------36 ------->
+//				------------------------------------------
+//				|   |	32x2 B1		|		32x2 B2	 |	 |
+//				| 4 |--------------------------------| 4 |
+//				| x |				|				 | x |
+//				|20 |     32x8 A1   |		32x8 A3  | 20|
+//				|   |---------------|----------------|	 |
+//				| C1|	  32x8 A2  	|	   32x8  A4	 | C2|
+//				|   |				|		 		 |	 |
+//				|   |--------------------------------|   |
+//				|   |	32x2 B3		|		32x2 B4	 |	 |
+//				------------------------------------------
+
+	// Set SRC pointers according to Input packing i.e. YUYV, YVYU, UYVY, VYUY
+	add  (1) a0.0<1>:uw    	r4.1<0;1,0>:ub    2976:w					//A1   (2976 = 93*32  -> r93,  udCURR_Y0 base)
+	add  (1) a0.1<1>:uw    	r4.1<0;1,0>:ub    3264:w				//A2   (3264 = 102*32 -> r102, udCURR_Y1 base)
+	add  (1) a0.2<1>:uw    	r4.1<0;1,0>:ub    3552:w				//A3   (3552 = 111*32 -> r111, udCURR_Y2 base)
+	add  (1) a0.3<1>:uw    	r4.1<0;1,0>:ub    3840:w				//A4   (3840 = 120*32 -> r120, udCURR_Y3 base)
+	add  (1) a0.4<1>:uw    	r4.1<0;1,0>:ub    1664:w				//B1   (1664 = 52*32  -> r52,  ubCURR_UV(0))
+	add  (1) a0.5<1>:uw    	r4.1<0;1,0>:ub    2240:w				//B3B2 (2240 = 70*32  -> r70,  ubCURR_UV(18); B2 is at +64 = ubCURR_UV(20))
+	add  (1) a0.6<1>:uw    	r4.1<0;1,0>:ub    2880:w				//B4   (2880 = 90*32  -> r90,  ubCURR_UV(38))
+	add  (1) a0.7<1>:uw    	r4.1<0;1,0>:ub    1376:w				//C1C2 (1376 = 43*32  -> r43,  ubTEMP5(0); C2 is at +96 = ubTEMP5(3))
+
+	//Left 20x20 UV : 16x16 UV (Original)+4 extra rows(2 per field on top/bottom)+4 extra pixels(2 on left/right)
+
+	//A1 - every 2nd byte of each 32-byte row (<32;16,2>) goes to bytes 2..17 of rows 2..9
+		mov (16)  ubCURR_UV(2,2)<1>		r[a0.0, 0]<32;16,2>		
+		mov (16)  ubCURR_UV(3,2)<1>		r[a0.0, 32]<32;16,2>		
+		mov (16)  ubCURR_UV(4,2)<1>		r[a0.0, 64]<32;16,2>		
+		mov (16)  ubCURR_UV(5,2)<1>		r[a0.0, 96]<32;16,2>		
+		mov (16)  ubCURR_UV(6,2)<1>		r[a0.0, 128]<32;16,2>		
+		mov (16)  ubCURR_UV(7,2)<1>		r[a0.0, 160]<32;16,2>		
+		mov (16)  ubCURR_UV(8,2)<1>		r[a0.0, 192]<32;16,2>		
+		mov (16)  ubCURR_UV(9,2)<1>		r[a0.0, 224]<32;16,2>		
+
+	//A2 - rows 10..17
+		mov (16)  ubCURR_UV(10,2)<1>		r[a0.1, 0]<32;16,2>		
+		mov (16)  ubCURR_UV(11,2)<1>		r[a0.1, 32]<32;16,2>		
+		mov (16)  ubCURR_UV(12,2)<1>		r[a0.1, 64]<32;16,2>		
+		mov (16)  ubCURR_UV(13,2)<1>		r[a0.1, 96]<32;16,2>		
+		mov (16)  ubCURR_UV(14,2)<1>		r[a0.1, 128]<32;16,2>		
+		mov (16)  ubCURR_UV(15,2)<1>		r[a0.1, 160]<32;16,2>		
+		mov (16)  ubCURR_UV(16,2)<1>		r[a0.1, 192]<32;16,2>		
+		mov (16)  ubCURR_UV(17,2)<1>		r[a0.1, 224]<32;16,2>		
+
+	//B1 - rows 0..1; source and destination are the same GRFs (a0.4 points at ubCURR_UV(0)), i.e. compacted in place
+		mov (16)  ubCURR_UV(0,2)<1>		r[a0.4, 0]<32;16,2>		
+		mov (16)  ubCURR_UV(1,2)<1>		r[a0.4, 32]<32;16,2>		
+
+	//B3 - rows 18..19; also compacted in place (a0.5 points at ubCURR_UV(18))
+		mov (16)  ubCURR_UV(18,2)<1>		r[a0.5, 0]<32;16,2>		
+		mov (16)  ubCURR_UV(19,2)<1>		r[a0.5, 32]<32;16,2>		
+
+	//TODO - Find a way to reduce this 40 SIMD2 instructions - rT
+	//C1 - left border: one 4-byte source row per destination row, written to bytes 0..1
+		mov (2)  ubCURR_UV(0,0)<1>			r[a0.7, 0]<4;2,2>		
+		mov (2)  ubCURR_UV(1,0)<1>			r[a0.7, 4]<4;2,2>		
+		mov (2)  ubCURR_UV(2,0)<1>			r[a0.7, 8]<4;2,2>		
+		mov (2)  ubCURR_UV(3,0)<1>			r[a0.7, 12]<4;2,2>		
+		mov (2)  ubCURR_UV(4,0)<1>			r[a0.7, 16]<4;2,2>		
+		mov (2)  ubCURR_UV(5,0)<1>			r[a0.7, 20]<4;2,2>		
+		mov (2)  ubCURR_UV(6,0)<1>			r[a0.7, 24]<4;2,2>		
+		mov (2)  ubCURR_UV(7,0)<1>			r[a0.7, 28]<4;2,2>		
+		mov (2)  ubCURR_UV(8,0)<1>			r[a0.7, 32]<4;2,2>		
+		mov (2)  ubCURR_UV(9,0)<1>			r[a0.7, 36]<4;2,2>		
+		mov (2)  ubCURR_UV(10,0)<1>			r[a0.7, 40]<4;2,2>		
+		mov (2)  ubCURR_UV(11,0)<1>			r[a0.7, 44]<4;2,2>		
+		mov (2)  ubCURR_UV(12,0)<1>			r[a0.7, 48]<4;2,2>		
+		mov (2)  ubCURR_UV(13,0)<1>			r[a0.7, 52]<4;2,2>		
+		mov (2)  ubCURR_UV(14,0)<1>			r[a0.7, 56]<4;2,2>		
+		mov (2)  ubCURR_UV(15,0)<1>			r[a0.7, 60]<4;2,2>		
+		mov (2)  ubCURR_UV(16,0)<1>			r[a0.7, 64]<4;2,2>		
+		mov (2)  ubCURR_UV(17,0)<1>			r[a0.7, 68]<4;2,2>		
+		mov (2)  ubCURR_UV(18,0)<1>			r[a0.7, 72]<4;2,2>		
+		mov (2)  ubCURR_UV(19,0)<1>			r[a0.7, 76]<4;2,2>		
+
+	//Right border (bytes 18..19) of rows 0..1: first 4 source bytes of each B2 row. Must read B2 before it is compacted in place further down.
+		mov (2)  ubCURR_UV(0,18)<1>		r[a0.5, 64]<4;2,2>	
+		mov (2)  ubCURR_UV(1,18)<1>		r[a0.5, 96]<4;2,2>	
+
+	//Right border of rows 2..9: first 4 source bytes of each A3 row
+		mov (2)  ubCURR_UV(2,18)<1>		r[a0.2, 0]<4;2,2>			
+		mov (2)  ubCURR_UV(3,18)<1>		r[a0.2, 32]<4;2,2>			
+		mov (2)  ubCURR_UV(4,18)<1>		r[a0.2, 64]<4;2,2>			
+		mov (2)  ubCURR_UV(5,18)<1>		r[a0.2, 96]<4;2,2>			
+		mov (2)  ubCURR_UV(6,18)<1>		r[a0.2, 128]<4;2,2>			
+		mov (2)  ubCURR_UV(7,18)<1>		r[a0.2, 160]<4;2,2>			
+		mov (2)  ubCURR_UV(8,18)<1>		r[a0.2, 192]<4;2,2>			
+		mov (2)  ubCURR_UV(9,18)<1>		r[a0.2, 224]<4;2,2>			
+
+	//Right border of rows 10..17: first 4 source bytes of each A4 row
+		mov (2)  ubCURR_UV(10,18)<1>		r[a0.3, 0]<4;2,2>			
+		mov (2)  ubCURR_UV(11,18)<1>		r[a0.3, 32]<4;2,2>			
+		mov (2)  ubCURR_UV(12,18)<1>		r[a0.3, 64]<4;2,2>			
+		mov (2)  ubCURR_UV(13,18)<1>		r[a0.3, 96]<4;2,2>			
+		mov (2)  ubCURR_UV(14,18)<1>		r[a0.3, 128]<4;2,2>			
+		mov (2)  ubCURR_UV(15,18)<1>		r[a0.3, 160]<4;2,2>			
+		mov (2)  ubCURR_UV(16,18)<1>		r[a0.3, 192]<4;2,2>			
+		mov (2)  ubCURR_UV(17,18)<1>		r[a0.3, 224]<4;2,2>			
+
+	//Right border of rows 18..19: first 4 source bytes of each B4 row. Must read B4 before it is compacted in place further down.
+		mov (2)  ubCURR_UV(18,18)<1>		r[a0.6, 0]<4;2,2>		
+		mov (2)  ubCURR_UV(19,18)<1>		r[a0.6, 32]<4;2,2>		
+
+	//Right 20x20 UV : 16x16 UV (Original)+4 extra rows(2 per field on top/bottom)+4 extra pixels(2 on left/right)
+
+	//A3 - every 2nd byte of each 32-byte row goes to bytes 2..17 of rows 22..29
+		mov (16)  ubCURR_UV(22,2)<1>		r[a0.2, 0]<32;16,2>		
+		mov (16)  ubCURR_UV(23,2)<1>		r[a0.2, 32]<32;16,2>		
+		mov (16)  ubCURR_UV(24,2)<1>		r[a0.2, 64]<32;16,2>		
+		mov (16)  ubCURR_UV(25,2)<1>		r[a0.2, 96]<32;16,2>		
+		mov (16)  ubCURR_UV(26,2)<1>		r[a0.2, 128]<32;16,2>		
+		mov (16)  ubCURR_UV(27,2)<1>		r[a0.2, 160]<32;16,2>		
+		mov (16)  ubCURR_UV(28,2)<1>		r[a0.2, 192]<32;16,2>		
+		mov (16)  ubCURR_UV(29,2)<1>		r[a0.2, 224]<32;16,2>		
+
+	//A4 - rows 30..37
+		mov (16)  ubCURR_UV(30,2)<1>		r[a0.3, 0]<32;16,2>		
+		mov (16)  ubCURR_UV(31,2)<1>		r[a0.3, 32]<32;16,2>		
+		mov (16)  ubCURR_UV(32,2)<1>		r[a0.3, 64]<32;16,2>		
+		mov (16)  ubCURR_UV(33,2)<1>		r[a0.3, 96]<32;16,2>		
+		mov (16)  ubCURR_UV(34,2)<1>		r[a0.3, 128]<32;16,2>		
+		mov (16)  ubCURR_UV(35,2)<1>		r[a0.3, 160]<32;16,2>		
+		mov (16)  ubCURR_UV(36,2)<1>		r[a0.3, 192]<32;16,2>		
+		mov (16)  ubCURR_UV(37,2)<1>		r[a0.3, 224]<32;16,2>		
+
+	//B2 - rows 20..21; source and destination are the same GRFs (a0.5+64 points at ubCURR_UV(20)), i.e. compacted in place
+		mov (16)  ubCURR_UV(20,2)<1>		r[a0.5, 64]<32;16,2>	
+		mov (16)  ubCURR_UV(21,2)<1>		r[a0.5, 96]<32;16,2>	
+
+	//B4 - rows 38..39; also compacted in place (a0.6 points at ubCURR_UV(38))
+		mov (16)  ubCURR_UV(38,2)<1>		r[a0.6, 0]<32;16,2>		
+		mov (16)  ubCURR_UV(39,2)<1>		r[a0.6, 32]<32;16,2>		
+
+	//TODO - Find a way to reduce this 40 SIMD2 instructions - rT
+	//C2 - right border (bytes 18..19): C2 data starts 96 bytes after C1, one 4-byte source row per destination row
+		mov (2)  ubCURR_UV(20,18)<1>		r[a0.7, 96]<4;2,2>		
+		mov (2)  ubCURR_UV(21,18)<1>		r[a0.7, 100]<4;2,2>		
+		mov (2)  ubCURR_UV(22,18)<1>		r[a0.7, 104]<4;2,2>		
+		mov (2)  ubCURR_UV(23,18)<1>		r[a0.7, 108]<4;2,2>		
+		mov (2)  ubCURR_UV(24,18)<1>		r[a0.7, 112]<4;2,2>		
+		mov (2)  ubCURR_UV(25,18)<1>		r[a0.7, 116]<4;2,2>		
+		mov (2)  ubCURR_UV(26,18)<1>		r[a0.7, 120]<4;2,2>		
+		mov (2)  ubCURR_UV(27,18)<1>		r[a0.7, 124]<4;2,2>		
+		mov (2)  ubCURR_UV(28,18)<1>		r[a0.7, 128]<4;2,2>		
+		mov (2)  ubCURR_UV(29,18)<1>		r[a0.7, 132]<4;2,2>		
+		mov (2)  ubCURR_UV(30,18)<1>		r[a0.7, 136]<4;2,2>		
+		mov (2)  ubCURR_UV(31,18)<1>		r[a0.7, 140]<4;2,2>		
+		mov (2)  ubCURR_UV(32,18)<1>		r[a0.7, 144]<4;2,2>		
+		mov (2)  ubCURR_UV(33,18)<1>		r[a0.7, 148]<4;2,2>		
+		mov (2)  ubCURR_UV(34,18)<1>		r[a0.7, 152]<4;2,2>		
+		mov (2)  ubCURR_UV(35,18)<1>		r[a0.7, 156]<4;2,2>		
+		mov (2)  ubCURR_UV(36,18)<1>		r[a0.7, 160]<4;2,2>		
+		mov (2)  ubCURR_UV(37,18)<1>		r[a0.7, 164]<4;2,2>		
+		mov (2)  ubCURR_UV(38,18)<1>		r[a0.7, 168]<4;2,2>		
+		mov (2)  ubCURR_UV(39,18)<1>		r[a0.7, 172]<4;2,2>		
+
+	//Left border (bytes 0..1) of rows 20..21: last 4 source bytes (offset 28) of each B1 row
+		mov (2)  ubCURR_UV(20,0)<1>		r[a0.4, 28]<4;2,2>			
+		mov (2)  ubCURR_UV(21,0)<1>		r[a0.4, 60]<4;2,2>			
+
+	//Left border of rows 22..29: last 4 source bytes of each A1 row
+		mov (2)  ubCURR_UV(22,0)<1>		r[a0.0, 28]<4;2,2>			
+		mov (2)  ubCURR_UV(23,0)<1>		r[a0.0, 60]<4;2,2>			
+		mov (2)  ubCURR_UV(24,0)<1>		r[a0.0, 92]<4;2,2>			
+		mov (2)  ubCURR_UV(25,0)<1>		r[a0.0, 124]<4;2,2>			
+		mov (2)  ubCURR_UV(26,0)<1>		r[a0.0, 156]<4;2,2>			
+		mov (2)  ubCURR_UV(27,0)<1>		r[a0.0, 188]<4;2,2>			
+		mov (2)  ubCURR_UV(28,0)<1>		r[a0.0, 220]<4;2,2>			
+		mov (2)  ubCURR_UV(29,0)<1>		r[a0.0, 252]<4;2,2>			
+
+	//Left border of rows 30..37: last 4 source bytes of each A2 row
+		mov (2)  ubCURR_UV(30,0)<1>		r[a0.1, 28]<4;2,2>			
+		mov (2)  ubCURR_UV(31,0)<1>		r[a0.1, 60]<4;2,2>			
+		mov (2)  ubCURR_UV(32,0)<1>		r[a0.1, 92]<4;2,2>			
+		mov (2)  ubCURR_UV(33,0)<1>		r[a0.1, 124]<4;2,2>			
+		mov (2)  ubCURR_UV(34,0)<1>		r[a0.1, 156]<4;2,2>			
+		mov (2)  ubCURR_UV(35,0)<1>		r[a0.1, 188]<4;2,2>			
+		mov (2)  ubCURR_UV(36,0)<1>		r[a0.1, 220]<4;2,2>			
+		mov (2)  ubCURR_UV(37,0)<1>		r[a0.1, 252]<4;2,2>			
+
+	//Left border of rows 38..39: last 4 source bytes of each B3 row
+		mov (2)  ubCURR_UV(38,0)<1>		r[a0.5, 28]<4;2,2>			
+		mov (2)  ubCURR_UV(39,0)<1>		r[a0.5, 60]<4;2,2>			
+
+
+
+// Module Name 	:	DN_UV_YUY2_Extract_Prev_Frame_UV
+// Author		:	Tatiya, Rupesh
+// Description	:	Extract UV from previous frame YUY2.
+
+	// Set SRC pointers according to Input packing i.e. YUYV, YVYU, UYVY, VYUY
+	add  (1) a0.0<1>:uw    	r4.1<0;1,0>:ub    256:w		// 256 = 8*32  -> r8,  first half of the loaded previous frame (udPREV_UV(0))
+	add  (1) a0.1<1>:uw    	r4.1<0;1,0>:ub    768:w		// 768 = 24*32 -> r24, second half of the loaded previous frame (udPREV_UV(16))
+
+		mov (16)  ubPREV_UV(0,0)<1>		r[a0.0, 0]<32;16,2>:ub			// each 32-byte source row shrinks to 16 bytes, two rows per GRF; destination overlaps the source GRFs, so order matters
+		mov (16)  ubPREV_UV(0,16)<1>		r[a0.0, 32]<32;16,2>:ub		
+		mov (16)  ubPREV_UV(1,0)<1>		r[a0.0, 64]<32;16,2>:ub			
+		mov (16)  ubPREV_UV(1,16)<1>		r[a0.0, 96]<32;16,2>:ub		
+		mov (16)  ubPREV_UV(2,0)<1>		r[a0.0, 128]<32;16,2>:ub			
+		mov (16)  ubPREV_UV(2,16)<1>		r[a0.0, 160]<32;16,2>:ub		
+		mov (16)  ubPREV_UV(3,0)<1>		r[a0.0, 192]<32;16,2>:ub			
+		mov (16)  ubPREV_UV(3,16)<1>		r[a0.0, 224]<32;16,2>:ub		
+		mov (16)  ubPREV_UV(4,0)<1>		r[a0.0, 256]<32;16,2>:ub			
+		mov (16)  ubPREV_UV(4,16)<1>		r[a0.0, 288]<32;16,2>:ub		
+		mov (16)  ubPREV_UV(5,0)<1>		r[a0.0, 320]<32;16,2>:ub			
+		mov (16)  ubPREV_UV(5,16)<1>		r[a0.0, 352]<32;16,2>:ub		
+		mov (16)  ubPREV_UV(6,0)<1>		r[a0.0, 384]<32;16,2>:ub			
+		mov (16)  ubPREV_UV(6,16)<1>		r[a0.0, 416]<32;16,2>:ub		
+		mov (16)  ubPREV_UV(7,0)<1>		r[a0.0, 448]<32;16,2>:ub			
+		mov (16)  ubPREV_UV(7,16)<1>		r[a0.0, 480]<32;16,2>:ub		
+
+		mov (16)  ubPREV_UV(8,0)<1>		r[a0.1, 0]<32;16,2>:ub			// second half: 16 rows starting at r24 are packed into ubPREV_UV(8)..(15)
+		mov (16)  ubPREV_UV(8,16)<1>		r[a0.1, 32]<32;16,2>:ub		
+		mov (16)  ubPREV_UV(9,0)<1>		r[a0.1, 64]<32;16,2>:ub			
+		mov (16)  ubPREV_UV(9,16)<1>		r[a0.1, 96]<32;16,2>:ub		
+		mov (16)  ubPREV_UV(10,0)<1>		r[a0.1, 128]<32;16,2>:ub			
+		mov (16)  ubPREV_UV(10,16)<1>		r[a0.1, 160]<32;16,2>:ub		
+		mov (16)  ubPREV_UV(11,0)<1>		r[a0.1, 192]<32;16,2>:ub			
+		mov (16)  ubPREV_UV(11,16)<1>		r[a0.1, 224]<32;16,2>:ub		
+		mov (16)  ubPREV_UV(12,0)<1>		r[a0.1, 256]<32;16,2>:ub			
+		mov (16)  ubPREV_UV(12,16)<1>		r[a0.1, 288]<32;16,2>:ub		
+		mov (16)  ubPREV_UV(13,0)<1>		r[a0.1, 320]<32;16,2>:ub			
+		mov (16)  ubPREV_UV(13,16)<1>		r[a0.1, 352]<32;16,2>:ub		
+		mov (16)  ubPREV_UV(14,0)<1>		r[a0.1, 384]<32;16,2>:ub			
+		mov (16)  ubPREV_UV(14,16)<1>		r[a0.1, 416]<32;16,2>:ub		
+		mov (16)  ubPREV_UV(15,0)<1>		r[a0.1, 448]<32;16,2>:ub			
+		mov (16)  ubPREV_UV(15,16)<1>		r[a0.1, 480]<32;16,2>:ub		
+
+
+
+//Module Name : DN_UV_Noise_Detection_UV
+//Author	  : Tatiya, Rupesh
+//Description : Performs noise detection on 16x16 U and 16x16 V each.
+
+
+
+//Module Name 	: DN_UV_Move_CURBE_Inline_UV.asm
+//Author		: Tatiya, Rupesh
+
+
+
+
+	//Move CURBE data to another space - so that it can be used as Temp Space --> r1 - r6
+	mov (4)	r54.28<1>:ub		r2.28<4;4,1>:ub		//Dest. YUY2 offset
+	mov (2) r54.5<1>:ud		r4.0<4;2,2>:ud		//Src YUY2 offset and Origin offset (dwords r4.0 and r4.2 -> r54.5, r54.6)
+	mov (4)	r55.28<1>:ub		r1.0<4;4,1>:ub	// first 4 bytes of r1 -> r55.28..31
+
+	mov (8) r61.20<1>:ub		r1.4<8;8,1>:ub		// r1.4..11  -> r61.20..27
+	mov (4) r61.28<1>:ub		r1.12<4;4,1>:ub		// r1.12..15 -> r61.28..31
+
+	//Move Inline Data to another space - so that it can be used as Temp Space --> r7
+	mov (4) r62.10<1>:w				r7.0<4;4,1>:w	// words r7.0..r7.3 -> r62.10..13
+	mov (4) r63.10<1>:w		r7.4<4;4,1>:w	// words r7.4..r7.7 -> r63.10..13
+
+
+
+
+
+//Module Name	: DN_UV_Noise_Detection_Set_Top_Region_N
+//Author		: Tatiya, Rupesh
+//Description	: Sets sub-region region N from Top region.
+
+
+	mov (1) a0.0:uw				1664:uw				// 1664 = 52*32 (byte address of r52.0)
+	mov (1)	a0.1:uw	1816:uw 	// 1816 = 56*32 + 24 (byte address of r56.24)
+
+
+
+
+add (1) r7.7<1>:d ip:ud 32:d { NoCompact }	// r7.7 = ip + 32; presumably the return address for the fast jump below - confirm in DN_UV_NOISE_DETECTION_UV
+ jmpi (1) DN_UV_NOISE_DETECTION_UV { NoCompact }
+
+
+
+
+
+//Module Name	: DN_UV_Noise_Detection_Set_Top_Region_N
+//Author		: Tatiya, Rupesh
+//Description	: Sets sub-region region N from Top region.
+
+
+	//TODO - remove one instruction here using arithmetic. -rT
+	mov (1) a0.0:uw				1792:uw			// 1792 = 56*32 (byte address of r56.0)
+	mov (1)	a0.1:uw	1820:uw 	// 1820 = 56*32 + 28 (byte address of r56.28)
+
+
+
+
+add (1) r7.7<1>:d ip:ud 32:d { NoCompact }	// r7.7 = ip + 32; presumably the return address for the fast jump below - confirm in DN_UV_NOISE_DETECTION_UV
+ jmpi (1) DN_UV_NOISE_DETECTION_UV { NoCompact }
+
+
+
+
+
+//Module Name	: DN_UV_Noise_Detection_Set_Top_Region_N
+//Author		: Tatiya, Rupesh
+//Description	: Sets sub-region region N from Top region.
+
+
+	//TODO - remove one instruction here using arithmetic. -rT
+	mov (1) a0.0:uw				1920:uw			// 1920 = 60*32 (byte address of r60.0)
+	mov (1)	a0.1:uw	1848:uw 	// 1848 = 57*32 + 24 (byte address of r57.24)
+
+
+
+
+add (1) r7.7<1>:d ip:ud 32:d { NoCompact }	// r7.7 = ip + 32; presumably the return address for the fast jump below - confirm in DN_UV_NOISE_DETECTION_UV
+ jmpi (1) DN_UV_NOISE_DETECTION_UV { NoCompact }
+
+
+
+
+
+//Module Name	: DN_UV_Noise_Detection_Set_Top_Region_N
+//Author		: Tatiya, Rupesh
+//Description	: Sets sub-region region N from Top region.
+
+
+	//TODO - remove one instruction here using arithmetic. -rT
+	mov (1) a0.0:uw				2048:uw			// 2048 = 64*32 (byte address of r64.0)
+	mov (1)	a0.1:uw	1852:uw 	// 1852 = 57*32 + 28 (byte address of r57.28)
+
+
+
+
+add (1) r7.7<1>:d ip:ud 32:d { NoCompact }	// r7.7 = ip + 32; presumably the return address for the fast jump below - confirm in DN_UV_NOISE_DETECTION_UV
+ jmpi (1) DN_UV_NOISE_DETECTION_UV { NoCompact }
+
+
+
+
+
+//Module Name	: DN_UV_Noise_Detection_Set_Top_Region_N
+//Author		: Tatiya, Rupesh
+//Description	: Sets sub-region region N from Top region.
+
+
+	mov (1) a0.0:uw				2304:uw			// 2304 = 72*32 (byte address of r72.0)
+	mov (1)	a0.1:uw	1880:uw 	// 1880 = 58*32 + 24 (byte address of r58.24)
+
+
+
+
+add (1) r7.7<1>:d ip:ud 32:d { NoCompact }	// r7.7 = ip + 32; presumably the return address for the fast jump below - confirm in DN_UV_NOISE_DETECTION_UV
+ jmpi (1) DN_UV_NOISE_DETECTION_UV { NoCompact }
+
+
+
+
+
+//Module Name	: DN_UV_Noise_Detection_Set_Top_Region_N
+//Author		: Tatiya, Rupesh
+//Description	: Sets sub-region region N from Top region.
+
+
+	//TODO - remove one instruction here using arithmetic. -rT
+	mov (1) a0.0:uw				2432:uw			// 2432 = 76*32 (byte address of r76.0)
+	mov (1)	a0.1:uw	1884:uw 	// 1884 = 58*32 + 28 (byte address of r58.28)
+
+
+
+
+add (1) r7.7<1>:d ip:ud 32:d { NoCompact }	// r7.7 = ip + 32; presumably the return address for the fast jump below - confirm in DN_UV_NOISE_DETECTION_UV
+ jmpi (1) DN_UV_NOISE_DETECTION_UV { NoCompact }
+
+
+
+
+
+//Module Name	: DN_UV_Noise_Detection_Set_Top_Region_N
+//Author		: Tatiya, Rupesh
+//Description	: Sets sub-region region N from Top region.
+
+
+	//TODO - remove one instruction here using arithmetic. -rT
+	mov (1) a0.0:uw				2560:uw			// 2560 = 80*32 (byte address of r80.0)
+	mov (1)	a0.1:uw	1912:uw 	// 1912 = 59*32 + 24 (byte address of r59.24)
+
+
+
+
+add (1) r7.7<1>:d ip:ud 32:d { NoCompact }	// r7.7 = ip + 32; presumably the return address for the fast jump below - confirm in DN_UV_NOISE_DETECTION_UV
+ jmpi (1) DN_UV_NOISE_DETECTION_UV { NoCompact }
+
+
+
+
+
+//Module Name	: DN_UV_Noise_Detection_Set_Top_Region_N
+//Author		: Tatiya, Rupesh
+//Description	: Sets sub-region region N from Top region.
+
+
+	//TODO - remove one instruction here using arithmetic. -rT
+	mov (1) a0.0:uw				2688:uw			// 2688 = 84*32 (byte address of r84.0)
+	mov (1)	a0.1:uw	1916:uw 	// 1916 = 59*32 + 28 (byte address of r59.28)
+
+
+
+
+add (1) r7.7<1>:d ip:ud 32:d { NoCompact }	// r7.7 = ip + 32; presumably the return address for the fast jump below - confirm in DN_UV_NOISE_DETECTION_UV
+ jmpi (1) DN_UV_NOISE_DETECTION_UV { NoCompact }
+
+
+
+//Module 		: DN_UV_Noise_Reduction_UV
+//Author		: Tatiya, Rupesh
+//Description	: Performs Noise Reduction on 16x16 U and 16x16 V.
+//Tasks			: 1. Update weight history
+//				  2. Find if the block is a motion block
+//				  3. Compute Denoised Pixel.
+
+
+
+
+//History is 1+1 byte every 4x4 U and 4x4 V.
+
+	cmp.l.f0.0 (16) null<1>:w		ubHIST_UV(0,0)<16;16,1>		r61.20<0;2,1>:ub
+	cmp.l.f1.0 (16) null<1>:w		ubHIST_UV(0,0)<16;16,1>		r61.22<0;2,1>:ub
+
+	mov (16)	uwCURBE_TEMP(0)<1>	0:w
+	mov (16)	uwCURBE_TEMP(1)<1>	0:w
+
+	//Compute diff betn curr and prev. - First 16 lines
+	// 8 lines here
+    add (16)	wDIFF_TEMPORAL(0)<1>			ubCURR_UV(2,2)<16;16,1>		-ubPREV_UV(0,0)<16;16,1>		//Diff UV interleaved
+    add (16)	wDIFF_TEMPORAL(1)<1>			ubCURR_UV(3,2)<16;16,1>		-ubPREV_UV(0,16)<16;16,1>		//Diff UV interleaved
+    add (16)	wDIFF_TEMPORAL(2)<1>			ubCURR_UV(4,2)<16;16,1>		-ubPREV_UV(0,32)<16;16,1>		//Diff UV interleaved
+    add (16)	wDIFF_TEMPORAL(3)<1>			ubCURR_UV(5,2)<16;16,1>		-ubPREV_UV(0,48)<16;16,1>		//Diff UV interleaved
+    add (16)	wDIFF_TEMPORAL(4)<1>			ubCURR_UV(6,2)<16;16,1>		-ubPREV_UV(0,64)<16;16,1>		//Diff UV interleaved
+    add (16)	wDIFF_TEMPORAL(5)<1>			ubCURR_UV(7,2)<16;16,1>		-ubPREV_UV(0,80)<16;16,1>		//Diff UV interleaved
+    add (16)	wDIFF_TEMPORAL(6)<1>			ubCURR_UV(8,2)<16;16,1>		-ubPREV_UV(0,96)<16;16,1>		//Diff UV interleaved
+    add (16)	wDIFF_TEMPORAL(7)<1>			ubCURR_UV(9,2)<16;16,1>		-ubPREV_UV(0,112)<16;16,1>		//Diff UV interleaved
+
+	//Update WT HIST
+	(-f0.0) shr 	(16) uwCURBE_TEMP(0)<1>		ubHIST_UV(0,0)<16;16,1>		1:w
+	(f1.0)  add 	(16) uwCURBE_TEMP(2)<1>		ubHIST_UV(0,0)<16;16,1>		r61.24<0;2,1>:ub
+	(f0.0)  mov 	(16) uwCURBE_TEMP(2)<1>		r61.20<0;2,1>:ub
+	(-f0.0.anyv) mov 	(16) uwCURBE_TEMP(2)<1>		ubHIST_UV(0,0)<16;16,1>
+
+	cmp.l.f0.0 (16) null<1>:w		ubHIST_UV(0,16)<16;16,1>	r61.20<0;2,1>:ub
+	cmp.l.f1.0 (16) null<1>:w		ubHIST_UV(0,16)<16;16,1>	r61.22<0;2,1>:ub
+
+	//Compute diff betn curr and prev. - First 16 lines
+	// 8 more lines here
+    add (16)	wDIFF_TEMPORAL(8)<1>			ubCURR_UV(10,2)<16;16,1>		-ubPREV_UV(0,128)<16;16,1>		//Diff UV interleaved
+    add (16)	wDIFF_TEMPORAL(9)<1>			ubCURR_UV(11,2)<16;16,1>		-ubPREV_UV(0,144)<16;16,1>		//Diff UV interleaved
+    add (16)	wDIFF_TEMPORAL(10)<1>			ubCURR_UV(12,2)<16;16,1>		-ubPREV_UV(0,160)<16;16,1>		//Diff UV interleaved
+    add (16)	wDIFF_TEMPORAL(11)<1>			ubCURR_UV(13,2)<16;16,1>		-ubPREV_UV(0,176)<16;16,1>		//Diff UV interleaved
+    add (16)	wDIFF_TEMPORAL(12)<1>			ubCURR_UV(14,2)<16;16,1>		-ubPREV_UV(0,192)<16;16,1>		//Diff UV interleaved
+    add (16)	wDIFF_TEMPORAL(13)<1>			ubCURR_UV(15,2)<16;16,1>		-ubPREV_UV(0,208)<16;16,1>		//Diff UV interleaved
+    add (16)	wDIFF_TEMPORAL(14)<1>			ubCURR_UV(16,2)<16;16,1>		-ubPREV_UV(0,224)<16;16,1>		//Diff UV interleaved
+    add (16)	wDIFF_TEMPORAL(15)<1>			ubCURR_UV(17,2)<16;16,1>		-ubPREV_UV(0,240)<16;16,1>		//Diff UV interleaved
+
+	(-f0.0) shr 	(16) uwCURBE_TEMP(1)<1>		ubHIST_UV(0,16)<16;16,1>	1:w
+	(f1.0)  add 	(16) uwCURBE_TEMP(3)<1>		ubHIST_UV(0,16)<16;16,1>	r61.24<0;2,1>:ub
+	(f0.0)  mov 	(16) uwCURBE_TEMP(3)<1>		r61.20<0;2,1>:ub
+	(-f0.0.anyv) mov(16) uwCURBE_TEMP(3)<1>		ubHIST_UV(0,16)<16;16,1>
+
+	//16x16 to 16x4 - First 16 lines
+	add (16)	acc0.0<1>:uw					(abs)wDIFF_TEMPORAL(0)<16;16,1>	(abs)wDIFF_TEMPORAL(1)<16;16,1>
+	add (16)	acc0.0<1>:uw					acc0.0<16;16,1>:uw					(abs)wDIFF_TEMPORAL(2)<16;16,1>
+	add (16)	uwDIFF_TEMPORAL_SUM4x4(0)<1>	acc0.0<16;16,1>:uw					(abs)wDIFF_TEMPORAL(3)<16;16,1>
+	//16x16 to 16x4 - First 16 lines
+	add (16)	acc0.0<1>:uw					(abs)wDIFF_TEMPORAL(4)<16;16,1>	(abs)wDIFF_TEMPORAL(5)<16;16,1>
+	add (16)	acc0.0<1>:uw					acc0.0<16;16,1>:uw					(abs)wDIFF_TEMPORAL(6)<16;16,1>
+	add (16)	uwDIFF_TEMPORAL_SUM4x4(1)<1>	acc0.0<16;16,1>:uw					(abs)wDIFF_TEMPORAL(7)<16;16,1>
+	//16x16 to 16x4 - First 16 lines
+	add (16)	acc0.0<1>:uw					(abs)wDIFF_TEMPORAL(8)<16;16,1>	(abs)wDIFF_TEMPORAL(9)<16;16,1>
+	add (16)	acc0.0<1>:uw					acc0.0<16;16,1>:uw					(abs)wDIFF_TEMPORAL(10)<16;16,1>
+	add (16)	uwDIFF_TEMPORAL_SUM4x4(2)<1>	acc0.0<16;16,1>:uw					(abs)wDIFF_TEMPORAL(11)<16;16,1>
+	//16x16 to 16x4 - First 16 lines
+	add (16)	acc0.0<1>:uw					(abs)wDIFF_TEMPORAL(12)<16;16,1>	(abs)wDIFF_TEMPORAL(13)<16;16,1>
+	add (16)	acc0.0<1>:uw					acc0.0<16;16,1>:uw					(abs)wDIFF_TEMPORAL(14)<16;16,1>
+	add (16)	uwDIFF_TEMPORAL_SUM4x4(3)<1>	acc0.0<16;16,1>:uw					(abs)wDIFF_TEMPORAL(15)<16;16,1>
+
+//Compute diff betn curr and prev. - Second 16 lines
+//13 lines.
+    add (16)	wDIFF_TEMPORAL(16)<1>		ubCURR_UV(22,2)<16;16,1>		-ubPREV_UV(8,0)<16;16,1>		//Diff UV interleaved
+    add (16)	wDIFF_TEMPORAL(17)<1>		ubCURR_UV(23,2)<16;16,1>		-ubPREV_UV(8,16)<16;16,1>		//Diff UV interleaved
+    add (16)	wDIFF_TEMPORAL(18)<1>		ubCURR_UV(24,2)<16;16,1>		-ubPREV_UV(8,32)<16;16,1>		//Diff UV interleaved
+    add (16)	wDIFF_TEMPORAL(19)<1>		ubCURR_UV(25,2)<16;16,1>		-ubPREV_UV(8,48)<16;16,1>		//Diff UV interleaved
+    add (16)	wDIFF_TEMPORAL(20)<1>		ubCURR_UV(26,2)<16;16,1>		-ubPREV_UV(8,64)<16;16,1>		//Diff UV interleaved
+    add (16)	wDIFF_TEMPORAL(21)<1>		ubCURR_UV(27,2)<16;16,1>		-ubPREV_UV(8,80)<16;16,1>		//Diff UV interleaved
+    add (16)	wDIFF_TEMPORAL(22)<1>		ubCURR_UV(28,2)<16;16,1>		-ubPREV_UV(8,96)<16;16,1>		//Diff UV interleaved
+    add (16)	wDIFF_TEMPORAL(23)<1>		ubCURR_UV(29,2)<16;16,1>		-ubPREV_UV(8,112)<16;16,1>		//Diff UV interleaved
+    add (16)	wDIFF_TEMPORAL(24)<1>		ubCURR_UV(30,2)<16;16,1>		-ubPREV_UV(8,128)<16;16,1>		//Diff UV interleaved
+    add (16)	wDIFF_TEMPORAL(25)<1>		ubCURR_UV(31,2)<16;16,1>		-ubPREV_UV(8,144)<16;16,1>		//Diff UV interleaved
+    add (16)	wDIFF_TEMPORAL(26)<1>		ubCURR_UV(32,2)<16;16,1>		-ubPREV_UV(8,160)<16;16,1>		//Diff UV interleaved
+    add (16)	wDIFF_TEMPORAL(27)<1>		ubCURR_UV(33,2)<16;16,1>		-ubPREV_UV(8,176)<16;16,1>		//Diff UV interleaved
+    add (16)	wDIFF_TEMPORAL(28)<1>		ubCURR_UV(34,2)<16;16,1>		-ubPREV_UV(8,192)<16;16,1>		//Diff UV interleaved
+
+//3 more lines
+    add (16)	wCURBE_TEMP(4)<1>		ubCURR_UV(35,2)<16;16,1>		-ubPREV_UV(8,208)<16;16,1>		//Diff UV interleaved
+    add (16)	wCURBE_TEMP(5)<1>		ubCURR_UV(36,2)<16;16,1>		-ubPREV_UV(8,224)<16;16,1>		//Diff UV interleaved
+    add (16)	wCURBE_TEMP(6)<1>		ubCURR_UV(37,2)<16;16,1>		-ubPREV_UV(8,240)<16;16,1>		//Diff UV interleaved
+
+	//16x4 to 8x4 - First 16 lines
+	add (16)	uwDIFF_TEMPORAL_SUM4x4(0)<1>		uwDIFF_TEMPORAL_SUM4x4(0,0)<4;2,1>		uwDIFF_TEMPORAL_SUM4x4(0,2)<4;2,1>
+	add (16)	uwDIFF_TEMPORAL_SUM4x4(1)<1>		uwDIFF_TEMPORAL_SUM4x4(2,0)<4;2,1>		uwDIFF_TEMPORAL_SUM4x4(2,2)<4;2,1>
+
+	//8x4 to 4x4 - First 16 lines
+	add (16)	uwDIFF_TEMPORAL_SUM4x4_FINAL(0)<1>	uwDIFF_TEMPORAL_SUM4x4(0,0)<4;2,1>		uwDIFF_TEMPORAL_SUM4x4(0,2)<4;2,1>		{ AccWrEn }
+
+	//16x16 to 16x4 - Second 16 lines
+	add (16)	acc0.0<1>:uw					(abs)wDIFF_TEMPORAL(16)<16;16,1>	(abs)wDIFF_TEMPORAL(17)<16;16,1>
+	add (16)	acc0.0<1>:uw					acc0.0<16;16,1>:uw					(abs)wDIFF_TEMPORAL(18)<16;16,1>
+	add (16)	uwDIFF_TEMPORAL_SUM4x4(0)<1>	acc0.0<16;16,1>:uw					(abs)wDIFF_TEMPORAL(19)<16;16,1>
+	//16x16 to 16x4 - Second 16 lines
+	add (16)	acc0.0<1>:uw					(abs)wDIFF_TEMPORAL(20)<16;16,1>	(abs)wDIFF_TEMPORAL(21)<16;16,1>
+	add (16)	acc0.0<1>:uw					acc0.0<16;16,1>:uw					(abs)wDIFF_TEMPORAL(22)<16;16,1>
+	add (16)	uwDIFF_TEMPORAL_SUM4x4(1)<1>	acc0.0<16;16,1>:uw					(abs)wDIFF_TEMPORAL(23)<16;16,1>
+	//16x16 to 16x4 - Second 16 lines
+	add (16)	acc0.0<1>:uw					(abs)wDIFF_TEMPORAL(24)<16;16,1>	(abs)wDIFF_TEMPORAL(25)<16;16,1>
+	add (16)	acc0.0<1>:uw					acc0.0<16;16,1>:uw					(abs)wDIFF_TEMPORAL(26)<16;16,1>
+	add (16)	uwDIFF_TEMPORAL_SUM4x4(2)<1>	acc0.0<16;16,1>:uw					(abs)wDIFF_TEMPORAL(27)<16;16,1>
+
+	//16x16 to 16x4 - Second 16 lines
+	add (16)	acc0.0<1>:uw					(abs)wDIFF_TEMPORAL(28)<16;16,1>	(abs)wCURBE_TEMP(4)<16;16,1>
+	add (16)	acc0.0<1>:uw					acc0.0<16;16,1>:uw					(abs)wCURBE_TEMP(5)<16;16,1>
+	add (16)	uwDIFF_TEMPORAL_SUM4x4(3)<1>	acc0.0<16;16,1>:uw					(abs)wCURBE_TEMP(6)<16;16,1>
+
+	//Find if block is motion block - First 16 lines
+	cmp.g.f0.0  (16) null<1>:w				uwDIFF_TEMPORAL_SUM4x4_FINAL(0)<16;16,1> 		r61.26<0;2,1>:ub
+
+	//Move TEMPORAL_SUM4x4 for SIMD16 use later.
+	mov (8)     uwDIFF_TEMPORAL_SUM4x4_TEMP_0(0,0)<1>                 uwDIFF_TEMPORAL_SUM4x4_FINAL(0,0)<0;2,1>		
+	mov (8)     uwDIFF_TEMPORAL_SUM4x4_TEMP_0(0,8)<1>                 uwDIFF_TEMPORAL_SUM4x4_FINAL(0,2)<0;2,1>		
+	mov (8)     uwDIFF_TEMPORAL_SUM4x4_TEMP_1(0,0)<1>                 uwDIFF_TEMPORAL_SUM4x4_FINAL(0,4)<0;2,1>		
+	mov (8)     uwDIFF_TEMPORAL_SUM4x4_TEMP_1(0,8)<1>                 uwDIFF_TEMPORAL_SUM4x4_FINAL(0,6)<0;2,1>      
+	mov (8)     uwDIFF_TEMPORAL_SUM4x4_TEMP_2(0,0)<1>                 uwDIFF_TEMPORAL_SUM4x4_FINAL(0,8)<0;2,1>		
+	mov (8)     uwDIFF_TEMPORAL_SUM4x4_TEMP_2(0,8)<1>                 uwDIFF_TEMPORAL_SUM4x4_FINAL(0,10)<0;2,1>     
+	mov (8)     uwDIFF_TEMPORAL_SUM4x4_TEMP_3(0,0)<1>                 uwDIFF_TEMPORAL_SUM4x4_FINAL(0,12)<0;2,1>		
+	mov (8)     uwDIFF_TEMPORAL_SUM4x4_TEMP_3(0,8)<1>                 uwDIFF_TEMPORAL_SUM4x4_FINAL(0,14)<0;2,1>     
+
+	//Pick Appropriate Weight History Based on motion. - First 16 lines
+	(-f0.0) mov (16) uwCURBE_TEMP(0)<1>		uwCURBE_TEMP(2)<16;16,1>
+
+	//Actual DN - First 16 lines
+	cmp.l.f0.0 (16)	null<1>:w						(abs)wDIFF_TEMPORAL(0)<16;16,1>			r61.28<0;2,1>:ub
+	cmp.l.f1.0 (16) null<1>:w						(abs)wDIFF_TEMPORAL(0)<16;16,1>			r61.30<0;2,1>:ub
+	mul (8)	acc0.0<1>:w								ubCURR_UV(2,2)<8;8,1>					-uwCURBE_TEMP(0,0)<0;2,1>
+	mul (8)	acc0.8<1>:w								ubCURR_UV(2,10)<8;8,1>					-uwCURBE_TEMP(0,2)<0;2,1>
+	mac (16)	acc0<1>:w							ubCURR_UV(2,2)<16;16,1>					256:w
+	mac (8)	acc0.0<1>:w								ubPREV_UV(0,0)<8;8,1>					uwCURBE_TEMP(0,0)<0;2,1>
+	mac (8)	acc0.8<1>:w								ubPREV_UV(0,8)<8;8,1>					uwCURBE_TEMP(0,2)<0;2,1>
+	add (16)	acc0<1>:w							acc0<16;16,1>:w								128:w
+ 	(f0.0) shr (16) wDIFF_TEMPORAL(0)<1> 			acc0<16;16,1>:w								8:w
+	(-f0.0) mov (16) wDIFF_TEMPORAL(0)<1> 			ubCURR_UV(2,2)<16;16,1>
+	cmp.le.f0.0 (16) null<1>:w						uwDIFF_TEMPORAL_SUM4x4_TEMP_0(0)<16;16,1> 	r61.26<0;2,1>:ub
+	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(0)<1>	wDIFF_TEMPORAL(0)<16;16,1>					ubCURR_UV(2,2)<16;16,1>
+
+	cmp.l.f0.0 (16)	null<1>:w						(abs)wDIFF_TEMPORAL(1)<16;16,1>			r61.28<0;2,1>:ub
+	cmp.l.f1.0 (16) null<1>:w						(abs)wDIFF_TEMPORAL(1)<16;16,1>			r61.30<0;2,1>:ub
+	mul (8)	acc0.0<1>:w								ubCURR_UV(3,2)<8;8,1>					-uwCURBE_TEMP(0,0)<0;2,1>
+	mul (8)	acc0.8<1>:w								ubCURR_UV(3,10)<8;8,1>					-uwCURBE_TEMP(0,2)<0;2,1>
+	mac (16)	acc0<1>:w							ubCURR_UV(3,2)<16;16,1>					256:w
+	mac (8)	acc0.0<1>:w								ubPREV_UV(0,16)<8;8,1>				uwCURBE_TEMP(0,0)<0;2,1>
+	mac (8)	acc0.8<1>:w								ubPREV_UV(0,24)<8;8,1>				uwCURBE_TEMP(0,2)<0;2,1>
+	add (16)	acc0<1>:w							acc0<16;16,1>:w								128:w
+ 	(f0.0) shr  (16) wDIFF_TEMPORAL(1)<1>		acc0<16;16,1>:w								8:w
+	(-f0.0) mov (16) wDIFF_TEMPORAL(1)<1>		ubCURR_UV(3,2)<16;16,1>
+	cmp.le.f0.0 (16) null<1>:w						uwDIFF_TEMPORAL_SUM4x4_TEMP_0(0)<16;16,1> 	r61.26<0;2,1>:ub
+	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(1)<1>	wDIFF_TEMPORAL(1)<16;16,1>				ubCURR_UV(3,2)<16;16,1>
+
+	cmp.l.f0.0 (16)	null<1>:w						(abs)wDIFF_TEMPORAL(2)<16;16,1>			r61.28<0;2,1>:ub
+	cmp.l.f1.0 (16) null<1>:w						(abs)wDIFF_TEMPORAL(2)<16;16,1>			r61.30<0;2,1>:ub
+	mul (8)	acc0.0<1>:w								ubCURR_UV(4,2)<8;8,1>					-uwCURBE_TEMP(0,0)<0;2,1>
+	mul (8)	acc0.8<1>:w								ubCURR_UV(4,10)<8;8,1>					-uwCURBE_TEMP(0,2)<0;2,1>
+	mac (16)	acc0<1>:w							ubCURR_UV(4,2)<16;16,1>					256:w
+	mac (8)	acc0.0<1>:w								ubPREV_UV(0,32)<8;8,1>				uwCURBE_TEMP(0,0)<0;2,1>
+	mac (8)	acc0.8<1>:w								ubPREV_UV(0,40)<8;8,1>				uwCURBE_TEMP(0,2)<0;2,1>
+	add (16)	acc0<1>:w							acc0<16;16,1>:w								128:w
+ 	(f0.0) shr  (16) wDIFF_TEMPORAL(2)<1> 		acc0<16;16,1>:w								8:w
+	(-f0.0) mov (16) wDIFF_TEMPORAL(2)<1>		ubCURR_UV(4,2)<16;16,1>
+	cmp.le.f0.0 (16) null<1>:w						uwDIFF_TEMPORAL_SUM4x4_TEMP_0(0)<16;16,1> 	r61.26<0;2,1>:ub
+	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(2)<1>	wDIFF_TEMPORAL(2)<16;16,1>				ubCURR_UV(4,2)<16;16,1>
+
+	cmp.l.f0.0 (16)	null<1>:w						(abs)wDIFF_TEMPORAL(3)<16;16,1>			r61.28<0;2,1>:ub
+	cmp.l.f1.0 (16) null<1>:w						(abs)wDIFF_TEMPORAL(3)<16;16,1>			r61.30<0;2,1>:ub
+	mul (8)	acc0.0<1>:w								ubCURR_UV(5,2)<8;8,1>					-uwCURBE_TEMP(0,0)<0;2,1>
+	mul (8)	acc0.8<1>:w								ubCURR_UV(5,10)<8;8,1>					-uwCURBE_TEMP(0,2)<0;2,1>
+	mac (16)	acc0<1>:w							ubCURR_UV(5,2)<16;16,1>					256:w
+	mac (8)	acc0.0<1>:w								ubPREV_UV(0,48)<8;8,1>				uwCURBE_TEMP(0,0)<0;2,1>
+	mac (8)	acc0.8<1>:w								ubPREV_UV(0,56)<8;8,1>				uwCURBE_TEMP(0,2)<0;2,1>
+	add (16)	acc0<1>:w							acc0<16;16,1>:w								128:w
+ 	(f0.0) shr  (16) wDIFF_TEMPORAL(3)<1>		acc0<16;16,1>:w								8:w
+	(-f0.0) mov (16) wDIFF_TEMPORAL(3)<1>		ubCURR_UV(5,2)<16;16,1>
+	cmp.le.f0.0 (16) null<1>:w						uwDIFF_TEMPORAL_SUM4x4_TEMP_0(0)<16;16,1> 	r61.26<0;2,1>:ub
+	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(3)<1>	wDIFF_TEMPORAL(3)<16;16,1>				ubCURR_UV(5,2)<16;16,1>
+
+	cmp.l.f0.0 (16)	null<1>:w						(abs)wDIFF_TEMPORAL(4)<16;16,1>			r61.28<0;2,1>:ub
+	cmp.l.f1.0 (16) null<1>:w						(abs)wDIFF_TEMPORAL(4)<16;16,1>			r61.30<0;2,1>:ub
+	mul (8)	acc0.0<1>:w								ubCURR_UV(6,2)<8;8,1>					-uwCURBE_TEMP(0,4)<0;2,1>
+	mul (8)	acc0.8<1>:w								ubCURR_UV(6,10)<8;8,1>					-uwCURBE_TEMP(0,6)<0;2,1>
+	mac (16)	acc0<1>:w							ubCURR_UV(6,2)<16;16,1>					256:w
+	mac (8)	acc0.0<1>:w								ubPREV_UV(0,64)<8;8,1>					uwCURBE_TEMP(0,4)<0;2,1>
+	mac (8)	acc0.8<1>:w								ubPREV_UV(0,72)<8;8,1>					uwCURBE_TEMP(0,6)<0;2,1>
+	add (16)	acc0<1>:w							acc0<16;16,1>:w								128:w
+ 	(f0.0) shr (16) wDIFF_TEMPORAL(4)<1> 			acc0<16;16,1>:w								8:w
+	(-f0.0) mov (16) wDIFF_TEMPORAL(4)<1> 			ubCURR_UV(6,2)<16;16,1>
+	cmp.le.f0.0 (16) null<1>:w						uwDIFF_TEMPORAL_SUM4x4_TEMP_1(0)<16;16,1> 	r61.26<0;2,1>:ub
+	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(4)<1>	wDIFF_TEMPORAL(4)<16;16,1>					ubCURR_UV(6,2)<16;16,1>
+
+	cmp.l.f0.0 (16)	null<1>:w						(abs)wDIFF_TEMPORAL(5)<16;16,1>			r61.28<0;2,1>:ub
+	cmp.l.f1.0 (16) null<1>:w						(abs)wDIFF_TEMPORAL(5)<16;16,1>			r61.30<0;2,1>:ub
+	mul (8)	acc0.0<1>:w								ubCURR_UV(7,2)<8;8,1>					-uwCURBE_TEMP(0,4)<0;2,1>
+	mul (8)	acc0.8<1>:w								ubCURR_UV(7,10)<8;8,1>					-uwCURBE_TEMP(0,6)<0;2,1>
+	mac (16)	acc0<1>:w							ubCURR_UV(7,2)<16;16,1>					256:w
+	mac (8)	acc0.0<1>:w								ubPREV_UV(0,80)<8;8,1>				uwCURBE_TEMP(0,4)<0;2,1>
+	mac (8)	acc0.8<1>:w								ubPREV_UV(0,88)<8;8,1>				uwCURBE_TEMP(0,6)<0;2,1>
+	add (16)	acc0<1>:w							acc0<16;16,1>:w								128:w
+ 	(f0.0) shr  (16) wDIFF_TEMPORAL(5)<1>		acc0<16;16,1>:w								8:w
+	(-f0.0) mov (16) wDIFF_TEMPORAL(5)<1>		ubCURR_UV(7,2)<16;16,1>
+	cmp.le.f0.0 (16) null<1>:w						uwDIFF_TEMPORAL_SUM4x4_TEMP_1(0)<16;16,1> 	r61.26<0;2,1>:ub
+	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(5)<1>	wDIFF_TEMPORAL(5)<16;16,1>				ubCURR_UV(7,2)<16;16,1>
+
+	cmp.l.f0.0 (16)	null<1>:w						(abs)wDIFF_TEMPORAL(6)<16;16,1>			r61.28<0;2,1>:ub
+	cmp.l.f1.0 (16) null<1>:w						(abs)wDIFF_TEMPORAL(6)<16;16,1>			r61.30<0;2,1>:ub
+	mul (8)	acc0.0<1>:w								ubCURR_UV(8,2)<8;8,1>					-uwCURBE_TEMP(0,4)<0;2,1>
+	mul (8)	acc0.8<1>:w								ubCURR_UV(8,10)<8;8,1>					-uwCURBE_TEMP(0,6)<0;2,1>
+	mac (16)	acc0<1>:w							ubCURR_UV(8,2)<16;16,1>					256:w
+	mac (8)	acc0.0<1>:w								ubPREV_UV(0,96)<8;8,1>				uwCURBE_TEMP(0,4)<0;2,1>
+	mac (8)	acc0.8<1>:w								ubPREV_UV(0,104)<8;8,1>				uwCURBE_TEMP(0,6)<0;2,1>
+	add (16)	acc0<1>:w							acc0<16;16,1>:w								128:w
+ 	(f0.0) shr  (16) wDIFF_TEMPORAL(6)<1> 		acc0<16;16,1>:w								8:w
+	(-f0.0) mov (16) wDIFF_TEMPORAL(6)<1>		ubCURR_UV(8,2)<16;16,1>
+	cmp.le.f0.0 (16) null<1>:w						uwDIFF_TEMPORAL_SUM4x4_TEMP_1(0)<16;16,1> 	r61.26<0;2,1>:ub
+	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(6)<1>	wDIFF_TEMPORAL(6)<16;16,1>				ubCURR_UV(8,2)<16;16,1>
+
+	cmp.l.f0.0 (16)	null<1>:w						(abs)wDIFF_TEMPORAL(7)<16;16,1>			r61.28<0;2,1>:ub
+	cmp.l.f1.0 (16) null<1>:w						(abs)wDIFF_TEMPORAL(7)<16;16,1>			r61.30<0;2,1>:ub
+	mul (8)	acc0.0<1>:w								ubCURR_UV(9,2)<8;8,1>					-uwCURBE_TEMP(0,4)<0;2,1>
+	mul (8)	acc0.8<1>:w								ubCURR_UV(9,10)<8;8,1>					-uwCURBE_TEMP(0,6)<0;2,1>
+	mac (16)	acc0<1>:w							ubCURR_UV(9,2)<16;16,1>					256:w
+	mac (8)	acc0.0<1>:w								ubPREV_UV(0,112)<8;8,1>				uwCURBE_TEMP(0,4)<0;2,1>
+	mac (8)	acc0.8<1>:w								ubPREV_UV(0,120)<8;8,1>				uwCURBE_TEMP(0,6)<0;2,1>
+	add (16)	acc0<1>:w							acc0<16;16,1>:w								128:w
+ 	(f0.0) shr  (16) wDIFF_TEMPORAL(7)<1>		acc0<16;16,1>:w								8:w
+	(-f0.0) mov (16) wDIFF_TEMPORAL(7)<1>		ubCURR_UV(9,2)<16;16,1>
+	cmp.le.f0.0 (16) null<1>:w						uwDIFF_TEMPORAL_SUM4x4_TEMP_1(0)<16;16,1> 	r61.26<0;2,1>:ub
+	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(7)<1>	wDIFF_TEMPORAL(7)<16;16,1>				ubCURR_UV(9,2)<16;16,1>
+
+	cmp.l.f0.0 (16)	null<1>:w						(abs)wDIFF_TEMPORAL(8)<16;16,1>			r61.28<0;2,1>:ub
+	cmp.l.f1.0 (16) null<1>:w						(abs)wDIFF_TEMPORAL(8)<16;16,1>			r61.30<0;2,1>:ub
+	mul (8)	acc0.0<1>:w								ubCURR_UV(10,2)<8;8,1>					-uwCURBE_TEMP(0,8)<0;2,1>
+	mul (8)	acc0.8<1>:w								ubCURR_UV(10,10)<8;8,1>					-uwCURBE_TEMP(0,10)<0;2,1>
+	mac (16)	acc0<1>:w							ubCURR_UV(10,2)<16;16,1>					256:w
+	mac (8)	acc0.0<1>:w								ubPREV_UV(0,128)<8;8,1>					uwCURBE_TEMP(0,8)<0;2,1>
+	mac (8)	acc0.8<1>:w								ubPREV_UV(0,136)<8;8,1>					uwCURBE_TEMP(0,10)<0;2,1>
+	add (16)	acc0<1>:w							acc0<16;16,1>:w								128:w
+ 	(f0.0) shr (16) wDIFF_TEMPORAL(8)<1> 			acc0<16;16,1>:w								8:w
+	(-f0.0) mov (16) wDIFF_TEMPORAL(8)<1> 			ubCURR_UV(10,2)<16;16,1>
+	cmp.le.f0.0 (16) null<1>:w						uwDIFF_TEMPORAL_SUM4x4_TEMP_2(0)<16;16,1> 	r61.26<0;2,1>:ub
+	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(8)<1>	wDIFF_TEMPORAL(8)<16;16,1>					ubCURR_UV(10,2)<16;16,1>
+
+	cmp.l.f0.0 (16)	null<1>:w						(abs)wDIFF_TEMPORAL(9)<16;16,1>			r61.28<0;2,1>:ub
+	cmp.l.f1.0 (16) null<1>:w						(abs)wDIFF_TEMPORAL(9)<16;16,1>			r61.30<0;2,1>:ub
+	mul (8)	acc0.0<1>:w								ubCURR_UV(11,2)<8;8,1>					-uwCURBE_TEMP(0,8)<0;2,1>
+	mul (8)	acc0.8<1>:w								ubCURR_UV(11,10)<8;8,1>					-uwCURBE_TEMP(0,10)<0;2,1>
+	mac (16)	acc0<1>:w							ubCURR_UV(11,2)<16;16,1>					256:w
+	mac (8)	acc0.0<1>:w								ubPREV_UV(0,144)<8;8,1>				uwCURBE_TEMP(0,8)<0;2,1>
+	mac (8)	acc0.8<1>:w								ubPREV_UV(0,152)<8;8,1>				uwCURBE_TEMP(0,10)<0;2,1>
+	add (16)	acc0<1>:w							acc0<16;16,1>:w								128:w
+ 	(f0.0) shr  (16) wDIFF_TEMPORAL(9)<1>		acc0<16;16,1>:w								8:w
+	(-f0.0) mov (16) wDIFF_TEMPORAL(9)<1>		ubCURR_UV(11,2)<16;16,1>
+	cmp.le.f0.0 (16) null<1>:w						uwDIFF_TEMPORAL_SUM4x4_TEMP_2(0)<16;16,1> 	r61.26<0;2,1>:ub
+	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(9)<1>	wDIFF_TEMPORAL(9)<16;16,1>				ubCURR_UV(11,2)<16;16,1>
+
+	cmp.l.f0.0 (16)	null<1>:w						(abs)wDIFF_TEMPORAL(10)<16;16,1>			r61.28<0;2,1>:ub
+	cmp.l.f1.0 (16) null<1>:w						(abs)wDIFF_TEMPORAL(10)<16;16,1>			r61.30<0;2,1>:ub
+	mul (8)	acc0.0<1>:w								ubCURR_UV(12,2)<8;8,1>					-uwCURBE_TEMP(0,8)<0;2,1>
+	mul (8)	acc0.8<1>:w								ubCURR_UV(12,10)<8;8,1>					-uwCURBE_TEMP(0,10)<0;2,1>
+	mac (16)	acc0<1>:w							ubCURR_UV(12,2)<16;16,1>					256:w
+	mac (8)	acc0.0<1>:w								ubPREV_UV(0,160)<8;8,1>				uwCURBE_TEMP(0,8)<0;2,1>
+	mac (8)	acc0.8<1>:w								ubPREV_UV(0,168)<8;8,1>				uwCURBE_TEMP(0,10)<0;2,1>
+	add (16)	acc0<1>:w							acc0<16;16,1>:w								128:w
+ 	(f0.0) shr  (16) wDIFF_TEMPORAL(10)<1> 		acc0<16;16,1>:w								8:w
+	(-f0.0) mov (16) wDIFF_TEMPORAL(10)<1>		ubCURR_UV(12,2)<16;16,1>
+	cmp.le.f0.0 (16) null<1>:w						uwDIFF_TEMPORAL_SUM4x4_TEMP_2(0)<16;16,1> 	r61.26<0;2,1>:ub
+	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(10)<1>	wDIFF_TEMPORAL(10)<16;16,1>				ubCURR_UV(12,2)<16;16,1>
+
+	cmp.l.f0.0 (16)	null<1>:w						(abs)wDIFF_TEMPORAL(11)<16;16,1>			r61.28<0;2,1>:ub
+	cmp.l.f1.0 (16) null<1>:w						(abs)wDIFF_TEMPORAL(11)<16;16,1>			r61.30<0;2,1>:ub
+	mul (8)	acc0.0<1>:w								ubCURR_UV(13,2)<8;8,1>					-uwCURBE_TEMP(0,8)<0;2,1>
+	mul (8)	acc0.8<1>:w								ubCURR_UV(13,10)<8;8,1>					-uwCURBE_TEMP(0,10)<0;2,1>
+	mac (16)	acc0<1>:w							ubCURR_UV(13,2)<16;16,1>					256:w
+	mac (8)	acc0.0<1>:w								ubPREV_UV(0,176)<8;8,1>				uwCURBE_TEMP(0,8)<0;2,1>
+	mac (8)	acc0.8<1>:w								ubPREV_UV(0,184)<8;8,1>				uwCURBE_TEMP(0,10)<0;2,1>
+	add (16)	acc0<1>:w							acc0<16;16,1>:w								128:w
+ 	(f0.0) shr  (16) wDIFF_TEMPORAL(11)<1>		acc0<16;16,1>:w								8:w
+	(-f0.0) mov (16) wDIFF_TEMPORAL(11)<1>		ubCURR_UV(13,2)<16;16,1>
+	cmp.le.f0.0 (16) null<1>:w						uwDIFF_TEMPORAL_SUM4x4_TEMP_2(0)<16;16,1> 	r61.26<0;2,1>:ub
+	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(11)<1>	wDIFF_TEMPORAL(11)<16;16,1>				ubCURR_UV(13,2)<16;16,1>
+
+	cmp.l.f0.0 (16)	null<1>:w						(abs)wDIFF_TEMPORAL(12)<16;16,1>			r61.28<0;2,1>:ub
+	cmp.l.f1.0 (16) null<1>:w						(abs)wDIFF_TEMPORAL(12)<16;16,1>			r61.30<0;2,1>:ub
+	mul (8)	acc0.0<1>:w								ubCURR_UV(14,2)<8;8,1>					-uwCURBE_TEMP(0,12)<0;2,1>
+	mul (8)	acc0.8<1>:w								ubCURR_UV(14,10)<8;8,1>					-uwCURBE_TEMP(0,14)<0;2,1>
+	mac (16)	acc0<1>:w							ubCURR_UV(14,2)<16;16,1>					256:w
+	mac (8)	acc0.0<1>:w								ubPREV_UV(0,192)<8;8,1>					uwCURBE_TEMP(0,12)<0;2,1>
+	mac (8)	acc0.8<1>:w								ubPREV_UV(0,200)<8;8,1>					uwCURBE_TEMP(0,14)<0;2,1>
+	add (16)	acc0<1>:w							acc0<16;16,1>:w								128:w
+ 	(f0.0) shr (16) wDIFF_TEMPORAL(12)<1> 			acc0<16;16,1>:w								8:w
+	(-f0.0) mov (16) wDIFF_TEMPORAL(12)<1> 			ubCURR_UV(14,2)<16;16,1>
+	cmp.le.f0.0 (16) null<1>:w						uwDIFF_TEMPORAL_SUM4x4_TEMP_3(0)<16;16,1> 	r61.26<0;2,1>:ub
+	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(12)<1>	wDIFF_TEMPORAL(12)<16;16,1>					ubCURR_UV(14,2)<16;16,1>
+
+	cmp.l.f0.0 (16)	null<1>:w						(abs)wDIFF_TEMPORAL(13)<16;16,1>			r61.28<0;2,1>:ub
+	cmp.l.f1.0 (16) null<1>:w						(abs)wDIFF_TEMPORAL(13)<16;16,1>			r61.30<0;2,1>:ub
+	mul (8)	acc0.0<1>:w								ubCURR_UV(15,2)<8;8,1>					-uwCURBE_TEMP(0,12)<0;2,1>
+	mul (8)	acc0.8<1>:w								ubCURR_UV(15,10)<8;8,1>					-uwCURBE_TEMP(0,14)<0;2,1>
+	mac (16)	acc0<1>:w							ubCURR_UV(15,2)<16;16,1>					256:w
+	mac (8)	acc0.0<1>:w								ubPREV_UV(0,208)<8;8,1>				uwCURBE_TEMP(0,12)<0;2,1>
+	mac (8)	acc0.8<1>:w								ubPREV_UV(0,216)<8;8,1>				uwCURBE_TEMP(0,14)<0;2,1>
+	add (16)	acc0<1>:w							acc0<16;16,1>:w								128:w
+ 	(f0.0) shr  (16) wDIFF_TEMPORAL(13)<1>		acc0<16;16,1>:w								8:w
+	(-f0.0) mov (16) wDIFF_TEMPORAL(13)<1>		ubCURR_UV(15,2)<16;16,1>
+	cmp.le.f0.0 (16) null<1>:w						uwDIFF_TEMPORAL_SUM4x4_TEMP_3(0)<16;16,1> 	r61.26<0;2,1>:ub
+	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(13)<1>	wDIFF_TEMPORAL(13)<16;16,1>				ubCURR_UV(15,2)<16;16,1>
+
+	cmp.l.f0.0 (16)	null<1>:w						(abs)wDIFF_TEMPORAL(14)<16;16,1>			r61.28<0;2,1>:ub
+	cmp.l.f1.0 (16) null<1>:w						(abs)wDIFF_TEMPORAL(14)<16;16,1>			r61.30<0;2,1>:ub
+	mul (8)	acc0.0<1>:w								ubCURR_UV(16,2)<8;8,1>					-uwCURBE_TEMP(0,12)<0;2,1>
+	mul (8)	acc0.8<1>:w								ubCURR_UV(16,10)<8;8,1>					-uwCURBE_TEMP(0,14)<0;2,1>
+	mac (16)	acc0<1>:w							ubCURR_UV(16,2)<16;16,1>					256:w
+	mac (8)	acc0.0<1>:w								ubPREV_UV(0,224)<8;8,1>				uwCURBE_TEMP(0,12)<0;2,1>
+	mac (8)	acc0.8<1>:w								ubPREV_UV(0,232)<8;8,1>				uwCURBE_TEMP(0,14)<0;2,1>
+	add (16)	acc0<1>:w							acc0<16;16,1>:w								128:w
+ 	(f0.0) shr  (16) wDIFF_TEMPORAL(14)<1> 		acc0<16;16,1>:w								8:w
+	(-f0.0) mov (16) wDIFF_TEMPORAL(14)<1>		ubCURR_UV(16,2)<16;16,1>
+	cmp.le.f0.0 (16) null<1>:w						uwDIFF_TEMPORAL_SUM4x4_TEMP_3(0)<16;16,1> 	r61.26<0;2,1>:ub
+	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(14)<1>	wDIFF_TEMPORAL(14)<16;16,1>				ubCURR_UV(16,2)<16;16,1>
+
+	cmp.l.f0.0 (16)	null<1>:w						(abs)wDIFF_TEMPORAL(15)<16;16,1>			r61.28<0;2,1>:ub
+	cmp.l.f1.0 (16) null<1>:w						(abs)wDIFF_TEMPORAL(15)<16;16,1>			r61.30<0;2,1>:ub
+	mul (8)	acc0.0<1>:w								ubCURR_UV(17,2)<8;8,1>					-uwCURBE_TEMP(0,12)<0;2,1>
+	mul (8)	acc0.8<1>:w								ubCURR_UV(17,10)<8;8,1>					-uwCURBE_TEMP(0,14)<0;2,1>
+	mac (16)	acc0<1>:w							ubCURR_UV(17,2)<16;16,1>					256:w
+	mac (8)	acc0.0<1>:w								ubPREV_UV(0,240)<8;8,1>				uwCURBE_TEMP(0,12)<0;2,1>
+	mac (8)	acc0.8<1>:w								ubPREV_UV(0,248)<8;8,1>				uwCURBE_TEMP(0,14)<0;2,1>
+	add (16)	acc0<1>:w							acc0<16;16,1>:w								128:w
+ 	(f0.0) shr  (16) wDIFF_TEMPORAL(15)<1>		acc0<16;16,1>:w								8:w
+	(-f0.0) mov (16) wDIFF_TEMPORAL(15)<1>		ubCURR_UV(17,2)<16;16,1>
+	cmp.le.f0.0 (16) null<1>:w						uwDIFF_TEMPORAL_SUM4x4_TEMP_3(0)<16;16,1> 	r61.26<0;2,1>:ub
+	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(15)<1>	wDIFF_TEMPORAL(15)<16;16,1>				ubCURR_UV(17,2)<16;16,1>
+
+
+	//16x4 to 8x4 - Second 16 lines
+	add (16)	uwDIFF_TEMPORAL_SUM4x4(0)<1>	uwDIFF_TEMPORAL_SUM4x4(0,0)<4;2,1>		uwDIFF_TEMPORAL_SUM4x4(0,2)<4;2,1>
+	add (16)	uwDIFF_TEMPORAL_SUM4x4(1)<1>	uwDIFF_TEMPORAL_SUM4x4(2,0)<4;2,1>		uwDIFF_TEMPORAL_SUM4x4(2,2)<4;2,1>
+
+	//8x4 to 4x4 - Second 16 lines
+	add (16)	uwDIFF_TEMPORAL_SUM4x4_FINAL(1)<1>	uwDIFF_TEMPORAL_SUM4x4(0,0)<4;2,1>		uwDIFF_TEMPORAL_SUM4x4(0,2)<4;2,1>     { AccWrEn }
+
+	//Find if block is motion block - Second 16 lines
+	cmp.g.f1.0  (16) null<1>:w				uwDIFF_TEMPORAL_SUM4x4_FINAL(1)<16;16,1> 		r61.26<0;2,1>:ub
+
+	//Move TEMPORAL_SUM4x4 for SIMD16 use later.
+	mov (8)     uwDIFF_TEMPORAL_SUM4x4_TEMP_0(0,0)<1>                 uwDIFF_TEMPORAL_SUM4x4_FINAL(1,0)<0;2,1>		
+	mov (8)     uwDIFF_TEMPORAL_SUM4x4_TEMP_0(0,8)<1>                 uwDIFF_TEMPORAL_SUM4x4_FINAL(1,2)<0;2,1>		
+	mov (8)     uwDIFF_TEMPORAL_SUM4x4_TEMP_1(0,0)<1>                 uwDIFF_TEMPORAL_SUM4x4_FINAL(1,4)<0;2,1>		
+	mov (8)     uwDIFF_TEMPORAL_SUM4x4_TEMP_1(0,8)<1>                 uwDIFF_TEMPORAL_SUM4x4_FINAL(1,6)<0;2,1>      
+	mov (8)     uwDIFF_TEMPORAL_SUM4x4_TEMP_2(0,0)<1>                 uwDIFF_TEMPORAL_SUM4x4_FINAL(1,8)<0;2,1>		
+	mov (8)     uwDIFF_TEMPORAL_SUM4x4_TEMP_2(0,8)<1>                 uwDIFF_TEMPORAL_SUM4x4_FINAL(1,10)<0;2,1>     
+	mov (8)     uwDIFF_TEMPORAL_SUM4x4_TEMP_3(0,0)<1>                 uwDIFF_TEMPORAL_SUM4x4_FINAL(1,12)<0;2,1>		
+	mov (8)     uwDIFF_TEMPORAL_SUM4x4_TEMP_3(0,8)<1>                 uwDIFF_TEMPORAL_SUM4x4_FINAL(1,14)<0;2,1>     
+
+	//Pick Appropriate Weight History Based on motion. - Second 16 lines
+	(-f1.0) mov (16) uwCURBE_TEMP(1)<1>		uwCURBE_TEMP(3)<16;16,1>
+
+	//Actual DN - Second 16 lines
+	cmp.l.f0.0 (16)	null<1>:w							(abs)wDIFF_TEMPORAL(16)<16;16,1>			r61.28<0;2,1>:ub
+	cmp.l.f1.0 (16) null<1>:w							(abs)wDIFF_TEMPORAL(16)<16;16,1>			r61.30<0;2,1>:ub
+	mul (8)	acc0.0<1>:w									ubCURR_UV(22,2)<8;8,1>					-uwCURBE_TEMP(1,0)<0;2,1>
+	mul (8)	acc0.8<1>:w									ubCURR_UV(22,10)<8;8,1>					-uwCURBE_TEMP(1,2)<0;2,1>
+	mac (16)	acc0<1>:w								ubCURR_UV(22,2)<16;16,1>					256:w
+	mac (8)	acc0.0<1>:w									ubPREV_UV(8,0)<8;8,1>					uwCURBE_TEMP(1,0)<0;2,1>
+	mac (8)	acc0.8<1>:w									ubPREV_UV(8,8)<8;8,1>					uwCURBE_TEMP(1,2)<0;2,1>
+	add (16)	acc0<1>:w								acc0<16;16,1>:w								128:w
+ 	(f0.0) shr  (16) wDIFF_TEMPORAL(16)<1> 			acc0<16;16,1>:w								8:w
+	(-f0.0) mov (16) wDIFF_TEMPORAL(16)<1>			ubCURR_UV(22,2)<16;16,1>
+	cmp.le.f0.0 (16) null<1>:w							uwDIFF_TEMPORAL_SUM4x4_TEMP_0(0)<16;16,1> 	r61.26<0;2,1>:ub
+	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(16)<1>	wDIFF_TEMPORAL(16)<16;16,1>			ubCURR_UV(22,2)<16;16,1>
+
+	cmp.l.f0.0 (16)	null<1>:w							(abs)wDIFF_TEMPORAL(17)<16;16,1>			r61.28<0;2,1>:ub
+	cmp.l.f1.0 (16) null<1>:w							(abs)wDIFF_TEMPORAL(17)<16;16,1>			r61.30<0;2,1>:ub
+	mul (8)	acc0.0<1>:w									ubCURR_UV(23,2)<8;8,1>					-uwCURBE_TEMP(1,0)<0;2,1>
+	mul (8)	acc0.8<1>:w									ubCURR_UV(23,10)<8;8,1>					-uwCURBE_TEMP(1,2)<0;2,1>
+	mac (16)	acc0<1>:w								ubCURR_UV(23,2)<16;16,1>					256:w
+	mac (8)	acc0.0<1>:w									ubPREV_UV(8,16)<8;8,1>				uwCURBE_TEMP(1,0)<0;2,1>
+	mac (8)	acc0.8<1>:w									ubPREV_UV(8,24)<8;8,1>				uwCURBE_TEMP(1,2)<0;2,1>
+	add (16)	acc0<1>:w								acc0<16;16,1>:w								128:w
+ 	(f0.0) shr  (16) wDIFF_TEMPORAL(17)<1>			acc0<16;16,1>:w								8:w
+	(-f0.0) mov (16) wDIFF_TEMPORAL(17)<1>			ubCURR_UV(23,2)<16;16,1>
+	cmp.le.f0.0 (16) null<1>:w							uwDIFF_TEMPORAL_SUM4x4_TEMP_0(0)<16;16,1> 	r61.26<0;2,1>:ub
+	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(17)<1>	wDIFF_TEMPORAL(17)<16;16,1>			ubCURR_UV(23,2)<16;16,1>
+
+	cmp.l.f0.0 (16)	null<1>:w							(abs)wDIFF_TEMPORAL(18)<16;16,1>			r61.28<0;2,1>:ub
+	cmp.l.f1.0 (16) null<1>:w							(abs)wDIFF_TEMPORAL(18)<16;16,1>			r61.30<0;2,1>:ub
+	mul (8)	acc0.0<1>:w									ubCURR_UV(24,2)<8;8,1>					-uwCURBE_TEMP(1,0)<0;2,1>
+	mul (8)	acc0.8<1>:w									ubCURR_UV(24,10)<8;8,1>					-uwCURBE_TEMP(1,2)<0;2,1>
+	mac (16)	acc0<1>:w								ubCURR_UV(24,2)<16;16,1>					256:w
+	mac (8)	acc0.0<1>:w									ubPREV_UV(8,32)<8;8,1>				uwCURBE_TEMP(1,0)<0;2,1>
+	mac (8)	acc0.8<1>:w									ubPREV_UV(8,40)<8;8,1>				uwCURBE_TEMP(1,2)<0;2,1>
+	add (16)	acc0<1>:w								acc0<16;16,1>:w								128:w
+ 	(f0.0) shr  (16) wDIFF_TEMPORAL(18)<1> 			acc0<16;16,1>:w								8:w
+	(-f0.0) mov (16) wDIFF_TEMPORAL(18)<1>			ubCURR_UV(24,2)<16;16,1>
+	cmp.le.f0.0 (16) null<1>:w							uwDIFF_TEMPORAL_SUM4x4_TEMP_0(0)<16;16,1> 	r61.26<0;2,1>:ub
+	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(18)<1>	wDIFF_TEMPORAL(18)<16;16,1>			ubCURR_UV(24,2)<16;16,1>
+
+	cmp.l.f0.0 (16)	null<1>:w							(abs)wDIFF_TEMPORAL(19)<16;16,1>			r61.28<0;2,1>:ub
+	cmp.l.f1.0 (16) null<1>:w							(abs)wDIFF_TEMPORAL(19)<16;16,1>			r61.30<0;2,1>:ub
+	mul (8)	acc0.0<1>:w									ubCURR_UV(25,2)<8;8,1>					-uwCURBE_TEMP(1,0)<0;2,1>
+	mul (8)	acc0.8<1>:w									ubCURR_UV(25,10)<8;8,1>					-uwCURBE_TEMP(1,2)<0;2,1>
+	mac (16)	acc0<1>:w								ubCURR_UV(25,2)<16;16,1>					256:w
+	mac (8)	acc0.0<1>:w									ubPREV_UV(8,48)<8;8,1>				uwCURBE_TEMP(1,0)<0;2,1>
+	mac (8)	acc0.8<1>:w									ubPREV_UV(8,56)<8;8,1>				uwCURBE_TEMP(1,2)<0;2,1>
+	add (16)	acc0<1>:w								acc0<16;16,1>:w								128:w
+ 	(f0.0) shr  (16) wDIFF_TEMPORAL(19)<1>			acc0<16;16,1>:w								8:w
+	(-f0.0) mov (16) wDIFF_TEMPORAL(19)<1>			ubCURR_UV(25,2)<16;16,1>
+	cmp.le.f0.0 (16) null<1>:w							uwDIFF_TEMPORAL_SUM4x4_TEMP_0(0)<16;16,1> 	r61.26<0;2,1>:ub
+	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(19)<1>	wDIFF_TEMPORAL(19)<16;16,1>			ubCURR_UV(25,2)<16;16,1>
+	cmp.l.f0.0 (16)	null<1>:w							(abs)wDIFF_TEMPORAL(20)<16;16,1>			r61.28<0;2,1>:ub
+	cmp.l.f1.0 (16) null<1>:w							(abs)wDIFF_TEMPORAL(20)<16;16,1>			r61.30<0;2,1>:ub
+	mul (8)	acc0.0<1>:w									ubCURR_UV(26,2)<8;8,1>					-uwCURBE_TEMP(1,4)<0;2,1>
+	mul (8)	acc0.8<1>:w									ubCURR_UV(26,10)<8;8,1>					-uwCURBE_TEMP(1,6)<0;2,1>
+	mac (16)	acc0<1>:w								ubCURR_UV(26,2)<16;16,1>					256:w
+	mac (8)	acc0.0<1>:w									ubPREV_UV(8,64)<8;8,1>					uwCURBE_TEMP(1,4)<0;2,1>
+	mac (8)	acc0.8<1>:w									ubPREV_UV(8,72)<8;8,1>					uwCURBE_TEMP(1,6)<0;2,1>
+	add (16)	acc0<1>:w								acc0<16;16,1>:w								128:w
+ 	(f0.0) shr  (16) wDIFF_TEMPORAL(20)<1> 			acc0<16;16,1>:w								8:w
+	(-f0.0) mov (16) wDIFF_TEMPORAL(20)<1>			ubCURR_UV(26,2)<16;16,1>
+	cmp.le.f0.0 (16) null<1>:w							uwDIFF_TEMPORAL_SUM4x4_TEMP_1(0)<16;16,1> 	r61.26<0;2,1>:ub
+	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(20)<1>	wDIFF_TEMPORAL(20)<16;16,1>			ubCURR_UV(26,2)<16;16,1>
+
+	cmp.l.f0.0 (16)	null<1>:w							(abs)wDIFF_TEMPORAL(21)<16;16,1>			r61.28<0;2,1>:ub
+	cmp.l.f1.0 (16) null<1>:w							(abs)wDIFF_TEMPORAL(21)<16;16,1>			r61.30<0;2,1>:ub
+	mul (8)	acc0.0<1>:w									ubCURR_UV(27,2)<8;8,1>					-uwCURBE_TEMP(1,4)<0;2,1>
+	mul (8)	acc0.8<1>:w									ubCURR_UV(27,10)<8;8,1>					-uwCURBE_TEMP(1,6)<0;2,1>
+	mac (16)	acc0<1>:w								ubCURR_UV(27,2)<16;16,1>					256:w
+	mac (8)	acc0.0<1>:w									ubPREV_UV(8,80)<8;8,1>				uwCURBE_TEMP(1,4)<0;2,1>
+	mac (8)	acc0.8<1>:w									ubPREV_UV(8,88)<8;8,1>				uwCURBE_TEMP(1,6)<0;2,1>
+	add (16)	acc0<1>:w								acc0<16;16,1>:w								128:w
+ 	(f0.0) shr  (16) wDIFF_TEMPORAL(21)<1>			acc0<16;16,1>:w								8:w
+	(-f0.0) mov (16) wDIFF_TEMPORAL(21)<1>			ubCURR_UV(27,2)<16;16,1>
+	cmp.le.f0.0 (16) null<1>:w							uwDIFF_TEMPORAL_SUM4x4_TEMP_1(0)<16;16,1> 	r61.26<0;2,1>:ub
+	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(21)<1>	wDIFF_TEMPORAL(21)<16;16,1>			ubCURR_UV(27,2)<16;16,1>
+
+	cmp.l.f0.0 (16)	null<1>:w							(abs)wDIFF_TEMPORAL(22)<16;16,1>			r61.28<0;2,1>:ub
+	cmp.l.f1.0 (16) null<1>:w							(abs)wDIFF_TEMPORAL(22)<16;16,1>			r61.30<0;2,1>:ub
+	mul (8)	acc0.0<1>:w									ubCURR_UV(28,2)<8;8,1>					-uwCURBE_TEMP(1,4)<0;2,1>
+	mul (8)	acc0.8<1>:w									ubCURR_UV(28,10)<8;8,1>					-uwCURBE_TEMP(1,6)<0;2,1>
+	mac (16)	acc0<1>:w								ubCURR_UV(28,2)<16;16,1>					256:w
+	mac (8)	acc0.0<1>:w									ubPREV_UV(8,96)<8;8,1>				uwCURBE_TEMP(1,4)<0;2,1>
+	mac (8)	acc0.8<1>:w									ubPREV_UV(8,104)<8;8,1>				uwCURBE_TEMP(1,6)<0;2,1>
+	add (16)	acc0<1>:w								acc0<16;16,1>:w								128:w
+ 	(f0.0) shr  (16) wDIFF_TEMPORAL(22)<1> 			acc0<16;16,1>:w								8:w
+	(-f0.0) mov (16) wDIFF_TEMPORAL(22)<1>			ubCURR_UV(28,2)<16;16,1>
+	cmp.le.f0.0 (16) null<1>:w							uwDIFF_TEMPORAL_SUM4x4_TEMP_1(0)<16;16,1> 	r61.26<0;2,1>:ub
+	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(22)<1>	wDIFF_TEMPORAL(22)<16;16,1>			ubCURR_UV(28,2)<16;16,1>
+
+	cmp.l.f0.0 (16)	null<1>:w							(abs)wDIFF_TEMPORAL(23)<16;16,1>			r61.28<0;2,1>:ub
+	cmp.l.f1.0 (16) null<1>:w							(abs)wDIFF_TEMPORAL(23)<16;16,1>			r61.30<0;2,1>:ub
+	mul (8)	acc0.0<1>:w									ubCURR_UV(29,2)<8;8,1>					-uwCURBE_TEMP(1,4)<0;2,1>
+	mul (8)	acc0.8<1>:w									ubCURR_UV(29,10)<8;8,1>					-uwCURBE_TEMP(1,6)<0;2,1>
+	mac (16)	acc0<1>:w								ubCURR_UV(29,2)<16;16,1>					256:w
+	mac (8)	acc0.0<1>:w									ubPREV_UV(8,112)<8;8,1>				uwCURBE_TEMP(1,4)<0;2,1>
+	mac (8)	acc0.8<1>:w									ubPREV_UV(8,120)<8;8,1>				uwCURBE_TEMP(1,6)<0;2,1>
+	add (16)	acc0<1>:w								acc0<16;16,1>:w								128:w
+ 	(f0.0) shr  (16) wDIFF_TEMPORAL(23)<1>			acc0<16;16,1>:w								8:w
+	(-f0.0) mov (16) wDIFF_TEMPORAL(23)<1>			ubCURR_UV(29,2)<16;16,1>
+	cmp.le.f0.0 (16) null<1>:w							uwDIFF_TEMPORAL_SUM4x4_TEMP_1(0)<16;16,1> 	r61.26<0;2,1>:ub
+	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(23)<1>	wDIFF_TEMPORAL(23)<16;16,1>			ubCURR_UV(29,2)<16;16,1>
+	cmp.l.f0.0 (16)	null<1>:w							(abs)wDIFF_TEMPORAL(24)<16;16,1>			r61.28<0;2,1>:ub
+	cmp.l.f1.0 (16) null<1>:w							(abs)wDIFF_TEMPORAL(24)<16;16,1>			r61.30<0;2,1>:ub
+	mul (8)	acc0.0<1>:w									ubCURR_UV(30,2)<8;8,1>					-uwCURBE_TEMP(1,8)<0;2,1>
+	mul (8)	acc0.8<1>:w									ubCURR_UV(30,10)<8;8,1>					-uwCURBE_TEMP(1,10)<0;2,1>
+	mac (16)	acc0<1>:w								ubCURR_UV(30,2)<16;16,1>					256:w
+	mac (8)	acc0.0<1>:w									ubPREV_UV(8,128)<8;8,1>					uwCURBE_TEMP(1,8)<0;2,1>
+	mac (8)	acc0.8<1>:w									ubPREV_UV(8,136)<8;8,1>					uwCURBE_TEMP(1,10)<0;2,1>
+	add (16)	acc0<1>:w								acc0<16;16,1>:w								128:w
+ 	(f0.0) shr  (16) wDIFF_TEMPORAL(24)<1> 			acc0<16;16,1>:w								8:w
+	(-f0.0) mov (16) wDIFF_TEMPORAL(24)<1>			ubCURR_UV(30,2)<16;16,1>
+	cmp.le.f0.0 (16) null<1>:w							uwDIFF_TEMPORAL_SUM4x4_TEMP_2(0)<16;16,1> 	r61.26<0;2,1>:ub
+	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(24)<1>	wDIFF_TEMPORAL(24)<16;16,1>			ubCURR_UV(30,2)<16;16,1>
+
+	cmp.l.f0.0 (16)	null<1>:w							(abs)wDIFF_TEMPORAL(25)<16;16,1>			r61.28<0;2,1>:ub
+	cmp.l.f1.0 (16) null<1>:w							(abs)wDIFF_TEMPORAL(25)<16;16,1>			r61.30<0;2,1>:ub
+	mul (8)	acc0.0<1>:w									ubCURR_UV(31,2)<8;8,1>					-uwCURBE_TEMP(1,8)<0;2,1>
+	mul (8)	acc0.8<1>:w									ubCURR_UV(31,10)<8;8,1>					-uwCURBE_TEMP(1,10)<0;2,1>
+	mac (16)	acc0<1>:w								ubCURR_UV(31,2)<16;16,1>					256:w
+	mac (8)	acc0.0<1>:w									ubPREV_UV(8,144)<8;8,1>				uwCURBE_TEMP(1,8)<0;2,1>
+	mac (8)	acc0.8<1>:w									ubPREV_UV(8,152)<8;8,1>				uwCURBE_TEMP(1,10)<0;2,1>
+	add (16)	acc0<1>:w								acc0<16;16,1>:w								128:w
+ 	(f0.0) shr  (16) wDIFF_TEMPORAL(25)<1>			acc0<16;16,1>:w								8:w
+	(-f0.0) mov (16) wDIFF_TEMPORAL(25)<1>			ubCURR_UV(31,2)<16;16,1>
+	cmp.le.f0.0 (16) null<1>:w							uwDIFF_TEMPORAL_SUM4x4_TEMP_2(0)<16;16,1> 	r61.26<0;2,1>:ub
+	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(25)<1>	wDIFF_TEMPORAL(25)<16;16,1>			ubCURR_UV(31,2)<16;16,1>
+
+	cmp.l.f0.0 (16)	null<1>:w							(abs)wDIFF_TEMPORAL(26)<16;16,1>			r61.28<0;2,1>:ub
+	cmp.l.f1.0 (16) null<1>:w							(abs)wDIFF_TEMPORAL(26)<16;16,1>			r61.30<0;2,1>:ub
+	mul (8)	acc0.0<1>:w									ubCURR_UV(32,2)<8;8,1>					-uwCURBE_TEMP(1,8)<0;2,1>
+	mul (8)	acc0.8<1>:w									ubCURR_UV(32,10)<8;8,1>					-uwCURBE_TEMP(1,10)<0;2,1>
+	mac (16)	acc0<1>:w								ubCURR_UV(32,2)<16;16,1>					256:w
+	mac (8)	acc0.0<1>:w									ubPREV_UV(8,160)<8;8,1>				uwCURBE_TEMP(1,8)<0;2,1>
+	mac (8)	acc0.8<1>:w									ubPREV_UV(8,168)<8;8,1>				uwCURBE_TEMP(1,10)<0;2,1>
+	add (16)	acc0<1>:w								acc0<16;16,1>:w								128:w
+ 	(f0.0) shr  (16) wDIFF_TEMPORAL(26)<1> 			acc0<16;16,1>:w								8:w
+	(-f0.0) mov (16) wDIFF_TEMPORAL(26)<1>			ubCURR_UV(32,2)<16;16,1>
+	cmp.le.f0.0 (16) null<1>:w							uwDIFF_TEMPORAL_SUM4x4_TEMP_2(0)<16;16,1> 	r61.26<0;2,1>:ub
+	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(26)<1>	wDIFF_TEMPORAL(26)<16;16,1>			ubCURR_UV(32,2)<16;16,1>
+
+	cmp.l.f0.0 (16)	null<1>:w							(abs)wDIFF_TEMPORAL(27)<16;16,1>			r61.28<0;2,1>:ub
+	cmp.l.f1.0 (16) null<1>:w							(abs)wDIFF_TEMPORAL(27)<16;16,1>			r61.30<0;2,1>:ub
+	mul (8)	acc0.0<1>:w									ubCURR_UV(33,2)<8;8,1>					-uwCURBE_TEMP(1,8)<0;2,1>
+	mul (8)	acc0.8<1>:w									ubCURR_UV(33,10)<8;8,1>					-uwCURBE_TEMP(1,10)<0;2,1>
+	mac (16)	acc0<1>:w								ubCURR_UV(33,2)<16;16,1>					256:w
+	mac (8)	acc0.0<1>:w									ubPREV_UV(8,176)<8;8,1>				uwCURBE_TEMP(1,8)<0;2,1>
+	mac (8)	acc0.8<1>:w									ubPREV_UV(8,184)<8;8,1>				uwCURBE_TEMP(1,10)<0;2,1>
+	add (16)	acc0<1>:w								acc0<16;16,1>:w								128:w
+ 	(f0.0) shr  (16) wDIFF_TEMPORAL(27)<1>			acc0<16;16,1>:w								8:w
+	(-f0.0) mov (16) wDIFF_TEMPORAL(27)<1>			ubCURR_UV(33,2)<16;16,1>
+	cmp.le.f0.0 (16) null<1>:w							uwDIFF_TEMPORAL_SUM4x4_TEMP_2(0)<16;16,1> 	r61.26<0;2,1>:ub
+	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(27)<1>	wDIFF_TEMPORAL(27)<16;16,1>			ubCURR_UV(33,2)<16;16,1>
+
+	cmp.l.f0.0 (16)	null<1>:w						(abs)wDIFF_TEMPORAL(28)<16;16,1>			r61.28<0;2,1>:ub
+	cmp.l.f1.0 (16) null<1>:w						(abs)wDIFF_TEMPORAL(28)<16;16,1>			r61.30<0;2,1>:ub
+	mul (8)	acc0.0<1>:w								ubCURR_UV(34,2)<8;8,1>					-uwCURBE_TEMP(1,12)<0;2,1>
+	mul (8)	acc0.8<1>:w								ubCURR_UV(34,10)<8;8,1>					-uwCURBE_TEMP(1,14)<0;2,1>
+	mac (16)	acc0<1>:w							ubCURR_UV(34,2)<16;16,1>					256:w
+	mac (8)	acc0.0<1>:w								ubPREV_UV(8,192)<8;8,1>					uwCURBE_TEMP(1,12)<0;2,1>
+	mac (8)	acc0.8<1>:w								ubPREV_UV(8,200)<8;8,1>					uwCURBE_TEMP(1,14)<0;2,1>
+	add (16)	acc0<1>:w							acc0<16;16,1>:w								128:w
+ 	(f0.0) shr  (16) wDIFF_TEMPORAL(28)<1> 			acc0<16;16,1>:w								8:w
+	(-f0.0) mov (16) wDIFF_TEMPORAL(28)<1>			ubCURR_UV(34,2)<16;16,1>
+	cmp.le.f0.0 (16) null<1>:w						uwDIFF_TEMPORAL_SUM4x4_TEMP_3(0)<16;16,1> 	r61.26<0;2,1>:ub
+	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(28)<1>	wDIFF_TEMPORAL(28)<16;16,1>				ubCURR_UV(34,2)<16;16,1>
+
+	cmp.l.f0.0 (16)	null<1>:w						(abs)wCURBE_TEMP(4)<16;16,1>				r61.28<0;2,1>:ub
+	cmp.l.f1.0 (16) null<1>:w						(abs)wCURBE_TEMP(4)<16;16,1>				r61.30<0;2,1>:ub
+	mul (8)	acc0.0<1>:w								ubCURR_UV(35,2)<8;8,1>					-uwCURBE_TEMP(1,12)<0;2,1>
+	mul (8)	acc0.8<1>:w								ubCURR_UV(35,10)<8;8,1>					-uwCURBE_TEMP(1,14)<0;2,1>
+	mac (16)	acc0<1>:w							ubCURR_UV(35,2)<16;16,1>					256:w
+	mac (8)	acc0.0<1>:w								ubPREV_UV(8,208)<8;8,1>				uwCURBE_TEMP(1,12)<0;2,1>
+	mac (8)	acc0.8<1>:w								ubPREV_UV(8,216)<8;8,1>				uwCURBE_TEMP(1,14)<0;2,1>
+	add (16)	acc0<1>:w							acc0<16;16,1>:w								128:w
+ 	(f0.0) shr  (16) wCURBE_TEMP(4)<1>				acc0<16;16,1>:w								8:w
+	(-f0.0) mov (16) wCURBE_TEMP(4)<1>				ubCURR_UV(35,2)<16;16,1>
+	cmp.le.f0.0 (16) null<1>:w						uwDIFF_TEMPORAL_SUM4x4_TEMP_3(0)<16;16,1> 	r61.26<0;2,1>:ub
+	(-f0.0.allv) avg (16)	wCURBE_TEMP(4)<1>		wCURBE_TEMP(4)<16;16,1>				ubCURR_UV(35,2)<16;16,1>
+
+	cmp.l.f0.0 (16)	null<1>:w						(abs)wCURBE_TEMP(5)<16;16,1>				r61.28<0;2,1>:ub
+	cmp.l.f1.0 (16) null<1>:w						(abs)wCURBE_TEMP(5)<16;16,1>				r61.30<0;2,1>:ub
+	mul (8)	acc0.0<1>:w								ubCURR_UV(36,2)<8;8,1>					-uwCURBE_TEMP(1,12)<0;2,1>
+	mul (8)	acc0.8<1>:w								ubCURR_UV(36,10)<8;8,1>					-uwCURBE_TEMP(1,14)<0;2,1>
+	mac (16)	acc0<1>:w							ubCURR_UV(36,2)<16;16,1>					256:w
+	mac (8)	acc0.0<1>:w								ubPREV_UV(8,224)<8;8,1>				uwCURBE_TEMP(1,12)<0;2,1>
+	mac (8)	acc0.8<1>:w								ubPREV_UV(8,232)<8;8,1>				uwCURBE_TEMP(1,14)<0;2,1>
+	add (16)	acc0<1>:w							acc0<16;16,1>:w								128:w
+ 	(f0.0) shr  (16) wCURBE_TEMP(5)<1> 				acc0<16;16,1>:w								8:w
+	(-f0.0) mov (16) wCURBE_TEMP(5)<1>				ubCURR_UV(36,2)<16;16,1>
+	cmp.le.f0.0 (16) null<1>:w						uwDIFF_TEMPORAL_SUM4x4_TEMP_3(0)<16;16,1> 	r61.26<0;2,1>:ub
+	(-f0.0.allv) avg (16)	wCURBE_TEMP(5)<1>		wCURBE_TEMP(5)<16;16,1>				ubCURR_UV(36,2)<16;16,1>
+
+	cmp.l.f0.0 (16)	null<1>:w						(abs)wCURBE_TEMP(6)<16;16,1>				r61.28<0;2,1>:ub
+	cmp.l.f1.0 (16) null<1>:w						(abs)wCURBE_TEMP(6)<16;16,1>				r61.30<0;2,1>:ub
+	mul (8)	acc0.0<1>:w								ubCURR_UV(37,2)<8;8,1>					-uwCURBE_TEMP(1,12)<0;2,1>
+	mul (8)	acc0.8<1>:w								ubCURR_UV(37,10)<8;8,1>					-uwCURBE_TEMP(1,14)<0;2,1>
+	mac (16)	acc0<1>:w							ubCURR_UV(37,2)<16;16,1>					256:w
+	mac (8)	acc0.0<1>:w								ubPREV_UV(8,240)<8;8,1>				uwCURBE_TEMP(1,12)<0;2,1>
+	mac (8)	acc0.8<1>:w								ubPREV_UV(8,248)<8;8,1>				uwCURBE_TEMP(1,14)<0;2,1>
+	add (16)	acc0<1>:w							acc0<16;16,1>:w								128:w
+ 	(f0.0) shr  (16) wCURBE_TEMP(6)<1>				acc0<16;16,1>:w								8:w
+	(-f0.0) mov (16) wCURBE_TEMP(6)<1>				ubCURR_UV(37,2)<16;16,1>
+	cmp.le.f0.0 (16) null<1>:w						uwDIFF_TEMPORAL_SUM4x4_TEMP_3(0)<16;16,1> 	r61.26<0;2,1>:ub
+	(-f0.0.allv) avg (16)	wCURBE_TEMP(6)<1>		wCURBE_TEMP(6)<16;16,1>				ubCURR_UV(37,2)<16;16,1>
+
+	//Pack Weight History WORD -> BYTE
+	mov (16) ubCURBE_TEMP(3,0)<1>		ubCURBE_TEMP(0)<32;16,2>	
+	mov (16) ubCURBE_TEMP(3,16)<1>		ubCURBE_TEMP(1)<32;16,2>	
+
+
+
+//Module Name 	: DN_UV_Compute_BNE_UV
+//Author		: Tatiya, Rupesh
+//Description	: Computes minimum SOAD for each 16x4 block: lane-wise minimum of two uwSOAD_MIN_8x4 regions, then a WORD -> BYTE pack into CURBE_TEMP(1).
+// Both operands use region <16;4,1> from word 12, i.e. 8 lanes = words 12..15 of two consecutive 16-word rows, starting at rows 0 and 2.
+	cmp.l.f0.0  (8) null:w     				uwSOAD_MIN_8x4(0,12)<16;4,1> 	uwSOAD_MIN_8x4(2,12)<16;4,1>
+	(f0.0)sel	(8) uwCURBE_TEMP(1,0)<1>	uwSOAD_MIN_8x4(0,12)<16;4,1> 	uwSOAD_MIN_8x4(2,12)<16;4,1>	// f0.0 set (src0 < src1) takes src0, otherwise src1: the smaller word of each pair
+// Pack in place: source <16;8,2> reads every second byte (even byte offsets) of the 8 word results and stores them as 8 consecutive bytes.
+    mov  (8)	ubCURBE_TEMP(1)<1>			ubCURBE_TEMP(1)<16;8,2>
+
+
+
+//Module Name 	: DN_UV_YUY2_Pack_Denoised_UV
+//Author			: Tatiya, Rupesh
+//Description	: Pack UV denoised data based on YUY2 input.
+
+
+
+//Module Name 	: DN_UV_Pack_Denoised_UV
+//Author			: Tatiya, Rupesh
+//Description	: Pack UV denoised data based on PL2/PL3/PA.
+// Copies 32 rows of 16 results each (every second byte of a word row, source region <32;16,2>) to every second byte (<2>:ub) of four indirectly addressed 8-row groups.
+// a0.0..a0.3 = r54.21 (a byte value) + a constant. The constants are multiples of 32: 2976 = 93*32, 3264 = 102*32, 3552 = 111*32, 3840 = 120*32.
+	add  (1) a0.0<1>:uw    	r54.21<0;1,0>:ub    2976:w		// presumably the byte address of r93 plus a lane offset from r54.21 - TODO confirm what r54.21 holds
+	add  (1) a0.1<1>:uw    	r54.21<0;1,0>:ub    3264:w		// same with base 102*32
+	add  (1) a0.2<1>:uw    	r54.21<0;1,0>:ub    3552:w		// same with base 111*32
+	add  (1) a0.3<1>:uw    	r54.21<0;1,0>:ub    3840:w		// same with base 120*32
+// NOTE(review): r93, r102, r111 and r120 directly follow r92, r101, r110 and r119, which the sends further down in this file use as message headers - presumably these moves fill those payloads.
+//First 8 lines. Rows 0..7 of DIFF_TEMPORAL go to a0.0 + 0, 32, .. 224 (one 32-byte step per row).
+	mov  (16)	r[a0.0, 0]<2>:ub		ubDIFF_TEMPORAL(0)<32;16,2>
+	mov  (16)	r[a0.0, 32]<2>:ub		ubDIFF_TEMPORAL(1)<32;16,2>
+	mov  (16)	r[a0.0, 64]<2>:ub		ubDIFF_TEMPORAL(2)<32;16,2>
+	mov  (16)	r[a0.0, 96]<2>:ub		ubDIFF_TEMPORAL(3)<32;16,2>
+	mov  (16)	r[a0.0, 128]<2>:ub		ubDIFF_TEMPORAL(4)<32;16,2>
+	mov  (16)	r[a0.0, 160]<2>:ub		ubDIFF_TEMPORAL(5)<32;16,2>
+	mov  (16)	r[a0.0, 192]<2>:ub		ubDIFF_TEMPORAL(6)<32;16,2>
+	mov  (16)	r[a0.0, 224]<2>:ub		ubDIFF_TEMPORAL(7)<32;16,2>
+
+//Second 8 lines. Rows 8..15 go to the group addressed by a0.1.
+	mov  (16)	r[a0.1, 0]<2>:ub		ubDIFF_TEMPORAL(8)<32;16,2>
+	mov  (16)	r[a0.1, 32]<2>:ub		ubDIFF_TEMPORAL(9)<32;16,2>
+	mov  (16)	r[a0.1, 64]<2>:ub		ubDIFF_TEMPORAL(10)<32;16,2>
+	mov  (16)	r[a0.1, 96]<2>:ub		ubDIFF_TEMPORAL(11)<32;16,2>
+	mov  (16)	r[a0.1, 128]<2>:ub		ubDIFF_TEMPORAL(12)<32;16,2>
+	mov  (16)	r[a0.1, 160]<2>:ub		ubDIFF_TEMPORAL(13)<32;16,2>
+	mov  (16)	r[a0.1, 192]<2>:ub		ubDIFF_TEMPORAL(14)<32;16,2>
+	mov  (16)	r[a0.1, 224]<2>:ub		ubDIFF_TEMPORAL(15)<32;16,2>
+
+//Third 8 lines. Rows 16..23 go to the group addressed by a0.2.
+	mov  (16)	r[a0.2, 0]<2>:ub		ubDIFF_TEMPORAL(16)<32;16,2>
+	mov  (16)	r[a0.2, 32]<2>:ub		ubDIFF_TEMPORAL(17)<32;16,2>
+	mov  (16)	r[a0.2, 64]<2>:ub		ubDIFF_TEMPORAL(18)<32;16,2>
+	mov  (16)	r[a0.2, 96]<2>:ub		ubDIFF_TEMPORAL(19)<32;16,2>
+	mov  (16)	r[a0.2, 128]<2>:ub		ubDIFF_TEMPORAL(20)<32;16,2>
+	mov  (16)	r[a0.2, 160]<2>:ub		ubDIFF_TEMPORAL(21)<32;16,2>
+	mov  (16)	r[a0.2, 192]<2>:ub		ubDIFF_TEMPORAL(22)<32;16,2>
+	mov  (16)	r[a0.2, 224]<2>:ub		ubDIFF_TEMPORAL(23)<32;16,2>
+
+//Fourth 8 lines. This group is addressed by a0.3 and is read from two different sources.
+//5 lines first: rows 24..28 still come from DIFF_TEMPORAL.
+	mov  (16)	r[a0.3, 0]<2>:ub		ubDIFF_TEMPORAL(24)<32;16,2>
+	mov  (16)	r[a0.3, 32]<2>:ub		ubDIFF_TEMPORAL(25)<32;16,2>
+	mov  (16)	r[a0.3, 64]<2>:ub		ubDIFF_TEMPORAL(26)<32;16,2>
+	mov  (16)	r[a0.3, 96]<2>:ub		ubDIFF_TEMPORAL(27)<32;16,2>
+	mov  (16)	r[a0.3, 128]<2>:ub		ubDIFF_TEMPORAL(28)<32;16,2>
+
+//3 more lines: the last three rows come from CURBE_TEMP(4..6), where the temporal filter above stored its final three result rows.
+	mov  (16)	r[a0.3, 160]<2>:ub		ubCURBE_TEMP(4)<32;16,2>
+	mov  (16)	r[a0.3, 192]<2>:ub		ubCURBE_TEMP(5)<32;16,2>
+	mov  (16)	r[a0.3, 224]<2>:ub		ubCURBE_TEMP(6)<32;16,2>
+
+
+	//TODO - See if History saving can be combined with Curr Frame Save. - rT
+
+
+//Module Name 	: DN_UV_Save_Hist_UV
+//Author		: Tatiya, Rupesh
+//Description	: Saves DN history for UV data. Builds a message header in r3 and issues one send whose result is discarded (null destination).
+// Header: start from a copy of r0, then overwrite dwords 0, 1 and 2.
+	mov (8)  r3.0<1>:ud	r0.0<8;8,1>:ud
+	mov (2)	 r3.0<1>:d	r62.12<2;2,1>:w				// dwords 0 and 1 = the two words at r62.12 and r62.13 widened to dwords - presumably the X and Y origin of the history block, TODO confirm
+	mov (1)	 r3.2<1>:d	0x30007:ud		// presumably a block-size field, low half 7 and high half 3 read as width-1 and height-1 (8 bytes x 4 rows) - confirm against the PRM
+// NOTE(review): 0x5 is the send target and 0x40A8021 the message descriptor. Their field decoding is not visible in this file - check the data port chapter of the PRM before changing them.
+	send (8) null<1>:d	r3		0x5		0x40A8021:ud
+
+
+
+//Module Name	: DN_UV_Save_BNE_UV
+//Author		: Tatiya, Rupesh
+//Description	: Saves BNE values for 16x16 U and 16x16 V. Builds a message header in r1 and issues one send whose result is discarded (null destination).
+// Header: start from a copy of r0, then overwrite dwords 0, 1 and 2.
+	mov (8)  r1.0<1>:ud	r0.0<8;8,1>:ud
+	mov (2)	 r1.0<1>:d		r63.12<2;2,1>:w					// dwords 0 and 1 = the two words at r63.12 and r63.13 widened to dwords - presumably the X and Y origin of the BNE block, TODO confirm
+	mov (1)	 r1.2<1>:d		0x10003:ud		// presumably a block-size field, low half 3 and high half 1 read as width-1 and height-1 (4 bytes x 2 rows) - confirm against the PRM
+// NOTE(review): same send target 0x5 as the history save, but the descriptor differs in its low byte (0x23 here, 0x21 there) - presumably a different surface index, TODO confirm.
+	send (8) null<1>:d	r1		0x5		0x40A8023:ud
+
+
+
+//Module Name 	: DN_UV_YUY2_Save_Curr_Frame_YUV
+//Author		: Tatiya, Rupesh
+
+
+
+//Module Name 	: DN_UV_Load_Curr_Frame_Y
+//Author		: Tatiya, Rupesh
+//Description	: Saves Y or YUY2 of Current frame.
+// Builds one header in acc0, clones it into r92, r101, r110 and r119, offsets three of the clones, and issues four sends with a null destination.
+// NOTE(review): the module name says Load while the description says Saves and the sends return nothing - presumably this is a store and the name is stale.
+// NOTE(review): the registers right after each header (r93, r102, r111, r120) match the base constants used by the Pack_Denoised_UV moves above, so the payload is presumably packed UV rather than Y - confirm.
+// Header template is assembled in acc0, starting from a copy of r0.
+	mov (8)		acc0.0<1>:ud		r0.0<8;8,1>:ud
+	shl (1)		r62.10<1>:w		r62.10<0;1,0>:w		1:w	// doubles the word at r62.10 in place, so r62.10 itself is modified for any later reader
+	mov (1)		acc0.0<1>:d			r62.10<0;1,0>:w	// dword 0 = the doubled r62.10 - presumably the X origin in bytes, TODO confirm
+	mov (1)		acc0.1<1>:d			r62.11<0;1,0>:w	// dword 1 = r62.11 - presumably the Y origin, TODO confirm
+// Dword 2 = 0x7001F: presumably a block-size field, low half 31 and high half 7 read as width-1 and height-1 (32 bytes x 8 rows) - confirm against the PRM.
+	mov (1)		acc0.2<1>:d			0x7001F:ud
+// Clone the template into the four header registers.
+	mov (8)     r92.0<1>:ud	acc0.0<8;8,1>:ud
+
+	mov (8)     r101.0<1>:ud	acc0.0<8;8,1>:ud
+	mov (8)     r110.0<1>:ud	acc0.0<8;8,1>:ud
+	mov (8)     r119.0<1>:ud	acc0.0<8;8,1>:ud
+// r92 keeps the template origin. r101 adds 8 to dword 1 only.
+	add (1)		r101.1<1>:d 	acc0.1<0;1,0>:d   		8:d
+// r110 adds 32 to dword 0 only.
+	add (1)		r110.0<1>:d 	acc0.0<0;1,0>:d   		32:d
+// r119 adds 32 to dword 0 and 8 to dword 1.
+	add (1)		r119.0<1>:d 	acc0.0<0;1,0>:d   		32:d
+	add (1)		r119.1<1>:d 	acc0.1<0;1,0>:d   		 8:d
+// Four sends, one per header, all with the same target 0x5 and descriptor 0x120A8018.
+	send (8)	null<1>:d	r92		0x5		0x120A8018:ud
+	send (8)	null<1>:d	r101		0x5		0x120A8018:ud
+	send (8)	null<1>:d	r110		0x5		0x120A8018:ud
+	send (8)	null<1>:d	r119		0x5		0x120A8018:ud
+
+
+
+//End of Thread message: copies r0 into r127 and sends it with target 0x27 and descriptor 0x02000010.
+// NOTE(review): 0x27 presumably combines a function id with the end-of-thread flag - confirm against the PRM before changing it.
+mov (8) r127<1>:ud r0.0<8;8,1>:ud 
+ send (1) null<1>:d r127 0x27 0x02000010
+
+
+	//All sub-routines here
+
+
+// Module Name  : Noise_Detection
+// Author		: Tatiya, Rupesh
+// Description	: Performs noise detection on 32 pixels of U (8x4) and 32 pixels of V (8x4).
+
+DN_UV_NOISE_DETECTION_UV:
+
+// Find Field Block Median
+//
+// Purpose   : Find the median value of the nine pixels in the same field
+//             which are centered at current pixel.
+//
+//             Works on 9 pixels centered at the current pixel
+//                NOTE: pixels are within same field.
+//                      v4 - current pixel
+//
+//                  v2 v1 v0
+//                   *  *  *     <--- Different field - not used
+//                  v5 v4 v3
+//                   *  *  *     <--- Different field - not used
+//                  v8 v7 v6
+
+// Algorithm to find median modifies the data.
+// Copy the data needed to calculate median so the original source data stays intact.
+//
+
+//TODO - Change Interleaved implementation to separated one if - ,  does not work on predication. - rT
+
+//Delete Later - rT
+//mov (1) pCUR_UV:uw		52*32:uw
+
+// v0
+mov (16) ubMEDIAN_TEMP(0,0)<1>    	r[a0.0,0]<16;16,1>		
+// v0
+mov (16) ubMEDIAN_TEMP(0,16)<1>   	r[a0.0,32]<16;16,1>		
+// v1
+mov (16) ubMEDIAN_TEMP(1,0)<1>    	r[a0.0,2]<16;16,1>		
+// v1
+mov (16) ubMEDIAN_TEMP(1,16)<1>   	r[a0.0,34]<16;16,1>		
+// v2
+mov (16) ubMEDIAN_TEMP(2,0)<1>    	r[a0.0,4]<16;16,1>		
+// v2
+mov (16) ubMEDIAN_TEMP(2,16)<1>   	r[a0.0,36]<16;16,1>		
+// v3
+mov (16) ubMEDIAN_TEMP(3,0)<1>    	r[a0.0,64]<16;16,1>  	
+// v3
+mov (16) ubMEDIAN_TEMP(3,16)<1>   	r[a0.0,96]<16;16,1>		
+// v4
+mov (16) ubMEDIAN_TEMP(4,0)<1>		r[a0.0,66]<16;16,1>  	
+// v4
+mov (16) ubMEDIAN_TEMP(4,16)<1>   	r[a0.0,98]<16;16,1>  	
+// v5
+mov (16) ubMEDIAN_TEMP(5,0)<1>		r[a0.0,68]<16;16,1>  	
+// v5
+mov (16) ubMEDIAN_TEMP(5,16)<1>   	r[a0.0,100]<16;16,1>  	
+// v6
+mov (16) ubMEDIAN_TEMP(6,0)<1>    	r[a0.0,128]<16;16,1>  	
+// v6
+mov (16) ubMEDIAN_TEMP(6,16)<1>   	r[a0.0,160]<16;16,1>		
+// v7
+mov (16) ubMEDIAN_TEMP(7,0)<1>		r[a0.0,130]<16;16,1>  	
+// v7
+mov (16) ubMEDIAN_TEMP(7,16)<1>   	r[a0.0,162]<16;16,1>  	
+// v8
+mov (16) ubMEDIAN_TEMP(8,0)<1>		r[a0.0,132]<16;16,1>  	
+// v8
+mov (16) ubMEDIAN_TEMP(8,16)<1>   	r[a0.0,164]<16;16,1>  	
+
+//TODO - Optimize one instruction here.
+add (1)  a0.0:uw		a0.0<0;1,0>:uw 64:uw
+// v0
+mov (16) ubMEDIAN_TEMP(9,0)<1>    	r[a0.0,0]<16;16,1>		
+// v0
+mov (16) ubMEDIAN_TEMP(9,16)<1>   	r[a0.0,32]<16;16,1>		
+// v1
+mov (16) ubMEDIAN_TEMP(10,0)<1>    	r[a0.0,2]<16;16,1>		
+// v1
+mov (16) ubMEDIAN_TEMP(10,16)<1>   	r[a0.0,34]<16;16,1>		
+// v2
+mov (16) ubMEDIAN_TEMP(11,0)<1>    	r[a0.0,4]<16;16,1>		
+// v2
+mov (16) ubMEDIAN_TEMP(11,16)<1>   	r[a0.0,36]<16;16,1>		
+// v3
+mov (16) ubMEDIAN_TEMP(12,0)<1>    	r[a0.0,64]<16;16,1>  	
+// v3
+mov (16) ubMEDIAN_TEMP(12,16)<1>   	r[a0.0,96]<16;16,1>		
+// v4
+mov (16) ubMEDIAN_TEMP(13,0)<1>		r[a0.0,66]<16;16,1>  	
+// v4
+mov (16) ubMEDIAN_TEMP(13,16)<1>   	r[a0.0,98]<16;16,1>  	
+// v5
+mov (16) ubMEDIAN_TEMP(14,0)<1>		r[a0.0,68]<16;16,1>  	
+// v5
+mov (16) ubMEDIAN_TEMP(14,16)<1>   	r[a0.0,100]<16;16,1>  	
+// v6
+mov (16) ubMEDIAN_TEMP(15,0)<1>    	r[a0.0,128]<16;16,1>  	
+// v6
+mov (16) ubMEDIAN_TEMP(15,16)<1>   	r[a0.0,160]<16;16,1>		
+// v7
+mov (16) ubMEDIAN_TEMP(16,0)<1>		r[a0.0,130]<16;16,1>  	
+// v7
+mov (16) ubMEDIAN_TEMP(16,16)<1>   	r[a0.0,162]<16;16,1>  	
+// v8
+mov (16) ubMEDIAN_TEMP(17,0)<1>		r[a0.0,132]<16;16,1>  	
+// v8
+mov (16) ubMEDIAN_TEMP(17,16)<1>   	r[a0.0,164]<16;16,1>  	
+
+//TODO - Optimize one instruction here.
+add (1)  a0.0:uw		a0.0<0;1,0>:uw 64:uw
+
+// MedianSwap
+//
+//  MedianSwap(inOutLeft, inOutRight)
+//  {
+//      if (inOutLeft > inOutRight)
+//      {
+//          temp = inOutLeft
+//          inOutLeft = inOutRight
+//          inOutRight = temp
+//      }
+//  }
+
+// MedianSwap(v1, v2) - U
+// MedianSwap(v4, v5) - U
+// MedianSwap(v1, v2) - V
+// MedianSwap(v4, v5) - V
+
+cmp.g.f0.0 (16) null:w          ubMEDIAN_TEMP(1,0)<32;16,2>  ubMEDIAN_TEMP(2,0)<32;16,2>
+cmp.g.f0.1 (16) null:w          ubMEDIAN_TEMP(4,0)<32;16,2>  ubMEDIAN_TEMP(5,0)<32;16,2>
+cmp.g.f1.0 (16) null:w          ubMEDIAN_TEMP(1,1)<32;16,2> 	ubMEDIAN_TEMP(2,1)<32;16,2>
+cmp.g.f1.1 (16) null:w          ubMEDIAN_TEMP(4,1)<32;16,2> 	ubMEDIAN_TEMP(5,1)<32;16,2>
+
+       	mov (16) ubTEMP1(0,0)<1>      ubMEDIAN_TEMP(1,0)<32;16,2>	
+       	mov (16) ubTEMP1(0,16)<1>     ubMEDIAN_TEMP(4,0)<32;16,2>	
+       	mov (16) ubTEMP1(1,0)<1>      ubMEDIAN_TEMP(1,1)<32;16,2>	
+		mov (16) ubTEMP1(1,16)<1>     ubMEDIAN_TEMP(4,1)<32;16,2>	
+
+(f0.0) mov (16) ubMEDIAN_TEMP(1,0)<2>     	ubMEDIAN_TEMP(2,0)<32;16,2>		
+(f0.1) mov (16) ubMEDIAN_TEMP(4,0)<2>		ubMEDIAN_TEMP(5,0)<32;16,2>		
+(f1.0) mov (16) ubMEDIAN_TEMP(1,1)<2>  		ubMEDIAN_TEMP(2,1)<32;16,2>		
+(f1.1) mov (16) ubMEDIAN_TEMP(4,1)<2>		ubMEDIAN_TEMP(5,1)<32;16,2>		
+
+(f0.0) mov (16) ubMEDIAN_TEMP(2,0)<2>     ubTEMP1(0,0)<16;16,1>		
+(f0.1) mov (16) ubMEDIAN_TEMP(5,0)<2>     ubTEMP1(0,16)<16;16,1>		
+(f1.0) mov (16) ubMEDIAN_TEMP(2,1)<2>     ubTEMP1(1,0)<16;16,1>		
+(f1.1) mov (16) ubMEDIAN_TEMP(5,1)<2>     ubTEMP1(1,16)<16;16,1>   	
+
+// MedianSwap(v7, v8) - U
+// MedianSwap(v0, v1) - U
+// MedianSwap(v7, v8) - V
+// MedianSwap(v0, v1) - V
+ cmp.g.f0.0 (16) null:w          ubMEDIAN_TEMP(7,0)<32;16,2>   	ubMEDIAN_TEMP(8,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w          ubMEDIAN_TEMP(0,0)<32;16,2>  	ubMEDIAN_TEMP(1,0)<32;16,2>
+ cmp.g.f1.0 (16) null:w          ubMEDIAN_TEMP(7,1)<32;16,2> 	ubMEDIAN_TEMP(8,1)<32;16,2>
+ cmp.g.f1.1 (16) null:w          ubMEDIAN_TEMP(0,1)<32;16,2> 	ubMEDIAN_TEMP(1,1)<32;16,2>
+
+        mov (16) ubTEMP1(0,0)<1>      ubMEDIAN_TEMP(7,0)<32;16,2>		
+        mov (16) ubTEMP1(0,16)<1>     ubMEDIAN_TEMP(0,0)<32;16,2>		
+        mov (16) ubTEMP1(1,0)<1>      ubMEDIAN_TEMP(7,1)<32;16,2>		
+ 		mov (16) ubTEMP1(1,16)<1>     ubMEDIAN_TEMP(0,1)<32;16,2>		
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(7,0)<2>   ubMEDIAN_TEMP(8,0)<32;16,2>	   
+ (f0.1) mov (16) ubMEDIAN_TEMP(0,0)<2>	ubMEDIAN_TEMP(1,0)<32;16,2>     
+ (f1.0) mov (16) ubMEDIAN_TEMP(7,1)<2>  	ubMEDIAN_TEMP(8,1)<32;16,2>     
+ (f1.1) mov (16) ubMEDIAN_TEMP(0,1)<2>	ubMEDIAN_TEMP(1,1)<32;16,2>     
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(8,0)<2>     ubTEMP1(0,0)<16;16,1>	   
+ (f0.1) mov (16) ubMEDIAN_TEMP(1,0)<2>     ubTEMP1(0,16)<16;16,1>    
+ (f1.0) mov (16) ubMEDIAN_TEMP(8,1)<2>     ubTEMP1(1,0)<16;16,1>     
+ (f1.1) mov (16) ubMEDIAN_TEMP(1,1)<2>     ubTEMP1(1,16)<16;16,1>    
+
+ // MedianSwap(v3, v4) - U
+ // MedianSwap(v6, v7) - U
+ // MedianSwap(v3, v4) - V
+ // MedianSwap(v6, v7) - V
+ cmp.g.f0.0 (16) null:w          ubMEDIAN_TEMP(3,0)<32;16,2>   	ubMEDIAN_TEMP(4,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w          ubMEDIAN_TEMP(6,0)<32;16,2>  	ubMEDIAN_TEMP(7,0)<32;16,2>
+ cmp.g.f1.0 (16) null:w          ubMEDIAN_TEMP(3,1)<32;16,2> 	ubMEDIAN_TEMP(4,1)<32;16,2>
+ cmp.g.f1.1 (16) null:w          ubMEDIAN_TEMP(6,1)<32;16,2> 	ubMEDIAN_TEMP(7,1)<32;16,2>
+
+        mov (16) ubTEMP1(0,0)<1>      ubMEDIAN_TEMP(3,0)<32;16,2>		
+        mov (16) ubTEMP1(0,16)<1>     ubMEDIAN_TEMP(6,0)<32;16,2>		
+        mov (16) ubTEMP1(1,0)<1>      ubMEDIAN_TEMP(3,1)<32;16,2>		
+ 		mov (16) ubTEMP1(1,16)<1>     ubMEDIAN_TEMP(6,1)<32;16,2>		
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(3,0)<2>   ubMEDIAN_TEMP(4,0)<32;16,2>	   
+ (f0.1) mov (16) ubMEDIAN_TEMP(6,0)<2>	ubMEDIAN_TEMP(7,0)<32;16,2>     
+ (f1.0) mov (16) ubMEDIAN_TEMP(3,1)<2>  	ubMEDIAN_TEMP(4,1)<32;16,2>     
+ (f1.1) mov (16) ubMEDIAN_TEMP(6,1)<2>	ubMEDIAN_TEMP(7,1)<32;16,2>     
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(4,0)<2>     ubTEMP1(0,0)<16;16,1>		
+ (f0.1) mov (16) ubMEDIAN_TEMP(7,0)<2>     ubTEMP1(0,16)<16;16,1>    
+ (f1.0) mov (16) ubMEDIAN_TEMP(4,1)<2>     ubTEMP1(1,0)<16;16,1>     
+ (f1.1) mov (16) ubMEDIAN_TEMP(7,1)<2>     ubTEMP1(1,16)<16;16,1>    
+
+ // MedianSwap(v1, v2) - U
+ // MedianSwap(v4, v5) - U
+ // MedianSwap(v1, v2) - V
+ // MedianSwap(v4, v5) - V
+ cmp.g.f0.0 (16) null:w          ubMEDIAN_TEMP(1,0)<32;16,2>   	ubMEDIAN_TEMP(2,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w          ubMEDIAN_TEMP(4,0)<32;16,2>  	ubMEDIAN_TEMP(5,0)<32;16,2>
+ cmp.g.f1.0 (16) null:w          ubMEDIAN_TEMP(1,1)<32;16,2> 	ubMEDIAN_TEMP(2,1)<32;16,2>
+ cmp.g.f1.1 (16) null:w          ubMEDIAN_TEMP(4,1)<32;16,2> 	ubMEDIAN_TEMP(5,1)<32;16,2>
+
+        mov (16) ubTEMP1(0,0)<1>      ubMEDIAN_TEMP(1,0)<32;16,2>		
+        mov (16) ubTEMP1(0,16)<1>     ubMEDIAN_TEMP(4,0)<32;16,2>		
+        mov (16) ubTEMP1(1,0)<1>      ubMEDIAN_TEMP(1,1)<32;16,2>		
+ 		mov (16) ubTEMP1(1,16)<1>     ubMEDIAN_TEMP(4,1)<32;16,2>		
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(1,0)<2>   ubMEDIAN_TEMP(2,0)<32;16,2>		
+ (f0.1) mov (16) ubMEDIAN_TEMP(4,0)<2>	ubMEDIAN_TEMP(5,0)<32;16,2>     
+ (f1.0) mov (16) ubMEDIAN_TEMP(1,1)<2>  	ubMEDIAN_TEMP(2,1)<32;16,2>     
+ (f1.1) mov (16) ubMEDIAN_TEMP(4,1)<2>	ubMEDIAN_TEMP(5,1)<32;16,2>     
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(2,0)<2>     ubTEMP1(0,0)<16;16,1>		
+ (f0.1) mov (16) ubMEDIAN_TEMP(5,0)<2>     ubTEMP1(0,16)<16;16,1>    
+ (f1.0) mov (16) ubMEDIAN_TEMP(2,1)<2>     ubTEMP1(1,0)<16;16,1>     
+ (f1.1) mov (16) ubMEDIAN_TEMP(5,1)<2>     ubTEMP1(1,16)<16;16,1>    
+
+ // MedianSwap(v7, v8) - U
+ // MedianSwap(v0, v3) - U
+ // MedianSwap(v7, v8) - V
+ // MedianSwap(v0, v3) - V
+ cmp.g.f0.0 (16) null:w          ubMEDIAN_TEMP(7,0)<32;16,2>   	ubMEDIAN_TEMP(8,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w          ubMEDIAN_TEMP(0,0)<32;16,2>  	ubMEDIAN_TEMP(3,0)<32;16,2>
+ cmp.g.f1.0 (16) null:w          ubMEDIAN_TEMP(7,1)<32;16,2> 	ubMEDIAN_TEMP(8,1)<32;16,2>
+ cmp.g.f1.1 (16) null:w          ubMEDIAN_TEMP(0,1)<32;16,2> 	ubMEDIAN_TEMP(3,1)<32;16,2>
+
+        mov (16) ubTEMP1(0,0)<1>      ubMEDIAN_TEMP(7,0)<32;16,2>		
+        mov (16) ubTEMP1(0,16)<1>     ubMEDIAN_TEMP(0,0)<32;16,2>		
+        mov (16) ubTEMP1(1,0)<1>      ubMEDIAN_TEMP(7,1)<32;16,2>		
+ 		mov (16) ubTEMP1(1,16)<1>     ubMEDIAN_TEMP(0,1)<32;16,2>		
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(7,0)<2>   ubMEDIAN_TEMP(8,0)<32;16,2>		
+ (f0.1) mov (16) ubMEDIAN_TEMP(0,0)<2>	ubMEDIAN_TEMP(3,0)<32;16,2>     
+ (f1.0) mov (16) ubMEDIAN_TEMP(7,1)<2>  	ubMEDIAN_TEMP(8,1)<32;16,2>     
+ (f1.1) mov (16) ubMEDIAN_TEMP(0,1)<2>	ubMEDIAN_TEMP(3,1)<32;16,2>     
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(8,0)<2>     ubTEMP1(0,0)<16;16,1>		
+ (f0.1) mov (16) ubMEDIAN_TEMP(3,0)<2>     ubTEMP1(0,16)<16;16,1>    
+ (f1.0) mov (16) ubMEDIAN_TEMP(8,1)<2>     ubTEMP1(1,0)<16;16,1>     
+ (f1.1) mov (16) ubMEDIAN_TEMP(3,1)<2>     ubTEMP1(1,16)<16;16,1>    
+
+ // NOTE:
+ // Compare v0 to v6 to find the minimum.
+ // Store the minimum for future use.
+ //TODO - Find if MIN is needed.
+ //cmp.l.f0.0  (16) null:w          			ubMEDIAN_TEMP(%1+0,0)<32;16,2> 	ubMEDIAN_TEMP(%1+6,0)<32;16,2>
+ //cmp.l.f1.0  (16) null:w          			ubMEDIAN_TEMP(%1+0,1)<32;16,2> 	ubMEDIAN_TEMP(%1+6,1)<32;16,2>
+ //(f0.0)  mov (16) ubCURR_MIN(0,%2*16+0)<1>   		ubMEDIAN_TEMP(%1+0,0)<32;16,2>
+ //(f1.0)  mov (16) ubCURR_MIN(1,%2*16+0)<1>   		ubMEDIAN_TEMP(%1+0,1)<32;16,2>
+ //(-f0.0) mov (16) ubCURR_MIN(0,%2*16+0)<1>   		ubMEDIAN_TEMP(%1+6,0)<32;16,2>
+ //(-f1.0) mov (16) ubCURR_MIN(1,%2*16+0)<1>   		ubMEDIAN_TEMP(%1+6,1)<32;16,2>
+
+ // MedianSwap(v5, v8) - U
+ // MedianSwap(v4, v7) - U
+ // MedianSwap(v5, v8) - V
+ // MedianSwap(v4, v7) - V
+ cmp.g.f0.0 (16) null:w          ubMEDIAN_TEMP(5,0)<32;16,2>   	ubMEDIAN_TEMP(8,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w          ubMEDIAN_TEMP(4,0)<32;16,2>  	ubMEDIAN_TEMP(7,0)<32;16,2>
+ cmp.g.f1.0 (16) null:w          ubMEDIAN_TEMP(5,1)<32;16,2> 	ubMEDIAN_TEMP(8,1)<32;16,2>
+ cmp.g.f1.1 (16) null:w          ubMEDIAN_TEMP(4,1)<32;16,2> 	ubMEDIAN_TEMP(7,1)<32;16,2>
+
+        mov (16) ubTEMP1(0,0)<1>      ubMEDIAN_TEMP(5,0)<32;16,2>		
+        mov (16) ubTEMP1(0,16)<1>     ubMEDIAN_TEMP(4,0)<32;16,2>		
+        mov (16) ubTEMP1(1,0)<1>      ubMEDIAN_TEMP(5,1)<32;16,2>		
+ 		mov (16) ubTEMP1(1,16)<1>     ubMEDIAN_TEMP(4,1)<32;16,2>		
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(5,0)<2>    	ubMEDIAN_TEMP(8,0)<32;16,2>		
+ (f0.1) mov (16) ubMEDIAN_TEMP(4,0)<2>		ubMEDIAN_TEMP(7,0)<32;16,2>     
+ (f1.0) mov (16) ubMEDIAN_TEMP(5,1)<2>  	ubMEDIAN_TEMP(8,1)<32;16,2>     
+ (f1.1) mov (16) ubMEDIAN_TEMP(4,1)<2>		ubMEDIAN_TEMP(7,1)<32;16,2>     
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(8,0)<2>     ubTEMP1(0,0)<16;16,1>		
+ (f0.1) mov (16) ubMEDIAN_TEMP(7,0)<2>     ubTEMP1(0,16)<16;16,1>    
+ (f1.0) mov (16) ubMEDIAN_TEMP(8,1)<2>     ubTEMP1(1,0)<16;16,1>     
+ (f1.1) mov (16) ubMEDIAN_TEMP(7,1)<2>     ubTEMP1(1,16)<16;16,1>    
+
+// NOTE:
+// Compare v2 to v8 to find the maximum.
+// Store the maximum for future use.
+ //TODO - Find if MAX is needed.
+// cmp.g.f0.0  (16) null:w         ubMEDIAN_TEMP(%1+2,0)<32;16,2> ubMEDIAN_TEMP(%1+8,0)<32;16,2>
+// cmp.g.f1.0  (16) null:w         ubMEDIAN_TEMP(%1+2,1)<32;16,2> ubMEDIAN_TEMP(%1+8,1)<32;16,2>
+//(f0.0)  mov (16) ubCURR_MAX(0,%2*16+0)<1>   	ubMEDIAN_TEMP(%1+2,0)<32;16,2>
+//(f1.0)  mov (16) ubCURR_MAX(1,%2*16+0)<1>   	ubMEDIAN_TEMP(%1+2,1)<32;16,2>
+//(-f0.0) mov (16) ubCURR_MAX(0,%2*16+0)<1>   	ubMEDIAN_TEMP(%1+8,0)<32;16,2>
+//(-f1.0) mov (16) ubCURR_MAX(1,%2*16+0)<1>   	ubMEDIAN_TEMP(%1+8,1)<32;16,2>
+
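+// Upper half of MedianSwap(v3, v6) - U : v6 = max(v3, v6), v3 is not written back
+// Upper half of MedianSwap(v1, v4) - U : v4 = max(v1, v4), v1 is not written back
+// Upper half of MedianSwap(v3, v6) - V : v6 = max(v3, v6), v3 is not written back
+// Upper half of MedianSwap(v1, v4) - V : v4 = max(v1, v4), v1 is not written back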
+ cmp.g.f0.0 (16) null:w          ubMEDIAN_TEMP(3,0)<32;16,2>   	ubMEDIAN_TEMP(6,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w          ubMEDIAN_TEMP(1,0)<32;16,2>  	ubMEDIAN_TEMP(4,0)<32;16,2>
+ cmp.g.f1.0 (16) null:w          ubMEDIAN_TEMP(3,1)<32;16,2> 	ubMEDIAN_TEMP(6,1)<32;16,2>
+ cmp.g.f1.1 (16) null:w          ubMEDIAN_TEMP(1,1)<32;16,2> 	ubMEDIAN_TEMP(4,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(6,0)<2>     ubMEDIAN_TEMP(3,0)<32;16,2>		
+ (f0.1) mov (16) ubMEDIAN_TEMP(4,0)<2>     ubMEDIAN_TEMP(1,0)<32;16,2>      
+ (f1.0) mov (16) ubMEDIAN_TEMP(6,1)<2>     ubMEDIAN_TEMP(3,1)<32;16,2>      
+ (f1.1) mov (16) ubMEDIAN_TEMP(4,1)<2>     ubMEDIAN_TEMP(1,1)<32;16,2>      
+
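+ // Lower half of MedianSwap(v2,v5) - U : v2 = min(v2, v5), v5 is not written back
+ // Lower half of MedianSwap(v4,v7) - U : v4 = min(v4, v7), v7 is not written back
+ // Lower half of MedianSwap(v2,v5) - V : v2 = min(v2, v5), v5 is not written back
+ // Lower half of MedianSwap(v4,v7) - V : v4 = min(v4, v7), v7 is not written back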
+ cmp.g.f0.0 (16) null:w          ubMEDIAN_TEMP(2,0)<32;16,2>   	ubMEDIAN_TEMP(5,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w          ubMEDIAN_TEMP(4,0)<32;16,2>  	ubMEDIAN_TEMP(7,0)<32;16,2>
+ cmp.g.f1.0 (16) null:w          ubMEDIAN_TEMP(2,1)<32;16,2> 	ubMEDIAN_TEMP(5,1)<32;16,2>
+ cmp.g.f1.1 (16) null:w          ubMEDIAN_TEMP(4,1)<32;16,2> 	ubMEDIAN_TEMP(7,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(2,0)<2>     ubMEDIAN_TEMP(5,0)<32;16,2>		
+ (f0.1) mov (16) ubMEDIAN_TEMP(4,0)<2>     ubMEDIAN_TEMP(7,0)<32;16,2>      
+ (f1.0) mov (16) ubMEDIAN_TEMP(2,1)<2>     ubMEDIAN_TEMP(5,1)<32;16,2>      
+ (f1.1) mov (16) ubMEDIAN_TEMP(4,1)<2>     ubMEDIAN_TEMP(7,1)<32;16,2>      
+
+ // MedianSwap(v4,v2) - U
+ // MedianSwap(v4,v2) - V
+ cmp.g.f0.0 (16) null:w          ubMEDIAN_TEMP(4,0)<32;16,2>   	ubMEDIAN_TEMP(2,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w          ubMEDIAN_TEMP(4,1)<32;16,2>  	ubMEDIAN_TEMP(2,1)<32;16,2>
+
+        mov (16) ubTEMP1(0,0)<1>      ubMEDIAN_TEMP(4,0)<32;16,2>		
+        mov (16) ubTEMP1(0,16)<1>     ubMEDIAN_TEMP(4,1)<32;16,2>		
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(4,0)<2>    	ubMEDIAN_TEMP(2,0)<32;16,2>		
+ (f0.1) mov (16) ubMEDIAN_TEMP(4,1)<2>		ubMEDIAN_TEMP(2,1)<32;16,2>		
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(2,0)<2>     ubTEMP1(0,0)<16;16,1>		
+ (f0.1) mov (16) ubMEDIAN_TEMP(2,1)<2>     ubTEMP1(0,16)<16;16,1>	
+
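+ // Upper half of MedianSwap(v6,v4) - U : v4 = max(v4, v6), v6 is not written back
+ // Upper half of MedianSwap(v6,v4) - V : v4 = max(v4, v6), v6 is not written back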
+ cmp.g.f0.0 (16) null:w          ubMEDIAN_TEMP(6,0)<32;16,2>   ubMEDIAN_TEMP(4,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w          ubMEDIAN_TEMP(6,1)<32;16,2>   ubMEDIAN_TEMP(4,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(4,0)<2>    	ubMEDIAN_TEMP(6,0)<32;16,2>		
+ (f0.1) mov (16) ubMEDIAN_TEMP(4,1)<2>		ubMEDIAN_TEMP(6,1)<32;16,2>		
+
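+ // Lower half of MedianSwap(v4,v2) - U : v4 = min(v4, v2), v2 is not written back
+ // Lower half of MedianSwap(v4,v2) - V : v4 = min(v4, v2), v2 is not written back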
+ cmp.g.f0.0 (16) null:w          ubMEDIAN_TEMP(4,0)<32;16,2>   	ubMEDIAN_TEMP(2,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w          ubMEDIAN_TEMP(4,1)<32;16,2>  	ubMEDIAN_TEMP(2,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(4,0)<2>    	ubMEDIAN_TEMP(2,0)<32;16,2>		
+ (f0.1) mov (16) ubMEDIAN_TEMP(4,1)<2>		ubMEDIAN_TEMP(2,1)<32;16,2>		
+cmp.g.f0.0 (16) null:w          ubMEDIAN_TEMP(10,0)<32;16,2>  ubMEDIAN_TEMP(11,0)<32;16,2>
+cmp.g.f0.1 (16) null:w          ubMEDIAN_TEMP(13,0)<32;16,2>  ubMEDIAN_TEMP(14,0)<32;16,2>
+cmp.g.f1.0 (16) null:w          ubMEDIAN_TEMP(10,1)<32;16,2> 	ubMEDIAN_TEMP(11,1)<32;16,2>
+cmp.g.f1.1 (16) null:w          ubMEDIAN_TEMP(13,1)<32;16,2> 	ubMEDIAN_TEMP(14,1)<32;16,2>
+
+       	mov (16) ubTEMP1(0,0)<1>      ubMEDIAN_TEMP(10,0)<32;16,2>	
+       	mov (16) ubTEMP1(0,16)<1>     ubMEDIAN_TEMP(13,0)<32;16,2>	
+       	mov (16) ubTEMP1(1,0)<1>      ubMEDIAN_TEMP(10,1)<32;16,2>	
+		mov (16) ubTEMP1(1,16)<1>     ubMEDIAN_TEMP(13,1)<32;16,2>	
+
+(f0.0) mov (16) ubMEDIAN_TEMP(10,0)<2>     	ubMEDIAN_TEMP(11,0)<32;16,2>		
+(f0.1) mov (16) ubMEDIAN_TEMP(13,0)<2>		ubMEDIAN_TEMP(14,0)<32;16,2>		
+(f1.0) mov (16) ubMEDIAN_TEMP(10,1)<2>  		ubMEDIAN_TEMP(11,1)<32;16,2>		
+(f1.1) mov (16) ubMEDIAN_TEMP(13,1)<2>		ubMEDIAN_TEMP(14,1)<32;16,2>		
+
+(f0.0) mov (16) ubMEDIAN_TEMP(11,0)<2>     ubTEMP1(0,0)<16;16,1>		
+(f0.1) mov (16) ubMEDIAN_TEMP(14,0)<2>     ubTEMP1(0,16)<16;16,1>		
+(f1.0) mov (16) ubMEDIAN_TEMP(11,1)<2>     ubTEMP1(1,0)<16;16,1>		
+(f1.1) mov (16) ubMEDIAN_TEMP(14,1)<2>     ubTEMP1(1,16)<16;16,1>   	
+
+// MedianSwap(v7, v8) - U
+// MedianSwap(v0, v1) - U
+// MedianSwap(v7, v8) - V
+// MedianSwap(v0, v1) - V
+ cmp.g.f0.0 (16) null:w          ubMEDIAN_TEMP(16,0)<32;16,2>   	ubMEDIAN_TEMP(17,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w          ubMEDIAN_TEMP(9,0)<32;16,2>  	ubMEDIAN_TEMP(10,0)<32;16,2>
+ cmp.g.f1.0 (16) null:w          ubMEDIAN_TEMP(16,1)<32;16,2> 	ubMEDIAN_TEMP(17,1)<32;16,2>
+ cmp.g.f1.1 (16) null:w          ubMEDIAN_TEMP(9,1)<32;16,2> 	ubMEDIAN_TEMP(10,1)<32;16,2>
+
+        mov (16) ubTEMP1(0,0)<1>      ubMEDIAN_TEMP(16,0)<32;16,2>		
+        mov (16) ubTEMP1(0,16)<1>     ubMEDIAN_TEMP(9,0)<32;16,2>		
+        mov (16) ubTEMP1(1,0)<1>      ubMEDIAN_TEMP(16,1)<32;16,2>		
+ 		mov (16) ubTEMP1(1,16)<1>     ubMEDIAN_TEMP(9,1)<32;16,2>		
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(16,0)<2>   ubMEDIAN_TEMP(17,0)<32;16,2>	   
+ (f0.1) mov (16) ubMEDIAN_TEMP(9,0)<2>	ubMEDIAN_TEMP(10,0)<32;16,2>     
+ (f1.0) mov (16) ubMEDIAN_TEMP(16,1)<2>  	ubMEDIAN_TEMP(17,1)<32;16,2>     
+ (f1.1) mov (16) ubMEDIAN_TEMP(9,1)<2>	ubMEDIAN_TEMP(10,1)<32;16,2>     
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(17,0)<2>     ubTEMP1(0,0)<16;16,1>	   
+ (f0.1) mov (16) ubMEDIAN_TEMP(10,0)<2>     ubTEMP1(0,16)<16;16,1>    
+ (f1.0) mov (16) ubMEDIAN_TEMP(17,1)<2>     ubTEMP1(1,0)<16;16,1>     
+ (f1.1) mov (16) ubMEDIAN_TEMP(10,1)<2>     ubTEMP1(1,16)<16;16,1>    
+
+ // MedianSwap(v3, v4) - U
+ // MedianSwap(v6, v7) - U
+ // MedianSwap(v3, v4) - V
+ // MedianSwap(v6, v7) - V
+ cmp.g.f0.0 (16) null:w          ubMEDIAN_TEMP(12,0)<32;16,2>   	ubMEDIAN_TEMP(13,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w          ubMEDIAN_TEMP(15,0)<32;16,2>  	ubMEDIAN_TEMP(16,0)<32;16,2>
+ cmp.g.f1.0 (16) null:w          ubMEDIAN_TEMP(12,1)<32;16,2> 	ubMEDIAN_TEMP(13,1)<32;16,2>
+ cmp.g.f1.1 (16) null:w          ubMEDIAN_TEMP(15,1)<32;16,2> 	ubMEDIAN_TEMP(16,1)<32;16,2>
+
+        mov (16) ubTEMP1(0,0)<1>      ubMEDIAN_TEMP(12,0)<32;16,2>		
+        mov (16) ubTEMP1(0,16)<1>     ubMEDIAN_TEMP(15,0)<32;16,2>		
+        mov (16) ubTEMP1(1,0)<1>      ubMEDIAN_TEMP(12,1)<32;16,2>		
+ 		mov (16) ubTEMP1(1,16)<1>     ubMEDIAN_TEMP(15,1)<32;16,2>		
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(12,0)<2>   ubMEDIAN_TEMP(13,0)<32;16,2>	   
+ (f0.1) mov (16) ubMEDIAN_TEMP(15,0)<2>	ubMEDIAN_TEMP(16,0)<32;16,2>     
+ (f1.0) mov (16) ubMEDIAN_TEMP(12,1)<2>  	ubMEDIAN_TEMP(13,1)<32;16,2>     
+ (f1.1) mov (16) ubMEDIAN_TEMP(15,1)<2>	ubMEDIAN_TEMP(16,1)<32;16,2>     
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(13,0)<2>     ubTEMP1(0,0)<16;16,1>		
+ (f0.1) mov (16) ubMEDIAN_TEMP(16,0)<2>     ubTEMP1(0,16)<16;16,1>    
+ (f1.0) mov (16) ubMEDIAN_TEMP(13,1)<2>     ubTEMP1(1,0)<16;16,1>     
+ (f1.1) mov (16) ubMEDIAN_TEMP(16,1)<2>     ubTEMP1(1,16)<16;16,1>    
+
+ // MedianSwap(v1, v2) - U
+ // MedianSwap(v4, v5) - U
+ // MedianSwap(v1, v2) - V
+ // MedianSwap(v4, v5) - V
+ cmp.g.f0.0 (16) null:w          ubMEDIAN_TEMP(10,0)<32;16,2>   	ubMEDIAN_TEMP(11,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w          ubMEDIAN_TEMP(13,0)<32;16,2>  	ubMEDIAN_TEMP(14,0)<32;16,2>
+ cmp.g.f1.0 (16) null:w          ubMEDIAN_TEMP(10,1)<32;16,2> 	ubMEDIAN_TEMP(11,1)<32;16,2>
+ cmp.g.f1.1 (16) null:w          ubMEDIAN_TEMP(13,1)<32;16,2> 	ubMEDIAN_TEMP(14,1)<32;16,2>
+
+        mov (16) ubTEMP1(0,0)<1>      ubMEDIAN_TEMP(10,0)<32;16,2>		
+        mov (16) ubTEMP1(0,16)<1>     ubMEDIAN_TEMP(13,0)<32;16,2>		
+        mov (16) ubTEMP1(1,0)<1>      ubMEDIAN_TEMP(10,1)<32;16,2>		
+ 		mov (16) ubTEMP1(1,16)<1>     ubMEDIAN_TEMP(13,1)<32;16,2>		
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(10,0)<2>   ubMEDIAN_TEMP(11,0)<32;16,2>		
+ (f0.1) mov (16) ubMEDIAN_TEMP(13,0)<2>	ubMEDIAN_TEMP(14,0)<32;16,2>     
+ (f1.0) mov (16) ubMEDIAN_TEMP(10,1)<2>  	ubMEDIAN_TEMP(11,1)<32;16,2>     
+ (f1.1) mov (16) ubMEDIAN_TEMP(13,1)<2>	ubMEDIAN_TEMP(14,1)<32;16,2>     
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(11,0)<2>     ubTEMP1(0,0)<16;16,1>		
+ (f0.1) mov (16) ubMEDIAN_TEMP(14,0)<2>     ubTEMP1(0,16)<16;16,1>    
+ (f1.0) mov (16) ubMEDIAN_TEMP(11,1)<2>     ubTEMP1(1,0)<16;16,1>     
+ (f1.1) mov (16) ubMEDIAN_TEMP(14,1)<2>     ubTEMP1(1,16)<16;16,1>    
+
+ // MedianSwap(v7, v8) - U
+ // MedianSwap(v0, v3) - U
+ // MedianSwap(v7, v8) - V
+ // MedianSwap(v0, v3) - V
+ cmp.g.f0.0 (16) null:w          ubMEDIAN_TEMP(16,0)<32;16,2>   	ubMEDIAN_TEMP(17,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w          ubMEDIAN_TEMP(9,0)<32;16,2>  	ubMEDIAN_TEMP(12,0)<32;16,2>
+ cmp.g.f1.0 (16) null:w          ubMEDIAN_TEMP(16,1)<32;16,2> 	ubMEDIAN_TEMP(17,1)<32;16,2>
+ cmp.g.f1.1 (16) null:w          ubMEDIAN_TEMP(9,1)<32;16,2> 	ubMEDIAN_TEMP(12,1)<32;16,2>
+
+        mov (16) ubTEMP1(0,0)<1>      ubMEDIAN_TEMP(16,0)<32;16,2>		
+        mov (16) ubTEMP1(0,16)<1>     ubMEDIAN_TEMP(9,0)<32;16,2>		
+        mov (16) ubTEMP1(1,0)<1>      ubMEDIAN_TEMP(16,1)<32;16,2>		
+ 		mov (16) ubTEMP1(1,16)<1>     ubMEDIAN_TEMP(9,1)<32;16,2>		
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(16,0)<2>   ubMEDIAN_TEMP(17,0)<32;16,2>		
+ (f0.1) mov (16) ubMEDIAN_TEMP(9,0)<2>	ubMEDIAN_TEMP(12,0)<32;16,2>     
+ (f1.0) mov (16) ubMEDIAN_TEMP(16,1)<2>  	ubMEDIAN_TEMP(17,1)<32;16,2>     
+ (f1.1) mov (16) ubMEDIAN_TEMP(9,1)<2>	ubMEDIAN_TEMP(12,1)<32;16,2>     
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(17,0)<2>     ubTEMP1(0,0)<16;16,1>		
+ (f0.1) mov (16) ubMEDIAN_TEMP(12,0)<2>     ubTEMP1(0,16)<16;16,1>    
+ (f1.0) mov (16) ubMEDIAN_TEMP(17,1)<2>     ubTEMP1(1,0)<16;16,1>     
+ (f1.1) mov (16) ubMEDIAN_TEMP(12,1)<2>     ubTEMP1(1,16)<16;16,1>    
+
+ // NOTE:
+ // Compare v0 to v6 to find the minimum.
+ // Store the minimum for future use.
+ //TODO - Find if MIN is needed.
+ //cmp.l.f0.0  (16) null:w          			ubMEDIAN_TEMP(%1+0,0)<32;16,2> 	ubMEDIAN_TEMP(%1+6,0)<32;16,2>
+ //cmp.l.f1.0  (16) null:w          			ubMEDIAN_TEMP(%1+0,1)<32;16,2> 	ubMEDIAN_TEMP(%1+6,1)<32;16,2>
+ //(f0.0)  mov (16) ubCURR_MIN(0,%2*16+0)<1>   		ubMEDIAN_TEMP(%1+0,0)<32;16,2>
+ //(f1.0)  mov (16) ubCURR_MIN(1,%2*16+0)<1>   		ubMEDIAN_TEMP(%1+0,1)<32;16,2>
+ //(-f0.0) mov (16) ubCURR_MIN(0,%2*16+0)<1>   		ubMEDIAN_TEMP(%1+6,0)<32;16,2>
+ //(-f1.0) mov (16) ubCURR_MIN(1,%2*16+0)<1>   		ubMEDIAN_TEMP(%1+6,1)<32;16,2>
+
+ // MedianSwap(v5, v8) - U
+ // MedianSwap(v4, v7) - U
+ // MedianSwap(v5, v8) - V
+ // MedianSwap(v4, v7) - V
+ cmp.g.f0.0 (16) null:w          ubMEDIAN_TEMP(14,0)<32;16,2>   	ubMEDIAN_TEMP(17,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w          ubMEDIAN_TEMP(13,0)<32;16,2>  	ubMEDIAN_TEMP(16,0)<32;16,2>
+ cmp.g.f1.0 (16) null:w          ubMEDIAN_TEMP(14,1)<32;16,2> 	ubMEDIAN_TEMP(17,1)<32;16,2>
+ cmp.g.f1.1 (16) null:w          ubMEDIAN_TEMP(13,1)<32;16,2> 	ubMEDIAN_TEMP(16,1)<32;16,2>
+
+        mov (16) ubTEMP1(0,0)<1>      ubMEDIAN_TEMP(14,0)<32;16,2>		
+        mov (16) ubTEMP1(0,16)<1>     ubMEDIAN_TEMP(13,0)<32;16,2>		
+        mov (16) ubTEMP1(1,0)<1>      ubMEDIAN_TEMP(14,1)<32;16,2>		
+ 		mov (16) ubTEMP1(1,16)<1>     ubMEDIAN_TEMP(13,1)<32;16,2>		
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(14,0)<2>    	ubMEDIAN_TEMP(17,0)<32;16,2>		
+ (f0.1) mov (16) ubMEDIAN_TEMP(13,0)<2>		ubMEDIAN_TEMP(16,0)<32;16,2>     
+ (f1.0) mov (16) ubMEDIAN_TEMP(14,1)<2>  	ubMEDIAN_TEMP(17,1)<32;16,2>     
+ (f1.1) mov (16) ubMEDIAN_TEMP(13,1)<2>		ubMEDIAN_TEMP(16,1)<32;16,2>     
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(17,0)<2>     ubTEMP1(0,0)<16;16,1>		
+ (f0.1) mov (16) ubMEDIAN_TEMP(16,0)<2>     ubTEMP1(0,16)<16;16,1>    
+ (f1.0) mov (16) ubMEDIAN_TEMP(17,1)<2>     ubTEMP1(1,0)<16;16,1>     
+ (f1.1) mov (16) ubMEDIAN_TEMP(16,1)<2>     ubTEMP1(1,16)<16;16,1>    
+
+// NOTE:
+// Compare v2 to v8 to find the maximum.
+// Store the maximum for future use.
+ //TODO - Find if MAX is needed.
+// cmp.g.f0.0  (16) null:w         ubMEDIAN_TEMP(%1+2,0)<32;16,2> ubMEDIAN_TEMP(%1+8,0)<32;16,2>
+// cmp.g.f1.0  (16) null:w         ubMEDIAN_TEMP(%1+2,1)<32;16,2> ubMEDIAN_TEMP(%1+8,1)<32;16,2>
+//(f0.0)  mov (16) ubCURR_MAX(0,%2*16+0)<1>   	ubMEDIAN_TEMP(%1+2,0)<32;16,2>
+//(f1.0)  mov (16) ubCURR_MAX(1,%2*16+0)<1>   	ubMEDIAN_TEMP(%1+2,1)<32;16,2>
+//(-f0.0) mov (16) ubCURR_MAX(0,%2*16+0)<1>   	ubMEDIAN_TEMP(%1+8,0)<32;16,2>
+//(-f1.0) mov (16) ubCURR_MAX(1,%2*16+0)<1>   	ubMEDIAN_TEMP(%1+8,1)<32;16,2>
+
+// MedianSwap(v3, v6) - U
+// MedianSwap(v1, v4) - U
+// MedianSwap(v3, v6) - V
+// MedianSwap(v1, v4) - V
+ cmp.g.f0.0 (16) null:w          ubMEDIAN_TEMP(12,0)<32;16,2>   	ubMEDIAN_TEMP(15,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w          ubMEDIAN_TEMP(10,0)<32;16,2>  	ubMEDIAN_TEMP(13,0)<32;16,2>
+ cmp.g.f1.0 (16) null:w          ubMEDIAN_TEMP(12,1)<32;16,2> 	ubMEDIAN_TEMP(15,1)<32;16,2>
+ cmp.g.f1.1 (16) null:w          ubMEDIAN_TEMP(10,1)<32;16,2> 	ubMEDIAN_TEMP(13,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(15,0)<2>     ubMEDIAN_TEMP(12,0)<32;16,2>		
+ (f0.1) mov (16) ubMEDIAN_TEMP(13,0)<2>     ubMEDIAN_TEMP(10,0)<32;16,2>      
+ (f1.0) mov (16) ubMEDIAN_TEMP(15,1)<2>     ubMEDIAN_TEMP(12,1)<32;16,2>      
+ (f1.1) mov (16) ubMEDIAN_TEMP(13,1)<2>     ubMEDIAN_TEMP(10,1)<32;16,2>      
+
+ // MedianSwap(v2,v5) - U
+ // MedianSwap(v4,v7) - U
+ // MedianSwap(v2,v5) - V
+ // MedianSwap(v4,v7) - V
+ cmp.g.f0.0 (16) null:w          ubMEDIAN_TEMP(11,0)<32;16,2>   	ubMEDIAN_TEMP(14,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w          ubMEDIAN_TEMP(13,0)<32;16,2>  	ubMEDIAN_TEMP(16,0)<32;16,2>
+ cmp.g.f1.0 (16) null:w          ubMEDIAN_TEMP(11,1)<32;16,2> 	ubMEDIAN_TEMP(14,1)<32;16,2>
+ cmp.g.f1.1 (16) null:w          ubMEDIAN_TEMP(13,1)<32;16,2> 	ubMEDIAN_TEMP(16,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(11,0)<2>     ubMEDIAN_TEMP(14,0)<32;16,2>		
+ (f0.1) mov (16) ubMEDIAN_TEMP(13,0)<2>     ubMEDIAN_TEMP(16,0)<32;16,2>      
+ (f1.0) mov (16) ubMEDIAN_TEMP(11,1)<2>     ubMEDIAN_TEMP(14,1)<32;16,2>      
+ (f1.1) mov (16) ubMEDIAN_TEMP(13,1)<2>     ubMEDIAN_TEMP(16,1)<32;16,2>      
+
+ // MedianSwap(v4,v2) - U
+ // MedianSwap(v4,v2) - V
+ cmp.g.f0.0 (16) null:w          ubMEDIAN_TEMP(13,0)<32;16,2>   	ubMEDIAN_TEMP(11,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w          ubMEDIAN_TEMP(13,1)<32;16,2>  	ubMEDIAN_TEMP(11,1)<32;16,2>
+
+        mov (16) ubTEMP1(0,0)<1>      ubMEDIAN_TEMP(13,0)<32;16,2>		
+        mov (16) ubTEMP1(0,16)<1>     ubMEDIAN_TEMP(13,1)<32;16,2>		
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(13,0)<2>    	ubMEDIAN_TEMP(11,0)<32;16,2>		
+ (f0.1) mov (16) ubMEDIAN_TEMP(13,1)<2>		ubMEDIAN_TEMP(11,1)<32;16,2>		
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(11,0)<2>     ubTEMP1(0,0)<16;16,1>		
+ (f0.1) mov (16) ubMEDIAN_TEMP(11,1)<2>     ubTEMP1(0,16)<16;16,1>	
+
+ // MedianSwap(v6,v4) - U
+ // MedianSwap(v6,v4) - V
+ cmp.g.f0.0 (16) null:w          ubMEDIAN_TEMP(15,0)<32;16,2>   ubMEDIAN_TEMP(13,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w          ubMEDIAN_TEMP(15,1)<32;16,2>   ubMEDIAN_TEMP(13,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(13,0)<2>    	ubMEDIAN_TEMP(15,0)<32;16,2>		
+ (f0.1) mov (16) ubMEDIAN_TEMP(13,1)<2>		ubMEDIAN_TEMP(15,1)<32;16,2>		
+
+ // MedianSwap(v4,v2) - U
+ // MedianSwap(v4,v2) - V
+ cmp.g.f0.0 (16) null:w          ubMEDIAN_TEMP(13,0)<32;16,2>   	ubMEDIAN_TEMP(11,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w          ubMEDIAN_TEMP(13,1)<32;16,2>  	ubMEDIAN_TEMP(11,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(13,0)<2>    	ubMEDIAN_TEMP(11,0)<32;16,2>		
+ (f0.1) mov (16) ubMEDIAN_TEMP(13,1)<2>		ubMEDIAN_TEMP(11,1)<32;16,2>		
+
+// Sobel Value calculation for the current pixel v4
+//          v2 v1 v0
+//           *  *  *     <--- Different field - not used
+//          v5 v4 v3
+//           *  *  *     <--- Different field - not used
+//          v8 v7 v6
+//
+//    Gx = -v0 - 2*v3 - v6 + v2 + 2*v5 + v8
+//    Gy =  v0 + 2*v1 + v2 - v6 - 2*v7 - v8
+//
+//  Sobel = (|Gx| + |Gy|) >> 3
+
+//TODO - Change Later - rT
+add (1) a0.0:uw  a0.0<0;1,0>:uw -128:uw
+
+// - 2 * v3
+mul (16) acc0.0<1>:w  		r[a0.0,64]<16;16,1>:ub  		-2:w
+// + v8
+mac (16) acc0.0<1>:w  		r[a0.0,132]<16;16,1>:ub   		1:w
+// - v0
+mac (16) acc0.0<1>:w  		r[a0.0,0]<16;16,1>:ub  		-1:w
+// - v6
+mac (16) acc0.0<1>:w  		r[a0.0,128]<16;16,1>:ub  		-1:w
+// + v2
+mac (16) acc0.0<1>:w  		r[a0.0,4]<16;16,1>:ub   		1:w
+// + 2 * v5
+mac (16) wSOBEL_X(0)<1> 	r[a0.0,68]<16;16,1>:ub   		2:w
+// - 2 * v3
+mul (16) acc0.0<1>:w  		r[a0.0,96]<16;16,1>:ub  		-2:w
+// + v8
+mac (16) acc0.0<1>:w  		r[a0.0,164]<16;16,1>:ub   		1:w
+// - v0
+mac (16) acc0.0<1>:w  		r[a0.0,32]<16;16,1>:ub  		-1:w
+// - v6
+mac (16) acc0.0<1>:w  		r[a0.0,160]<16;16,1>:ub  		-1:w
+// + v2
+mac (16) acc0.0<1>:w  		r[a0.0,36]<16;16,1>:ub   		1:w
+// + 2 * v5
+mac (16) wSOBEL_X(1)<1> 	r[a0.0,100]<16;16,1>:ub   		2:w
+// - 2 * v3
+mul (16) acc0.0<1>:w  		r[a0.0,128]<16;16,1>:ub  		-2:w
+// + v8
+mac (16) acc0.0<1>:w  		r[a0.0,196]<16;16,1>:ub   		1:w
+// - v0
+mac (16) acc0.0<1>:w  		r[a0.0,64]<16;16,1>:ub  		-1:w
+// - v6
+mac (16) acc0.0<1>:w  		r[a0.0,192]<16;16,1>:ub  		-1:w
+// + v2
+mac (16) acc0.0<1>:w  		r[a0.0,68]<16;16,1>:ub   		1:w
+// + 2 * v5
+mac (16) wSOBEL_X(2)<1> 	r[a0.0,132]<16;16,1>:ub   		2:w
+// - 2 * v3
+mul (16) acc0.0<1>:w  		r[a0.0,160]<16;16,1>:ub  		-2:w
+// + v8
+mac (16) acc0.0<1>:w  		r[a0.0,228]<16;16,1>:ub   		1:w
+// - v0
+mac (16) acc0.0<1>:w  		r[a0.0,96]<16;16,1>:ub  		-1:w
+// - v6
+mac (16) acc0.0<1>:w  		r[a0.0,224]<16;16,1>:ub  		-1:w
+// + v2
+mac (16) acc0.0<1>:w  		r[a0.0,100]<16;16,1>:ub   		1:w
+// + 2 * v5
+mac (16) wSOBEL_X(3)<1> 	r[a0.0,164]<16;16,1>:ub   		2:w
+
+// + 2 * v1
+mul (16) acc0.0<1>:w  r[a0.0,2]<16;16,1>:ub   	2:w
+// + v0
+mac (16) acc0.0<1>:w  r[a0.0,0]<16;16,1>:ub   	1:w
+// - v8
+mac (16) acc0.0<1>:w  r[a0.0,132]<16;16,1>:ub  -1:w
+// + v2
+mac (16) acc0.0<1>:w  r[a0.0,4]<16;16,1>:ub   	1:w
+// - v6
+mac (16) acc0.0<1>:w  r[a0.0,128]<16;16,1>:ub  -1:w
+// - 2 * v7
+mac (16) acc0.0<1>:w  r[a0.0,130]<16;16,1>:ub  -2:w
+
+add (16) acc0.0<1>:uw (abs)acc0.0<16;16,1>:w		(abs)wSOBEL_X(0)<16;16,1>
+
+shr (16) uwSOBEL(0)<1>	acc0.0<16;16,1>:uw   3:uw
+// + 2 * v1
+mul (16) acc0.0<1>:w  r[a0.0,34]<16;16,1>:ub   	2:w
+// + v0
+mac (16) acc0.0<1>:w  r[a0.0,32]<16;16,1>:ub   	1:w
+// - v8
+mac (16) acc0.0<1>:w  r[a0.0,164]<16;16,1>:ub  -1:w
+// + v2
+mac (16) acc0.0<1>:w  r[a0.0,36]<16;16,1>:ub   	1:w
+// - v6
+mac (16) acc0.0<1>:w  r[a0.0,160]<16;16,1>:ub  -1:w
+// - 2 * v7
+mac (16) acc0.0<1>:w  r[a0.0,162]<16;16,1>:ub  -2:w
+
+add (16) acc0.0<1>:uw (abs)acc0.0<16;16,1>:w		(abs)wSOBEL_X(1)<16;16,1>
+
+shr (16) uwSOBEL(1)<1>	acc0.0<16;16,1>:uw   3:uw
+// + 2 * v1
+mul (16) acc0.0<1>:w  r[a0.0,66]<16;16,1>:ub   	2:w
+// + v0
+mac (16) acc0.0<1>:w  r[a0.0,64]<16;16,1>:ub   	1:w
+// - v8
+mac (16) acc0.0<1>:w  r[a0.0,196]<16;16,1>:ub  -1:w
+// + v2
+mac (16) acc0.0<1>:w  r[a0.0,68]<16;16,1>:ub   	1:w
+// - v6
+mac (16) acc0.0<1>:w  r[a0.0,192]<16;16,1>:ub  -1:w
+// - 2 * v7
+mac (16) acc0.0<1>:w  r[a0.0,194]<16;16,1>:ub  -2:w
+
+add (16) acc0.0<1>:uw (abs)acc0.0<16;16,1>:w		(abs)wSOBEL_X(2)<16;16,1>
+
+shr (16) uwSOBEL(2)<1>	acc0.0<16;16,1>:uw   3:uw
+// + 2 * v1
+mul (16) acc0.0<1>:w  r[a0.0,98]<16;16,1>:ub   	2:w
+// + v0
+mac (16) acc0.0<1>:w  r[a0.0,96]<16;16,1>:ub   	1:w
+// - v8
+mac (16) acc0.0<1>:w  r[a0.0,228]<16;16,1>:ub  -1:w
+// + v2
+mac (16) acc0.0<1>:w  r[a0.0,100]<16;16,1>:ub   	1:w
+// - v6
+mac (16) acc0.0<1>:w  r[a0.0,224]<16;16,1>:ub  -1:w
+// - 2 * v7
+mac (16) acc0.0<1>:w  r[a0.0,226]<16;16,1>:ub  -2:w
+
+add (16) acc0.0<1>:uw (abs)acc0.0<16;16,1>:w		(abs)wSOBEL_X(3)<16;16,1>
+
+shr (16) uwSOBEL(3)<1>	acc0.0<16;16,1>:uw   3:uw
+
+//Mov Median in CURBE_TEMP to free up temp space.
+mov (16)	ubMEDIAN(0,0)<1>  	ubMEDIAN_TEMP(4,0)<16;16,1>		
+mov (16)	ubMEDIAN(0,16)<1> ubMEDIAN_TEMP(4,16)<16;16,1>		
+mov (16)	ubMEDIAN(0,32)<1>  	ubMEDIAN_TEMP(13,0)<16;16,1>		
+mov (16)	ubMEDIAN(0,48)<1> ubMEDIAN_TEMP(13,16)<16;16,1>		
+
+// Find:
+//      absDiff = abs(ubCurY - ubMedian)
+// Find the difference between pixel and median value.
+
+//Median is interleaved. So difference is also interleaved.
+
+//------------------------------------------------------------------------------------------
+//Process 16 U and 16 V pixels here and rest later.
+// first row - v0,v1,v2
+add (16) wDIFF(0)<1>   r[a0.0,0]<16;16,1>:ub  -ubMEDIAN(0,0)<16;16,1>
+add (16) wDIFF(1)<1>   r[a0.0,2]<16;16,1>:ub  -ubMEDIAN(0,0)<16;16,1>
+add (16) wDIFF(2)<1>   r[a0.0,4]<16;16,1>:ub  -ubMEDIAN(0,0)<16;16,1>
+
+// second row - v3,v4,v5
+add (16) wDIFF(3)<1>   r[a0.0,64]<16;16,1>:ub  -ubMEDIAN(0,0)<16;16,1>
+add (16) wDIFF(4)<1>   r[a0.0,66]<16;16,1>:ub  -ubMEDIAN(0,0)<16;16,1>
+add (16) wDIFF(5)<1>   r[a0.0,68]<16;16,1>:ub  -ubMEDIAN(0,0)<16;16,1>
+
+// third row - v6,v7,v8
+add (16) wDIFF(6)<1>   r[a0.0,128]<16;16,1>:ub  -ubMEDIAN(0,0)<16;16,1>
+add (16) wDIFF(7)<1>   r[a0.0,130]<16;16,1>:ub  -ubMEDIAN(0,0)<16;16,1>
+add (16) wDIFF(8)<1>   r[a0.0,132]<16;16,1>:ub  -ubMEDIAN(0,0)<16;16,1>
+// first row - v0,v1,v2
+add (16) wDIFF(9)<1>   r[a0.0,32]<16;16,1>:ub  -ubMEDIAN(0,16)<16;16,1>
+add (16) wDIFF(10)<1>   r[a0.0,34]<16;16,1>:ub  -ubMEDIAN(0,16)<16;16,1>
+add (16) wDIFF(11)<1>   r[a0.0,36]<16;16,1>:ub  -ubMEDIAN(0,16)<16;16,1>
+
+// second row - v3,v4,v5
+add (16) wDIFF(12)<1>   r[a0.0,96]<16;16,1>:ub  -ubMEDIAN(0,16)<16;16,1>
+add (16) wDIFF(13)<1>   r[a0.0,98]<16;16,1>:ub  -ubMEDIAN(0,16)<16;16,1>
+add (16) wDIFF(14)<1>   r[a0.0,100]<16;16,1>:ub  -ubMEDIAN(0,16)<16;16,1>
+
+// third row - v6,v7,v8
+add (16) wDIFF(15)<1>   r[a0.0,160]<16;16,1>:ub  -ubMEDIAN(0,16)<16;16,1>
+add (16) wDIFF(16)<1>   r[a0.0,162]<16;16,1>:ub  -ubMEDIAN(0,16)<16;16,1>
+add (16) wDIFF(17)<1>   r[a0.0,164]<16;16,1>:ub  -ubMEDIAN(0,16)<16;16,1>
+
+//TODO - Change Later - rT
+add (1) a0.0:uw  a0.0<0;1,0>:uw 64:uw
+
+// Find sum of all absolute differences AND
+// maximum absolute difference for 16 U and 16 V here.
+//First 2 rows of 8x4
+//Compare 0-1, 2-3, 4-5, 6-7
+cmp.g.f0.0 (16) null:uw         (abs)wDIFF(0)<16;16,1>   (abs)wDIFF(1)<16;16,1>
+cmp.g.f0.1 (16) null:uw         (abs)wDIFF(2)<16;16,1>   (abs)wDIFF(3)<16;16,1>
+cmp.g.f1.0 (16) null:uw         (abs)wDIFF(4)<16;16,1>   (abs)wDIFF(5)<16;16,1>
+cmp.g.f1.1 (16) null:uw         (abs)wDIFF(6)<16;16,1>   (abs)wDIFF(7)<16;16,1>
+
+//Calculate SAD
+	add        (16) acc0.0<1>:uw     (abs)wDIFF(0)<16;16,1>  (abs)wDIFF(1)<16;16,1>
+	add        (16) acc0.0<1>:uw     acc0.0<16;16,1>:uw 		(abs)wDIFF(2)<16;16,1>
+	add        (16) acc0.0<1>:uw     acc0.0<16;16,1>:uw 		(abs)wDIFF(3)<16;16,1>
+	add        (16) acc0.0<1>:uw     acc0.0<16;16,1>:uw 		(abs)wDIFF(4)<16;16,1>
+	add        (16) acc0.0<1>:uw     acc0.0<16;16,1>:uw 		(abs)wDIFF(5)<16;16,1>
+	add        (16) acc0.0<1>:uw     acc0.0<16;16,1>:uw 		(abs)wDIFF(6)<16;16,1>
+	add        (16) acc0.0<1>:uw     acc0.0<16;16,1>:uw 		(abs)wDIFF(7)<16;16,1>
+	add        (16) uwSOAD(0)<1>  	 acc0.0<16;16,1>:uw 		(abs)wDIFF(8)<16;16,1>
+
+(f0.0) sel (16) uwCURBE_TEMP(0)<1> (abs)wDIFF(0)<16;16,1>   (abs)wDIFF(1)<16;16,1>
+(f0.1) sel (16) uwCURBE_TEMP(1)<1> (abs)wDIFF(2)<16;16,1>   (abs)wDIFF(3)<16;16,1>
+(f1.0) sel (16) uwCURBE_TEMP(2)<1> (abs)wDIFF(4)<16;16,1>   (abs)wDIFF(5)<16;16,1>
+(f1.1) sel (16) uwCURBE_TEMP(3)<1> (abs)wDIFF(6)<16;16,1>   (abs)wDIFF(7)<16;16,1>
+
+//------------
+	//DIFF(0-7) is no longer needed here. Repopulate it using ubMEDIAN(1,0).
+	// first row - v0,v1,v2
+	add (16) wDIFF(0)<1>   r[a0.0,0]<16;16,1>:ub  -ubMEDIAN(1,0)<16;16,1>
+	add (16) wDIFF(1)<1>   r[a0.0,2]<16;16,1>:ub  -ubMEDIAN(1,0)<16;16,1>
+	add (16) wDIFF(2)<1>   r[a0.0,4]<16;16,1>:ub  -ubMEDIAN(1,0)<16;16,1>
+
+	// second row - v3,v4,v5
+	add (16) wDIFF(3)<1>   r[a0.0,64]<16;16,1>:ub  -ubMEDIAN(1,0)<16;16,1>
+	add (16) wDIFF(4)<1>   r[a0.0,66]<16;16,1>:ub  -ubMEDIAN(1,0)<16;16,1>
+	add (16) wDIFF(5)<1>   r[a0.0,68]<16;16,1>:ub  -ubMEDIAN(1,0)<16;16,1>
+
+	// third row - v6,v7
+	add (16) wDIFF(6)<1>   r[a0.0,128]<16;16,1>:ub  -ubMEDIAN(1,0)<16;16,1>
+	add (16) wDIFF(7)<1>   r[a0.0,130]<16;16,1>:ub  -ubMEDIAN(1,0)<16;16,1>
+//------------
+
+//Compare Max(0,1) - Max(2,3), Max(4,5) - Max(6,7)
+cmp.g.f0.0 (16) null:uw      uwCURBE_TEMP(0)<16;16,1>   uwCURBE_TEMP(1)<16;16,1>
+cmp.g.f0.1 (16) null:uw      uwCURBE_TEMP(2)<16;16,1>   uwCURBE_TEMP(3)<16;16,1>
+
+(f0.0)sel (16) uwCURBE_TEMP(0)<1>  uwCURBE_TEMP(0)<16;16,1>   uwCURBE_TEMP(1)<16;16,1>
+(f0.1)sel (16) uwCURBE_TEMP(2)<1>  uwCURBE_TEMP(2)<16;16,1>   uwCURBE_TEMP(3)<16;16,1>
+
+//Compare Max(0,1,2,3) - Max(4,5,6,7)
+cmp.g.f0.0 (16) null:uw      		uwCURBE_TEMP(0)<16;16,1>   uwCURBE_TEMP(2)<16;16,1>
+(f0.0)sel  (16) uwCURBE_TEMP(0)<1> 	uwCURBE_TEMP(0)<16;16,1>   uwCURBE_TEMP(2)<16;16,1>
+
+//Compare Max(0,1,2,3,4,5,6,7) - 8
+cmp.g.f0.0 (16) null:uw      			uwCURBE_TEMP(0)<16;16,1>   (abs)wDIFF(8)<16;16,1>
+(f0.0)sel  (16) uwMAX_ABS_DIFF(0)<1> uwCURBE_TEMP(0)<16;16,1>   (abs)wDIFF(8)<16;16,1>
+
+//------------
+	//Load v8 - DIFF(8)
+	add (16) wDIFF(8)<1>   			r[a0.0,132]<16;16,1>:ub  -ubMEDIAN(1,0)<16;16,1>
+//------------
+//Compare 0-1, 2-3, 4-5, 6-7
+cmp.g.f0.0 (16) null:uw         (abs)wDIFF(9)<16;16,1>   (abs)wDIFF(10)<16;16,1>
+cmp.g.f0.1 (16) null:uw         (abs)wDIFF(11)<16;16,1>   (abs)wDIFF(12)<16;16,1>
+cmp.g.f1.0 (16) null:uw         (abs)wDIFF(13)<16;16,1>   (abs)wDIFF(14)<16;16,1>
+cmp.g.f1.1 (16) null:uw         (abs)wDIFF(15)<16;16,1>   (abs)wDIFF(16)<16;16,1>
+
+//Calculate SAD
+	add        (16) acc0.0<1>:uw     (abs)wDIFF(9)<16;16,1>  (abs)wDIFF(10)<16;16,1>
+	add        (16) acc0.0<1>:uw     acc0.0<16;16,1>:uw 		(abs)wDIFF(11)<16;16,1>
+	add        (16) acc0.0<1>:uw     acc0.0<16;16,1>:uw 		(abs)wDIFF(12)<16;16,1>
+	add        (16) acc0.0<1>:uw     acc0.0<16;16,1>:uw 		(abs)wDIFF(13)<16;16,1>
+	add        (16) acc0.0<1>:uw     acc0.0<16;16,1>:uw 		(abs)wDIFF(14)<16;16,1>
+	add        (16) acc0.0<1>:uw     acc0.0<16;16,1>:uw 		(abs)wDIFF(15)<16;16,1>
+	add        (16) acc0.0<1>:uw     acc0.0<16;16,1>:uw 		(abs)wDIFF(16)<16;16,1>
+	add        (16) uwSOAD(1)<1>  	 acc0.0<16;16,1>:uw 		(abs)wDIFF(17)<16;16,1>
+
+(f0.0) sel (16) uwCURBE_TEMP(0)<1> (abs)wDIFF(9)<16;16,1>   (abs)wDIFF(10)<16;16,1>
+(f0.1) sel (16) uwCURBE_TEMP(1)<1> (abs)wDIFF(11)<16;16,1>   (abs)wDIFF(12)<16;16,1>
+(f1.0) sel (16) uwCURBE_TEMP(2)<1> (abs)wDIFF(13)<16;16,1>   (abs)wDIFF(14)<16;16,1>
+(f1.1) sel (16) uwCURBE_TEMP(3)<1> (abs)wDIFF(15)<16;16,1>   (abs)wDIFF(16)<16;16,1>
+
+//------------
+	//DIFF(9-16) is no longer needed here. Repopulate it using ubMEDIAN(1,16).
+	// first row - v0,v1,v2
+	add (16) wDIFF(9)<1>   r[a0.0,32]<16;16,1>:ub  -ubMEDIAN(1,16)<16;16,1>
+	add (16) wDIFF(10)<1>   r[a0.0,34]<16;16,1>:ub  -ubMEDIAN(1,16)<16;16,1>
+	add (16) wDIFF(11)<1>   r[a0.0,36]<16;16,1>:ub  -ubMEDIAN(1,16)<16;16,1>
+
+	// second row - v3,v4,v5
+	add (16) wDIFF(12)<1>   r[a0.0,96]<16;16,1>:ub  -ubMEDIAN(1,16)<16;16,1>
+	add (16) wDIFF(13)<1>   r[a0.0,98]<16;16,1>:ub  -ubMEDIAN(1,16)<16;16,1>
+	add (16) wDIFF(14)<1>   r[a0.0,100]<16;16,1>:ub  -ubMEDIAN(1,16)<16;16,1>
+
+	// third row - v6,v7
+	add (16) wDIFF(15)<1>   r[a0.0,160]<16;16,1>:ub  -ubMEDIAN(1,16)<16;16,1>
+	add (16) wDIFF(16)<1>   r[a0.0,162]<16;16,1>:ub  -ubMEDIAN(1,16)<16;16,1>
+//------------
+
+//Compare Max(0,1) - Max(2,3), Max(4,5) - Max(6,7)
+cmp.g.f0.0 (16) null:uw      uwCURBE_TEMP(0)<16;16,1>   uwCURBE_TEMP(1)<16;16,1>
+cmp.g.f0.1 (16) null:uw      uwCURBE_TEMP(2)<16;16,1>   uwCURBE_TEMP(3)<16;16,1>
+
+(f0.0)sel (16) uwCURBE_TEMP(0)<1>  uwCURBE_TEMP(0)<16;16,1>   uwCURBE_TEMP(1)<16;16,1>
+(f0.1)sel (16) uwCURBE_TEMP(2)<1>  uwCURBE_TEMP(2)<16;16,1>   uwCURBE_TEMP(3)<16;16,1>
+
+//Compare Max(0,1,2,3) - Max(4,5,6,7)
+cmp.g.f0.0 (16) null:uw      		uwCURBE_TEMP(0)<16;16,1>   uwCURBE_TEMP(2)<16;16,1>
+(f0.0)sel  (16) uwCURBE_TEMP(0)<1> 	uwCURBE_TEMP(0)<16;16,1>   uwCURBE_TEMP(2)<16;16,1>
+
+//Compare Max(0,1,2,3,4,5,6,7) - 8
+cmp.g.f0.0 (16) null:uw      			uwCURBE_TEMP(0)<16;16,1>   (abs)wDIFF(17)<16;16,1>
+(f0.0)sel  (16) uwMAX_ABS_DIFF(1)<1> uwCURBE_TEMP(0)<16;16,1>   (abs)wDIFF(17)<16;16,1>
+
+//------------
+	//Load v8 - DIFF(17)
+	add (16) wDIFF(17)<1>   			r[a0.0,164]<16;16,1>:ub  -ubMEDIAN(1,16)<16;16,1>
+//------------
+
+//if ((sobel_edge_measure < m_SobelEdgeThreshold) && ((block_max-block_min) < m_LocalDiffThreshold))
+//						if (sigma_mb_min > sigma)
+//							sigma_mb_min = sigma;
+
+//NOTE: block_min is always zero as the median is one of the values in the 3x3 block. So there is no need to calculate it.
+//		So just do -
+//if ((sobel_edge_measure < m_SobelEdgeThreshold) && ((block_max) < m_LocalDiffThreshold) && ( sigma < sigma_mb_min))
+//							sigma_mb_min = sigma;
+
+//We are processing 32 bytes of U and 32 bytes of V - each of size 8x4.
+//Compare first 8 bytes with max possible (255).
+//Start above condition from second 8 bytes.
+
+//TODO - Change Later - rT
+//	mov (1)	pCUR_MIN_SOAD_8x4:uw	1752:uw		//r54.24:ub
+
+//First row of 8x4
+        cmp.l.f0.0 	(16) null:uw     		uwSOBEL(0)<16;16,1>         r55.30<0;2,1>:ub
+(f0.0)  cmp.l.f0.0 	(16) null:uw     		uwMAX_ABS_DIFF(0)<16;16,1>  r55.28<0;2,1>:ub
+(f0.0)  cmp.l.f0.0 	(16) null:uw     		uwSOAD(0)<16;16,1>			255:uw
+(f0.0)  sel 		(16) uwSOBEL(0)<1>   uwSOAD(0)<16;16,1>			255:uw
+
+//Second row of 8x4
+		cmp.l.f0.0 	(16) null:uw     		uwSOBEL(1)<16;16,1>         r55.30<0;2,1>:ub
+(f0.0)  cmp.l.f0.0 	(16) null:uw     		uwMAX_ABS_DIFF(1)<16;16,1>  r55.28<0;2,1>:ub
+(f0.0)  cmp.l.f0.0 	(16) null:uw     		uwSOAD(1)<16;16,1>			uwSOBEL(0)<16;16,1>
+(f0.0)  mov 		(16) uwSOBEL(0)<1>   uwSOAD(1)<16;16,1>
+
+// Find sum of all absolute differences AND
+// maximum absolute difference for 16 U and 16 V here.
+//Second 2 rows of 8x4
+//Compare 0-1, 2-3, 4-5, 6-7
+cmp.g.f0.0 (16) null:uw         (abs)wDIFF(0)<16;16,1>   (abs)wDIFF(1)<16;16,1>
+cmp.g.f0.1 (16) null:uw         (abs)wDIFF(2)<16;16,1>   (abs)wDIFF(3)<16;16,1>
+cmp.g.f1.0 (16) null:uw         (abs)wDIFF(4)<16;16,1>   (abs)wDIFF(5)<16;16,1>
+cmp.g.f1.1 (16) null:uw         (abs)wDIFF(6)<16;16,1>   (abs)wDIFF(7)<16;16,1>
+
+//Calculate SAD
+	add        (16) acc0.0<1>:uw     (abs)wDIFF(0)<16;16,1>  (abs)wDIFF(1)<16;16,1>
+	add        (16) acc0.0<1>:uw     acc0.0<16;16,1>:uw 		(abs)wDIFF(2)<16;16,1>
+	add        (16) acc0.0<1>:uw     acc0.0<16;16,1>:uw 		(abs)wDIFF(3)<16;16,1>
+	add        (16) acc0.0<1>:uw     acc0.0<16;16,1>:uw 		(abs)wDIFF(4)<16;16,1>
+	add        (16) acc0.0<1>:uw     acc0.0<16;16,1>:uw 		(abs)wDIFF(5)<16;16,1>
+	add        (16) acc0.0<1>:uw     acc0.0<16;16,1>:uw 		(abs)wDIFF(6)<16;16,1>
+	add        (16) acc0.0<1>:uw     acc0.0<16;16,1>:uw 		(abs)wDIFF(7)<16;16,1>
+	add        (16) uwSOAD(0)<1> 	 acc0.0<16;16,1>:uw 		(abs)wDIFF(8)<16;16,1>
+
+(f0.0) sel (16) uwCURBE_TEMP(0)<1> (abs)wDIFF(0)<16;16,1>   (abs)wDIFF(1)<16;16,1>
+(f0.1) sel (16) uwCURBE_TEMP(1)<1> (abs)wDIFF(2)<16;16,1>   (abs)wDIFF(3)<16;16,1>
+(f1.0) sel (16) uwCURBE_TEMP(2)<1> (abs)wDIFF(4)<16;16,1>   (abs)wDIFF(5)<16;16,1>
+(f1.1) sel (16) uwCURBE_TEMP(3)<1> (abs)wDIFF(6)<16;16,1>   (abs)wDIFF(7)<16;16,1>
+
+//Compare Max(0,1) - Max(2,3), Max(4,5) - Max(6,7)
+cmp.g.f0.0 (16) null:uw      uwCURBE_TEMP(0)<16;16,1>   uwCURBE_TEMP(1)<16;16,1>
+cmp.g.f0.1 (16) null:uw      uwCURBE_TEMP(2)<16;16,1>   uwCURBE_TEMP(3)<16;16,1>
+
+(f0.0)sel (16) uwCURBE_TEMP(0)<1>  uwCURBE_TEMP(0)<16;16,1>   uwCURBE_TEMP(1)<16;16,1>
+(f0.1)sel (16) uwCURBE_TEMP(2)<1>  uwCURBE_TEMP(2)<16;16,1>   uwCURBE_TEMP(3)<16;16,1>
+
+//Compare Max(0,1,2,3) - Max(4,5,6,7)
+cmp.g.f0.0 (16) null:uw      		uwCURBE_TEMP(0)<16;16,1>   uwCURBE_TEMP(2)<16;16,1>
+(f0.0)sel  (16) uwCURBE_TEMP(0)<1> 	uwCURBE_TEMP(0)<16;16,1>   uwCURBE_TEMP(2)<16;16,1>
+
+//Compare Max(0,1,2,3,4,5,6,7) - 8
+cmp.g.f0.0 (16) null:uw      			uwCURBE_TEMP(0)<16;16,1>   (abs)wDIFF(8)<16;16,1>
+(f0.0)sel  (16) uwMAX_ABS_DIFF(0)<1> 	uwCURBE_TEMP(0)<16;16,1>   (abs)wDIFF(8)<16;16,1>
+//Compare 0-1, 2-3, 4-5, 6-7
+cmp.g.f0.0 (16) null:uw         (abs)wDIFF(9)<16;16,1>   (abs)wDIFF(10)<16;16,1>
+cmp.g.f0.1 (16) null:uw         (abs)wDIFF(11)<16;16,1>   (abs)wDIFF(12)<16;16,1>
+cmp.g.f1.0 (16) null:uw         (abs)wDIFF(13)<16;16,1>   (abs)wDIFF(14)<16;16,1>
+cmp.g.f1.1 (16) null:uw         (abs)wDIFF(15)<16;16,1>   (abs)wDIFF(16)<16;16,1>
+
+//Calculate SAD
+	add        (16) acc0.0<1>:uw     (abs)wDIFF(9)<16;16,1>  (abs)wDIFF(10)<16;16,1>
+	add        (16) acc0.0<1>:uw     acc0.0<16;16,1>:uw 		(abs)wDIFF(11)<16;16,1>
+	add        (16) acc0.0<1>:uw     acc0.0<16;16,1>:uw 		(abs)wDIFF(12)<16;16,1>
+	add        (16) acc0.0<1>:uw     acc0.0<16;16,1>:uw 		(abs)wDIFF(13)<16;16,1>
+	add        (16) acc0.0<1>:uw     acc0.0<16;16,1>:uw 		(abs)wDIFF(14)<16;16,1>
+	add        (16) acc0.0<1>:uw     acc0.0<16;16,1>:uw 		(abs)wDIFF(15)<16;16,1>
+	add        (16) acc0.0<1>:uw     acc0.0<16;16,1>:uw 		(abs)wDIFF(16)<16;16,1>
+	add        (16) uwSOAD(1)<1> 	 acc0.0<16;16,1>:uw 		(abs)wDIFF(17)<16;16,1>
+
+(f0.0) sel (16) uwCURBE_TEMP(0)<1> (abs)wDIFF(9)<16;16,1>   (abs)wDIFF(10)<16;16,1>
+(f0.1) sel (16) uwCURBE_TEMP(1)<1> (abs)wDIFF(11)<16;16,1>   (abs)wDIFF(12)<16;16,1>
+(f1.0) sel (16) uwCURBE_TEMP(2)<1> (abs)wDIFF(13)<16;16,1>   (abs)wDIFF(14)<16;16,1>
+(f1.1) sel (16) uwCURBE_TEMP(3)<1> (abs)wDIFF(15)<16;16,1>   (abs)wDIFF(16)<16;16,1>
+
+//Compare Max(0,1) - Max(2,3), Max(4,5) - Max(6,7)
+cmp.g.f0.0 (16) null:uw      uwCURBE_TEMP(0)<16;16,1>   uwCURBE_TEMP(1)<16;16,1>
+cmp.g.f0.1 (16) null:uw      uwCURBE_TEMP(2)<16;16,1>   uwCURBE_TEMP(3)<16;16,1>
+
+(f0.0)sel (16) uwCURBE_TEMP(0)<1>  uwCURBE_TEMP(0)<16;16,1>   uwCURBE_TEMP(1)<16;16,1>
+(f0.1)sel (16) uwCURBE_TEMP(2)<1>  uwCURBE_TEMP(2)<16;16,1>   uwCURBE_TEMP(3)<16;16,1>
+
+//Compare Max(0,1,2,3) - Max(4,5,6,7)
+cmp.g.f0.0 (16) null:uw      		uwCURBE_TEMP(0)<16;16,1>   uwCURBE_TEMP(2)<16;16,1>
+(f0.0)sel  (16) uwCURBE_TEMP(0)<1> 	uwCURBE_TEMP(0)<16;16,1>   uwCURBE_TEMP(2)<16;16,1>
+
+//Compare Max(0,1,2,3,4,5,6,7) - 8
+cmp.g.f0.0 (16) null:uw      			uwCURBE_TEMP(0)<16;16,1>   (abs)wDIFF(17)<16;16,1>
+(f0.0)sel  (16) uwMAX_ABS_DIFF(1)<1> 	uwCURBE_TEMP(0)<16;16,1>   (abs)wDIFF(17)<16;16,1>
+
+//Third row of 8x4
+        cmp.l.f0.0 	(16) null:uw     		uwSOBEL(2)<16;16,1>     	r55.30<0;2,1>:ub
+(f0.0)  cmp.l.f0.0 	(16) null:uw     		uwMAX_ABS_DIFF(0)<16;16,1>  r55.28<0;2,1>:ub
+(f0.0)  cmp.l.f0.0 	(16) null:uw     		uwSOAD(0)<16;16,1>			uwSOBEL(0)<16;16,1>
+(f0.0)  mov 		(16) uwSOBEL(0)<1>   uwSOAD(0)<16;16,1>
+
+//Fourth row of 8x4
+		cmp.l.f0.0 	(16) null:uw     		uwSOBEL(3)<16;16,1>     	r55.30<0;2,1>:ub
+(f0.0)  cmp.l.f0.0 	(16) null:uw     		uwMAX_ABS_DIFF(1)<16;16,1>  r55.28<0;2,1>:ub
+(f0.0)  cmp.l.f0.0 	(16) null:uw     		uwSOAD(1)<16;16,1>			uwSOBEL(0)<16;16,1>
+(f0.0)  mov 		(16) uwSOBEL(0)<1>   uwSOAD(1)<16;16,1>
+
+		cmp.l.f0.0 	(8) null:uw     		uwSOBEL(0,0)<8;8,1>  	uwSOBEL(0,8)<8;8,1>
+(f0.0)  sel 		(8) uwSOBEL(0)<1>   	uwSOBEL(0,0)<8;8,1>  	uwSOBEL(0,8)<8;8,1>
+
+		cmp.l.f0.0 	(4) null:uw     		uwSOBEL(0,0)<4;4,1>  	uwSOBEL(0,4)<4;4,1>
+(f0.0)  sel 		(4) uwSOBEL(0)<1>   	uwSOBEL(0,0)<4;4,1>  	uwSOBEL(0,4)<4;4,1>
+
+		cmp.l.f0.0 	(2) null:uw     					uwSOBEL(0,0)<2;2,1>  uwSOBEL(0,2)<2;2,1>
+(f0.0)  sel 		(2) r[a0.1,0]<1>:uw   	uwSOBEL(0,0)<2;2,1>  uwSOBEL(0,2)<2;2,1>
+
+
+
+
+
+
+// End of common.inc
+
+mov (1) ip:ud r7.7<0;1,0>:d
+
+
+.end_code
+.end_kernel
diff --git a/src/shaders/post_processing/gen7/PA_DN_422CP.g4a b/src/shaders/post_processing/gen7/PA_DN_422CP.g4a
new file mode 100644
index 0000000..37f0ff0
--- /dev/null
+++ b/src/shaders/post_processing/gen7/PA_DN_422CP.g4a
@@ -0,0 +1,491 @@
+/*
+ *  Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+//  114    // Total instruction count
+//    1    // Total kernel count
+
+.kernel PA_DN_422CP
+.code
+
+
+
+// FileName:	DN_PA_Core.asm 
+// Author:		Vivek Kumar
+// Description:	Tasks for DN only case (16x8 block) for Packed format
+
+
+
+// FileName:	DN.asm 
+// Author:		Vivek Kumar
+// Description:	Tasks for DN only case (16x8 block)
+
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16)		// default SIMD width; every instruction in this kernel states its own (N) explicitly
+.default_register_type  :ub		// default operand type: unsigned byte
+
+.reg_count_total        128		// r0-r127; r127 is used for the end-of-thread message
+.reg_count_payload      7		// NOTE(review): presumably the count of payload registers preloaded at dispatch; the code reads r0, r1, r2 and r7 - confirm
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+                                    //  e.g.            byte0   byte1  byte2
+                                    // YUYV               0       1      3
+                                    // YVYU               0       3      1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+                                    //  e.g.              byte0           byte1           byte2
+                                    // YUYV                 0               1               3
+                                    // YVYU                 0               3               1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+//                = 000 0001 (min message len 1 ) 0,0000 (resp len 0   -add later)
+//                  0000,0000,0000
+//                  0001(Spawn a root thread),0001 (Root thread spawn thread)
+//                = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+//                = 000 0110 (min message len 6 ) 0,0000 (resp len 0   -add later)
+//                  1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+//                  0000,0000 (Binding table index, added later)
+//                = 0x0C098700 (value of the fields above; this line previously repeated the thread-spawner value 0x02000011)
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+        // Message Descriptors
+                //                = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+                //                  1 (header present 1) 0 1010 (media block write) 000000
+                //                  00000000 (binding table index - set later)
+                //                = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+                             // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT  r43    // Dummy Message Register for EOT
+
+
+.declare    mubMSGPAYLOAD  Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub	// byte view of r30; not referenced by this kernel's instructions
+.declare    muwMSGPAYLOAD  Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw	// word view of r30; not referenced by this kernel's instructions
+.declare    mudMSGPAYLOAD  Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud	// dword view of r30; not referenced by this kernel's instructions
+.declare    mfMSGPAYLOAD   Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f	// float view of r30; not referenced by this kernel's instructions
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+
+.declare fROBUF		  Base=r9.0		ElementSize=4		SrcRegion=<8;8,1>		  DstRegion=<1>		Type=f	// float view of the r9 temp area; not referenced by this kernel
+
+.declare udROBUF		Base=r9.0		ElementSize=4		SrcRegion=<8;8,1>		  DstRegion=<1>		Type=ud	// dword view of the same r9 area; not referenced by this kernel
+
+.declare uwROBUF		Base=r9.0		ElementSize=2		SrcRegion=<16;16,1>		DstRegion=<1>		Type=uw	// word view of the same r9 area; not referenced by this kernel
+
+.declare ubROBUF		Base=r9.0		ElementSize=1		SrcRegion=<16;16,1>		DstRegion=<1>		Type=ub	// byte view of the same r9 area; not referenced by this kernel
+
+.declare ub4ROBUF 	Base=r9.0		ElementSize=1		SrcRegion=<32;8,4>		DstRegion=<4>		Type=ub	// byte view with stride-4 regions (every 4th byte); not referenced by this kernel
+
+
+// End of common.inc
+
+
+// FileName:    DNDI.inc
+// Author:      Vivek Kumar
+// Description: Include file for DN, DI and DNDI
+// Inputs:      DI_ENABLE, DN_ENABLE, DN_PLANAR, DN_PACKED
+
+
+
+
+// End of common.inc
+
+
+//Interface:
+//Static Parameters:
+//r1
+
+
+//====================== Binding table (Explicit To DNDI)=========================================
+
+
+.declare mudMSGHDR_DNDI     Base=r18      ElementSize=4    Type=ud	// DNDI request: header in r18, origin words in r19 (dword view)
+.declare mdMSGHDR_DNDI      Base=r18      ElementSize=4    Type=d	// signed dword view of the same registers; not referenced by this kernel
+.declare mwMSGHDR_DNDI      Base=r18      ElementSize=2    Type=w	// word view, used to write the X/Y origin into r19
+
+
+.declare mudMSGHDR_STMM     Base=r20      ElementSize=4    Type=ud	// not referenced by this kernel
+
+
+.declare mudMSGHDR_HIST     Base=r22      ElementSize=4    Type=ud	// denoise-history write: header r22, data r23
+
+
+.declare mudMSGHDR_ENC_STATS Base=r24 ElementSize=4   Type=ud	// encoder-statistics write: header r24, data r25 (dword view)
+.declare muwMSGHDR_ENC_STATS Base=r24 ElementSize=2   Type=uw	// word view of the same registers
+.declare mubMSGHDR_ENC_STATS Base=r24 ElementSize=1   Type=ub	// byte view of the same registers
+
+
+.declare mudMSGHDR_DN_OUT   Base=r31.0      ElementSize=4  Type=ud	// denoised-pixel write: header r31, data from r32 (dword view)
+.declare mdMSGHDR_DN_OUT    Base=r31.0      ElementSize=4  Type=d	// signed dword view, used for the X/Y origin fields
+.declare mubMSGHDR_DN_OUT   Base=r31.0      ElementSize=1  Type=ub	// byte view, used to interleave Y/U/V bytes into r32-r35
+
+
+.declare mudMSGHDR_UVCOPY   Base=r36      ElementSize=4  Type=ud	// not referenced by this kernel; r36 is reused below through mubMSGHDR_DN_OUT_2
+.declare mdMSGHDR_UVCOPY    Base=r36      ElementSize=4  Type=d	// not referenced by this kernel
+.declare mudMSGHDR_UCOPY    Base=r36       ElementSize=4  Type=ud	// not referenced by this kernel
+.declare mudMSGHDR_VCOPY    Base=r38       ElementSize=4  Type=ud	// not referenced by this kernel
+
+
+.declare mudMSGHDR_DI_OUT1  Base=r18.0      ElementSize=4     Type=ud	// not referenced by this kernel (shares r18 with mudMSGHDR_DNDI)
+.declare mubMSGHDR_DI_OUT1  Base=r18.0      ElementSize=1     Type=ub	// not referenced by this kernel
+
+
+.declare mudMSGHDR_DI_OUT2  Base=r23.0      ElementSize=4     Type=ud	// not referenced by this kernel (r23 is the denoise-history data register here)
+.declare mubMSGHDR_DI_OUT2  Base=r23.0      ElementSize=1     Type=ub	// not referenced by this kernel
+
+//r45
+//Use r45 as message header, so no need to "mov" the data.
+
+.declare mudDN_Y_OUT        Base=r45.0 ElementSize=4 SrcRegion=<8;8,1>   DstRegion=<1> Type=ud	// not referenced by this kernel
+
+// Message response (Denoised & DI-ed pixels & statistics); Use buffer 5
+.declare udDNDI_RESP        Base=r46.0 ElementSize=4 SrcRegion=<8;8,1>   DstRegion=<1> Type=ud	// destination of the DNDI send; dword view starting at r46
+.declare uwDNDI_RESP        Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw	// word view of the response (statistics and returned X/Y are read from r50)
+.declare ubDNDI_RESP        Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub	// byte view of the response (Y bytes read from r46 onward, chroma bytes from r51 onward)
+
+// Message response (UV Copy); Use buffer 5
+.declare udDNDI_UV_RESP     Base=r58.0 ElementSize=4 SrcRegion=<8;8,1>  DstRegion=<1> Type=ud	// not referenced by this kernel
+.declare ubDNDI_UV_RESP     Base=r58.0 ElementSize=1 SrcRegion=<16;16,1>    DstRegion=<1> Type=ub	// not referenced by this kernel
+
+//Temp GRFs: For 42X to 422 Conversion
+.declare uwDNDI_UVCOPY_TEMP Base=r10.0 ElementSize=2 SrcRegion=<16;16,1>    DstRegion=<1> Type=uw       //8 GRFs; not referenced by this kernel
+.declare ubDNDI_UVCOPY_TEMP Base=r10.0 ElementSize=1 SrcRegion=<16;16,1>    DstRegion=<1> Type=ub       //8 GRFs; not referenced by this kernel
+//---------------------------------------------------------------------------
+// Message descriptors
+//---------------------------------------------------------------------------
+// Extended message descriptor
+    // Message descriptor   for sampler read
+    //                    = 000 0010 (message len 2) 00000 (resp len - set later, 12 or 5 or 11)
+    //                      1 (header present 1) 0 11 (SIMD32/64 mode)
+    //                      1000 (message type) 0000 (DI state index)
+    //                      00000000 (binding table index - set later)
+    //                    = 0x040b8000
+
+
+// Attention: The Message Length is The Number of GRFs with Data Only, without the Header
+
+
+//---------------------------------------------------------------------------
+// VDI Return Data format
+//---------------------------------------------------------------------------
+// Defines for DI enabled
+
+
+// Defines for DI disabled
+
+
+
+// FileName:	DNDI_Command.asm 
+// Author:		Vivek Kumar
+// Description:	Sends a message to the VDI to process one DN (16x8) or DNDI (16x4) block
+
+// Prepare the DNDI send command: header in r18 (copy of r0), block origin in r19
+mov (8)		mudMSGHDR_DNDI(0)<1>			r0.0<8;8,1>:ud					// message header: r18 = r0
+mov (1)		mwMSGHDR_DNDI(1,4)<1>			r7.0<0;1,0>:w		{ NoDDClr }		// horizontal origin (r7.0:w) -> word 4 of r19	// Do we need to add offset here? -vK
+mov (1)		mwMSGHDR_DNDI(1,12)<1>			r7.1<0;1,0>:w		{ NoDDChk }		// vertical origin (r7.1:w) -> word 12 of r19		// Can these 2 be combined? - vK
+
+send (8)	udDNDI_RESP(0)<1>	r18	0x2	0x49E8003:ud	// issue the request from r18; the response is written starting at r46 (udDNDI_RESP)
+
+// On Gen6, with VDI walker, use the XY pair returned rather than programmed above
+// VDI_RETURNED_XY is ordered XY when the walker is enabled, and is the same as programmed when the walker is disabled
+mov (2) 	r7.0<1>:w     uwDNDI_RESP(4,14)<2;2,1>	// r7.0/r7.1 = horizontal/vertical origin returned in words 14 and 15 of r50
+
+
+
+// FileName:	DN_Hist_Save.asm 
+// Author:		Vivek Kumar
+// Description:	Saves DN history data to statistics surface
+
+// Write denoise history to memory: header is built in r27, then copied to r22; data goes in r23
+mov (8)    r27<1>:ud				r0.0<8;8,1>:ud                   			// start the header in r27 from a copy of r0
+
+
+	mov (2)    mudMSGHDR_HIST(1)<1>    	udDNDI_RESP(4,0)<2;2,1>    	// r23 dwords 0-1 = denoise history from r50 dwords 0-1 (4 bytes x 2 rows)
+
+
+shr (2)    r27.0<1>:ud		r7.0<2;2,1>:w				2:w                                 	// r27.0/r27.1 = X,Y origin (r7.0:w, r7.1:w) / 4
+add (1)    r27.0<1>:ud		r27.0<0;1,0>:ud			r1.12<0;1,0>:uw		{ NoDDClr }  	// Add pitch (r1.12:uw) to X origin
+mov (1)    r27.2<1>:ud		0x10003:ud									{ NoDDChk }  	// block size: width-1 = 3, height-1 = 1 (4 bytes x 2 rows)
+
+mov (8)		mudMSGHDR_HIST(0)<1>		r27.0<8;8,1>:ud	// r22 = finished header from r27
+send (8)	null<1>:d	r22	0x5		0x40A8021:ud	// write r22 (header) + r23 (data); null destination, so no response is returned
+
+
+
+// FileName:	DNDI_Enc_Stats_Save.asm
+// Author:		Vivek Kumar
+// Description:	Saves Encoder Statistics data to statistics surface in case of DI enabled (for 16x4 block)
+
+// Write encoder statistics to memory: header in r24, data in r25
+//Currently enable this only on Gen6 validation
+mov (8)		mudMSGHDR_ENC_STATS(1)<1>	0x0:ud						// zero the data register r25
+mov (8)		mudMSGHDR_ENC_STATS(0)<1>	r0.0<8;8,1>:ud				// message header: r24 = r0
+
+shr (1)		mudMSGHDR_ENC_STATS(0,0)<1>		r7.0<0;1,0>:w            	1:w  	{ NoDDClr }			// r24.0 = X origin / 2	(original note: enable the flag after testing on si)
+mul (1)		acc0.1<1>:ud					r7.1<0;1,0>:w				3:w																							// acc0.1 = Y origin * 3
+shr (1)		mudMSGHDR_ENC_STATS(0,1)<1>		acc0.1<0;1,0>:ud			2:w		{ NoDDClr, NoDDChk }		// r24.1 = Y origin * 3/4	(original note: enable the flag after testing on si)
+mov (1)		mudMSGHDR_ENC_STATS(0,2)<1>		0x50003:ud				{ NoDDChk }			// block size: width-1 = 3, height-1 = 5 (4 bytes x 6 rows)	(original note: enable the flag after testing on si)
+add (2)		mudMSGHDR_ENC_STATS(0,0)<1>		mudMSGHDR_ENC_STATS(0,0)<2;2,1>       	r1.12<2;2,1>:uw					// Add pitch (r1.12:uw, r1.13:uw) to X,Y origin
+
+
+	//Data block for Encoder Statistics
+	//----------------------------------------------------
+	//|  0  |   1  |   2   |  3  |  4  |  5  |  6  |  7  | Bytes
+	//----------------------------------------------------
+	//| BNE |   X  |   X   |  X  |           X           |
+	//----------------------------------------------------
+	//|     X      |     SVCM    |           X           |
+	//----------------------------------------------------
+	//|   SHCM     |     STAD    |           X           |
+	//----------------------------------------------------
+	//|            X             |           X           |
+	//----------------------------------------------------
+	//|     X      |     SVCM    |           X           |
+	//----------------------------------------------------
+	//|   SHCM     |     STAD    |           X           |
+	//----------------------------------------------------
+	mov (1)		mubMSGHDR_ENC_STATS(1,0)<1>		ubDNDI_RESP(4,8)<0;1,0>    		{ NoDDClr }				// r25 byte 0 (row 0: BNE) = r50 byte 8
+	mov (1)		muwMSGHDR_ENC_STATS(1,3)<1>		uwDNDI_RESP(4,11)<0;1,0>    	{ NoDDClr, NoDDChk }			// r25 word 3 (row 1: SVCM) = r50 word 11
+	mov (2)		muwMSGHDR_ENC_STATS(1,4)<1>		uwDNDI_RESP(4,12)<2;2,1>    	{ NoDDClr, NoDDChk }			// r25 words 4-5 (row 2: SHCM, STAD) = r50 words 12-13
+	mov (1)		muwMSGHDR_ENC_STATS(1,9)<1>		uwDNDI_RESP(4,8)<0;1,0>    		{ NoDDClr, NoDDChk }			// r25 word 9 (row 4: SVCM) = r50 word 8
+	mov (2)		muwMSGHDR_ENC_STATS(1,10)<1>	uwDNDI_RESP(4,9)<2;2,1>    		{ NoDDChk }				// r25 words 10-11 (row 5: SHCM, STAD) = r50 words 9-10
+
+
+send (8)   null<1>:d    r24    0x5    0x40A8021:ud	// write r24 (header) + r25 (data); null destination, so no response is returned
+
+
+
+// FileName:	DN_Save_PA.asm
+// Author:		Vivek Kumar
+// Description:	Save one 16x8 blocks of DN output in Packed format
+
+
+add (4)		a0.4<1>:uw   r2.28<4;4,1>:ub   1024:w    // a0.4-a0.7 = bytes r2.28-r2.31 (Y,U,V offsets in the YUV422 block) + 1024; NOTE(review): original said "starts at m14" - 1024 looks like the byte address of r32, confirm
+
+mov (8)		mudMSGHDR_DN_OUT(0)<1>		r0<8;8,1>:ud            					// message header: r31 = r0
+shl (1)     mdMSGHDR_DN_OUT(0,0)<1>		r7.0<0;1,0>:w     1:w  		{ NoDDClr }     // r31.0 = X origin * 2 (422 output)
+mov (1)     mdMSGHDR_DN_OUT(0,1)<1>		r7.1<0;1,0>:w          		{ NoDDClr, NoDDChk }  // r31.1 = Y origin
+mov (1)     mudMSGHDR_DN_OUT(0,2)<1>	0x7001F:ud	{ NoDDChk }     // block size: width-1 = 31, height-1 = 7 (32 bytes x 8 rows)
+
+	mov (16)    r[a0.4,0]<2>:ub   ubDNDI_RESP(0,0)<16;16,1>    	{ NoDDClr }   	// Y row 0: response bytes 0-15 -> every 2nd byte from a0.4+0
+	mov (16)    r[a0.4,32]<2>:ub   ubDNDI_RESP(0,16)<16;16,1>    	{ NoDDClr }   	// Y row 1: response bytes 16-31 -> every 2nd byte from a0.4+32
+	mov (16)    r[a0.4,64]<2>:ub   ubDNDI_RESP(0,32)<16;16,1>    	{ NoDDClr }   	// Y row 2: response bytes 32-47 -> every 2nd byte from a0.4+64
+	mov (16)    r[a0.4,96]<2>:ub   ubDNDI_RESP(0,48)<16;16,1>    	{ NoDDClr }   	// Y row 3: response bytes 48-63 -> every 2nd byte from a0.4+96
+	mov (16)    r[a0.4,128]<2>:ub   ubDNDI_RESP(0,64)<16;16,1>    	{ NoDDClr }   	// Y row 4: response bytes 64-79 -> every 2nd byte from a0.4+128
+	mov (16)    r[a0.4,160]<2>:ub   ubDNDI_RESP(0,80)<16;16,1>    	{ NoDDClr }   	// Y row 5: response bytes 80-95 -> every 2nd byte from a0.4+160
+	mov (16)    r[a0.4,192]<2>:ub   ubDNDI_RESP(0,96)<16;16,1>    	{ NoDDClr }   	// Y row 6: response bytes 96-111 -> every 2nd byte from a0.4+192
+	mov (16)    r[a0.4,224]<2>:ub   ubDNDI_RESP(0,112)<16;16,1>    	{ NoDDClr }   	// Y row 7: response bytes 112-127 -> every 2nd byte from a0.4+224
+	mov (8)     r[a0.5,0]<4>:ub   ubDNDI_RESP(5,1)<16;8,2>  { NoDDClr, NoDDChk }  // U row 0: odd bytes of r51 bytes 0-15 -> every 4th byte from a0.5+0
+	mov (8)     r[a0.6,0]<4>:ub   ubDNDI_RESP(5,0)<16;8,2>    { NoDDChk }  	// V row 0: even bytes of r51 bytes 0-15 -> every 4th byte from a0.6+0
+	mov (8)     r[a0.5,32]<4>:ub   ubDNDI_RESP(5,17)<16;8,2>  { NoDDClr, NoDDChk }  // U row 1: odd bytes of the next 16 chroma bytes -> every 4th byte from a0.5+32
+	mov (8)     r[a0.6,32]<4>:ub   ubDNDI_RESP(5,16)<16;8,2>    { NoDDChk }  	// V row 1: even bytes of the same 16 chroma bytes -> every 4th byte from a0.6+32
+	mov (8)     r[a0.5,64]<4>:ub   ubDNDI_RESP(5,33)<16;8,2>  { NoDDClr, NoDDChk }  // U row 2 -> every 4th byte from a0.5+64
+	mov (8)     r[a0.6,64]<4>:ub   ubDNDI_RESP(5,32)<16;8,2>    { NoDDChk }  	// V row 2 -> every 4th byte from a0.6+64
+	mov (8)     r[a0.5,96]<4>:ub   ubDNDI_RESP(5,49)<16;8,2>  { NoDDClr, NoDDChk }  // U row 3 -> every 4th byte from a0.5+96
+	mov (8)     r[a0.6,96]<4>:ub   ubDNDI_RESP(5,48)<16;8,2>    { NoDDChk }  	// V row 3 -> every 4th byte from a0.6+96
+	mov (8)     r[a0.5,128]<4>:ub   ubDNDI_RESP(5,65)<16;8,2>  { NoDDClr, NoDDChk }  // U row 4 -> every 4th byte from a0.5+128
+	mov (8)     r[a0.6,128]<4>:ub   ubDNDI_RESP(5,64)<16;8,2>    { NoDDChk }  	// V row 4 -> every 4th byte from a0.6+128
+	mov (8)     r[a0.5,160]<4>:ub   ubDNDI_RESP(5,81)<16;8,2>  { NoDDClr, NoDDChk }  // U row 5 -> every 4th byte from a0.5+160
+	mov (8)     r[a0.6,160]<4>:ub   ubDNDI_RESP(5,80)<16;8,2>    { NoDDChk }  	// V row 5 -> every 4th byte from a0.6+160
+	mov (8)     r[a0.5,192]<4>:ub   ubDNDI_RESP(5,97)<16;8,2>  { NoDDClr, NoDDChk }  // U row 6 -> every 4th byte from a0.5+192
+	mov (8)     r[a0.6,192]<4>:ub   ubDNDI_RESP(5,96)<16;8,2>    { NoDDChk }  	// V row 6 -> every 4th byte from a0.6+192
+	mov (8)     r[a0.5,224]<4>:ub   ubDNDI_RESP(5,113)<16;8,2>  { NoDDClr, NoDDChk }  // U row 7 -> every 4th byte from a0.5+224
+	mov (8)     r[a0.6,224]<4>:ub   ubDNDI_RESP(5,112)<16;8,2>    { NoDDChk }  	// V row 7 -> every 4th byte from a0.6+224
+
+//send out data through data port: header r31 followed by the packed rows written above
+send (8)    null<1>:d    r31.0		0x5    0x120A8018:ud
+
+
+
+// FileName:	DN_Save_422CP_16x8.asm
+// Author:		Vivek Kumar
+// Description:	Save one 16x8 blocks of DN output to the color pipe in 4-2-2 format
+
+
+.declare mubMSGHDR_DN_OUT_2   Base=r36.0      ElementSize=1  Type=ub	// byte view of the second write message: header r36, data r37-r40
+
+
+mov (8)		mudMSGHDR_DN_OUT(0)<1>		r0<8;8,1>:ud            			// message header: r31 = r0
+shl (1)     mdMSGHDR_DN_OUT(0,0)<1>		r7.0<0;1,0>:w     1:w  		{ NoDDClr }            // r31.0 = X origin * 2 (422 output)
+mov (1)     mdMSGHDR_DN_OUT(0,1)<1>		r7.1<0;1,0>:w          		{ NoDDClr, NoDDChk }   // r31.1 = Y origin
+mov (1)     mudMSGHDR_DN_OUT(0,2)<1>	0x7000F:ud	{ NoDDClr, NoDDChk }            // block size: width-1 = 15, height-1 = 7 (16 bytes x 8 rows)
+
+//M0.3	- 0 - CP Enable, 1 - Area of Interest, 3:2 Message Format(TBD), 4:3 - Ignored, 31:5 CP state pointer
+//Compose area-of-interest bit + color pipe state pointer: r31.3 = r2.4:ud OR r7.26:b
+or (1)		mudMSGHDR_DN_OUT(0,3)<1>		r2.4<0;1,0>:ud 	r7.26<0;1,0>:b		{ NoDDChk }
+
+// First 8 x 8 Block: pixels 0-7 of each row, packed into r32-r35 (two 16-byte rows per register)
+	mov (8)		mubMSGHDR_DN_OUT(1)<2>			ubDNDI_RESP(0,0)<8;8,1>				{ NoDDClr } 	// Y row 0, pixels 0-7 -> even bytes of r32 bytes 0-15
+	mov (8)		mubMSGHDR_DN_OUT(1,16)<2>		ubDNDI_RESP(0,16)<8;8,1>			{ NoDDClr, NoDDChk } 	// Y row 1, pixels 0-7 -> even bytes of r32 bytes 16-31
+	mov (8)		mubMSGHDR_DN_OUT(2)<2>			ubDNDI_RESP(0,32)<8;8,1>				{ NoDDClr } 	// Y row 2, pixels 0-7 -> even bytes of r33 bytes 0-15
+	mov (8)		mubMSGHDR_DN_OUT(2,16)<2>		ubDNDI_RESP(0,48)<8;8,1>			{ NoDDClr, NoDDChk } 	// Y row 3, pixels 0-7 -> even bytes of r33 bytes 16-31
+	mov (8)		mubMSGHDR_DN_OUT(3)<2>			ubDNDI_RESP(0,64)<8;8,1>				{ NoDDClr } 	// Y row 4, pixels 0-7 -> even bytes of r34 bytes 0-15
+	mov (8)		mubMSGHDR_DN_OUT(3,16)<2>		ubDNDI_RESP(0,80)<8;8,1>			{ NoDDClr, NoDDChk } 	// Y row 5, pixels 0-7 -> even bytes of r34 bytes 16-31
+	mov (8)		mubMSGHDR_DN_OUT(4)<2>			ubDNDI_RESP(0,96)<8;8,1>				{ NoDDClr } 	// Y row 6, pixels 0-7 -> even bytes of r35 bytes 0-15
+	mov (8)		mubMSGHDR_DN_OUT(4,16)<2>		ubDNDI_RESP(0,112)<8;8,1>			{ NoDDClr, NoDDChk } 	// Y row 7, pixels 0-7 -> even bytes of r35 bytes 16-31
+
+	mov (4)     mubMSGHDR_DN_OUT(1,1)<4>   	ubDNDI_RESP(5,1)<8;4,2>			{ NoDDClr, NoDDChk } 	// U row 0 (chroma bytes 1,3,5,7 from r51) -> r32 bytes 1,5,9,13
+	mov (4)     mubMSGHDR_DN_OUT(1,17)<4>   	ubDNDI_RESP(5,17)<8;4,2>		{ NoDDClr, NoDDChk } 	// U row 1 -> r32 bytes 17,21,25,29
+
+	mov (4)     mubMSGHDR_DN_OUT(1,3)<4>   	ubDNDI_RESP(5,0)<8;4,2>			{ NoDDChk }    	// V row 0 (chroma bytes 0,2,4,6 from r51) -> r32 bytes 3,7,11,15
+	mov (4)     mubMSGHDR_DN_OUT(1,19)<4>   	ubDNDI_RESP(5,16)<8;4,2>			{ NoDDChk }    	// V row 1 -> r32 bytes 19,23,27,31
+	mov (4)     mubMSGHDR_DN_OUT(2,1)<4>   	ubDNDI_RESP(5,33)<8;4,2>			{ NoDDClr, NoDDChk } 	// U row 2 -> r33 bytes 1,5,9,13
+	mov (4)     mubMSGHDR_DN_OUT(2,17)<4>   	ubDNDI_RESP(5,49)<8;4,2>		{ NoDDClr, NoDDChk } 	// U row 3 -> r33 bytes 17,21,25,29
+
+	mov (4)     mubMSGHDR_DN_OUT(2,3)<4>   	ubDNDI_RESP(5,32)<8;4,2>			{ NoDDChk }    	// V row 2 -> r33 bytes 3,7,11,15
+	mov (4)     mubMSGHDR_DN_OUT(2,19)<4>   	ubDNDI_RESP(5,48)<8;4,2>			{ NoDDChk }    	// V row 3 -> r33 bytes 19,23,27,31
+	mov (4)     mubMSGHDR_DN_OUT(3,1)<4>   	ubDNDI_RESP(5,65)<8;4,2>			{ NoDDClr, NoDDChk } 	// U row 4 -> r34 bytes 1,5,9,13
+	mov (4)     mubMSGHDR_DN_OUT(3,17)<4>   	ubDNDI_RESP(5,81)<8;4,2>		{ NoDDClr, NoDDChk } 	// U row 5 -> r34 bytes 17,21,25,29
+
+	mov (4)     mubMSGHDR_DN_OUT(3,3)<4>   	ubDNDI_RESP(5,64)<8;4,2>			{ NoDDChk }    	// V row 4 -> r34 bytes 3,7,11,15
+	mov (4)     mubMSGHDR_DN_OUT(3,19)<4>   	ubDNDI_RESP(5,80)<8;4,2>			{ NoDDChk }    	// V row 5 -> r34 bytes 19,23,27,31
+	mov (4)     mubMSGHDR_DN_OUT(4,1)<4>   	ubDNDI_RESP(5,97)<8;4,2>			{ NoDDClr, NoDDChk } 	// U row 6 -> r35 bytes 1,5,9,13
+	mov (4)     mubMSGHDR_DN_OUT(4,17)<4>   	ubDNDI_RESP(5,113)<8;4,2>		{ NoDDClr, NoDDChk } 	// U row 7 -> r35 bytes 17,21,25,29
+
+	mov (4)     mubMSGHDR_DN_OUT(4,3)<4>   	ubDNDI_RESP(5,96)<8;4,2>			{ NoDDChk }    	// V row 6 -> r35 bytes 3,7,11,15
+	mov (4)     mubMSGHDR_DN_OUT(4,19)<4>   	ubDNDI_RESP(5,112)<8;4,2>			{ NoDDChk }    	// V row 7 -> r35 bytes 19,23,27,31
+
+// Second 8 x 8 Block: pixels 8-15 of each row, packed into r37-r40 behind a second header in r36
+mov	(8)	r36.0<1>:ud		r31.0<8;8,1>:ud	// second header r36 = copy of the first header r31
+add	(1)	r36.0<1>:ud		r36.0<0;1,0>:w		0x10:w	// X origin += 16 bytes (8 pixels x 2 bytes)
+
+	mov (8)		mubMSGHDR_DN_OUT_2(1)<2>		ubDNDI_RESP(0,8)<8;8,1>			{ NoDDClr } 	// Y row 0, pixels 8-15 -> even bytes of r37 bytes 0-15
+	mov (8)		mubMSGHDR_DN_OUT_2(1,16)<2>	ubDNDI_RESP(0,24)<8;8,1>			{ NoDDClr, NoDDChk } 	// Y row 1, pixels 8-15 -> even bytes of r37 bytes 16-31
+	mov (8)		mubMSGHDR_DN_OUT_2(2)<2>		ubDNDI_RESP(0,40)<8;8,1>			{ NoDDClr } 	// Y row 2, pixels 8-15 -> even bytes of r38 bytes 0-15
+	mov (8)		mubMSGHDR_DN_OUT_2(2,16)<2>	ubDNDI_RESP(0,56)<8;8,1>			{ NoDDClr, NoDDChk } 	// Y row 3, pixels 8-15 -> even bytes of r38 bytes 16-31
+	mov (8)		mubMSGHDR_DN_OUT_2(3)<2>		ubDNDI_RESP(0,72)<8;8,1>			{ NoDDClr } 	// Y row 4, pixels 8-15 -> even bytes of r39 bytes 0-15
+	mov (8)		mubMSGHDR_DN_OUT_2(3,16)<2>	ubDNDI_RESP(0,88)<8;8,1>			{ NoDDClr, NoDDChk } 	// Y row 5, pixels 8-15 -> even bytes of r39 bytes 16-31
+	mov (8)		mubMSGHDR_DN_OUT_2(4)<2>		ubDNDI_RESP(0,104)<8;8,1>			{ NoDDClr } 	// Y row 6, pixels 8-15 -> even bytes of r40 bytes 0-15
+	mov (8)		mubMSGHDR_DN_OUT_2(4,16)<2>	ubDNDI_RESP(0,120)<8;8,1>			{ NoDDClr, NoDDChk } 	// Y row 7, pixels 8-15 -> even bytes of r40 bytes 16-31
+
+	mov (4)     mubMSGHDR_DN_OUT_2(1,1)<4>   	ubDNDI_RESP(5,9)<8;4,2>		{ NoDDClr, NoDDChk } 	// U row 0 (chroma bytes 9,11,13,15 from r51) -> r37 bytes 1,5,9,13
+	mov (4)     mubMSGHDR_DN_OUT_2(1,17)<4>   	ubDNDI_RESP(5,25)<8;4,2>		{ NoDDClr, NoDDChk } 	// U row 1 -> r37 bytes 17,21,25,29
+
+	mov (4)     mubMSGHDR_DN_OUT_2(1,3)<4>   	ubDNDI_RESP(5,8)<8;4,2>			{ NoDDChk }    	// V row 0 (chroma bytes 8,10,12,14 from r51) -> r37 bytes 3,7,11,15
+	mov (4)     mubMSGHDR_DN_OUT_2(1,19)<4>   	ubDNDI_RESP(5,24)<8;4,2>		{ NoDDChk }    	// V row 1 -> r37 bytes 19,23,27,31
+	mov (4)     mubMSGHDR_DN_OUT_2(2,1)<4>   	ubDNDI_RESP(5,41)<8;4,2>		{ NoDDClr, NoDDChk } 	// U row 2 -> r38 bytes 1,5,9,13
+	mov (4)     mubMSGHDR_DN_OUT_2(2,17)<4>   	ubDNDI_RESP(5,57)<8;4,2>		{ NoDDClr, NoDDChk } 	// U row 3 -> r38 bytes 17,21,25,29
+
+	mov (4)     mubMSGHDR_DN_OUT_2(2,3)<4>   	ubDNDI_RESP(5,40)<8;4,2>			{ NoDDChk }    	// V row 2 -> r38 bytes 3,7,11,15
+	mov (4)     mubMSGHDR_DN_OUT_2(2,19)<4>   	ubDNDI_RESP(5,56)<8;4,2>		{ NoDDChk }    	// V row 3 -> r38 bytes 19,23,27,31
+	mov (4)     mubMSGHDR_DN_OUT_2(3,1)<4>   	ubDNDI_RESP(5,73)<8;4,2>		{ NoDDClr, NoDDChk } 	// U row 4 -> r39 bytes 1,5,9,13
+	mov (4)     mubMSGHDR_DN_OUT_2(3,17)<4>   	ubDNDI_RESP(5,89)<8;4,2>		{ NoDDClr, NoDDChk } 	// U row 5 -> r39 bytes 17,21,25,29
+
+	mov (4)     mubMSGHDR_DN_OUT_2(3,3)<4>   	ubDNDI_RESP(5,72)<8;4,2>			{ NoDDChk }    	// V row 4 -> r39 bytes 3,7,11,15
+	mov (4)     mubMSGHDR_DN_OUT_2(3,19)<4>   	ubDNDI_RESP(5,88)<8;4,2>		{ NoDDChk }    	// V row 5 -> r39 bytes 19,23,27,31
+	mov (4)     mubMSGHDR_DN_OUT_2(4,1)<4>   	ubDNDI_RESP(5,105)<8;4,2>		{ NoDDClr, NoDDChk } 	// U row 6 -> r40 bytes 1,5,9,13
+	mov (4)     mubMSGHDR_DN_OUT_2(4,17)<4>   	ubDNDI_RESP(5,121)<8;4,2>		{ NoDDClr, NoDDChk } 	// U row 7 -> r40 bytes 17,21,25,29
+
+	mov (4)     mubMSGHDR_DN_OUT_2(4,3)<4>   	ubDNDI_RESP(5,104)<8;4,2>			{ NoDDChk }    	// V row 6 -> r40 bytes 3,7,11,15
+	mov (4)     mubMSGHDR_DN_OUT_2(4,19)<4>   	ubDNDI_RESP(5,120)<8;4,2>		{ NoDDChk }    	// V row 7 -> r40 bytes 19,23,27,31
+
+//send out data through data port: one message per 8x8 half, both with the same descriptor and a null destination
+send (8)    null<1>:d    r31.0		0x5    0xA0A801B:ud	// first half: header r31 + packed rows r32-r35
+send (8)    null<1>:d    r36.0	0x5    0xA0A801B:ud	// second half: header r36 (X origin + 16 bytes) + packed rows r37-r40
+
+
+
+//End of Thread message: r127 carries a copy of r0 as the message header
+
+mov (8) r127<1>:ud r0.0<8;8,1>:ud 	// r127 = r0
+ send (1) null<1>:d r127 0x27 0x02000010	// NOTE(review): presumably 0x27 / 0x02000010 select the thread spawner with end-of-thread set - confirm against the PRM
+
+
+.end_code 
+.end_kernel
diff --git a/src/shaders/post_processing/gen7/PA_DN_PA.g4a b/src/shaders/post_processing/gen7/PA_DN_PA.g4a
new file mode 100644
index 0000000..5469949
--- /dev/null
+++ b/src/shaders/post_processing/gen7/PA_DN_PA.g4a
@@ -0,0 +1,403 @@
+/*
+ *  Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+//   57    // Total instruction count
+//    1    // Total kernel count
+
+.kernel PA_DN_PA
+.code
+
+
+
+// FileName:	DN_PA_Core.asm 
+// Author:		Vivek Kumar
+// Description:	Tasks for DN only case (16x8 block) for Packed format
+
+
+
+// FileName:	DN.asm 
+// Author:		Vivek Kumar
+// Description:	Tasks for DN only case (16x8 block)
+
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16)
+.default_register_type  :ub
+
+.reg_count_total        128
+.reg_count_payload      7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+                                    //  e.g.            byte0   byte1  byte2
+                                    // YUYV               0       1      3
+                                    // YVYU               0       3      1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+                                    //  e.g.              byte0           byte1           byte2
+                                    // YUYV                 0               1               3
+                                    // YVYU                 0               3               1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+//                = 000 0001 (min message len 1 ) 0,0000 (resp len 0   -add later)
+//                  0000,0000,0000
+//                  0001(Spawn a root thread),0001 (Root thread spawn thread)
+//                = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+//                = 000 0110 (min message len 6 ) 0,0000 (resp len 0   -add later)
+//                  1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+//                  0000,0000 (Binding table index, added later)
+//                = 0x0C098700 (value of the fields above; this line previously repeated the thread-spawner value 0x02000011)
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+        // Message Descriptors
+                //                = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+                //                  1 (header present 1) 0 1010 (media block write) 000000
+                //                  00000000 (binding table index - set later)
+                //                = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+                             // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT  r43    // Dummy Message Register for EOT
+
+
+.declare    mubMSGPAYLOAD  Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare    muwMSGPAYLOAD  Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare    mudMSGPAYLOAD  Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare    mfMSGPAYLOAD   Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+
+.declare fROBUF		  Base=r9.0		ElementSize=4		SrcRegion=<8;8,1>		  DstRegion=<1>		Type=f
+
+.declare udROBUF		Base=r9.0		ElementSize=4		SrcRegion=<8;8,1>		  DstRegion=<1>		Type=ud
+
+.declare uwROBUF		Base=r9.0		ElementSize=2		SrcRegion=<16;16,1>		DstRegion=<1>		Type=uw
+
+.declare ubROBUF		Base=r9.0		ElementSize=1		SrcRegion=<16;16,1>		DstRegion=<1>		Type=ub
+
+.declare ub4ROBUF 	Base=r9.0		ElementSize=1		SrcRegion=<32;8,4>		DstRegion=<4>		Type=ub
+
+
+// End of common.inc
+
+
+// FileName:    DNDI.inc
+// Author:      Vivek Kumar
+// Description: Include file for DN, DI and DNDI
+// Inputs:      DI_ENABLE, DN_ENABLE, DN_PLANAR, DN_PACKED
+
+
+
+
+// End of common.inc
+
+
+//Interface:
+//Static Parameters:
+//r1
+
+
+//====================== Binding table (Explicit To DNDI)=========================================
+
+
+.declare mudMSGHDR_DNDI     Base=r18      ElementSize=4    Type=ud
+.declare mdMSGHDR_DNDI      Base=r18      ElementSize=4    Type=d
+.declare mwMSGHDR_DNDI      Base=r18      ElementSize=2    Type=w
+
+
+.declare mudMSGHDR_STMM     Base=r20      ElementSize=4    Type=ud
+
+
+.declare mudMSGHDR_HIST     Base=r22      ElementSize=4    Type=ud
+
+
+.declare mudMSGHDR_ENC_STATS Base=r24 ElementSize=4   Type=ud
+.declare muwMSGHDR_ENC_STATS Base=r24 ElementSize=2   Type=uw
+.declare mubMSGHDR_ENC_STATS Base=r24 ElementSize=1   Type=ub
+
+
+.declare mudMSGHDR_DN_OUT   Base=r31.0      ElementSize=4  Type=ud
+.declare mdMSGHDR_DN_OUT    Base=r31.0      ElementSize=4  Type=d
+.declare mubMSGHDR_DN_OUT   Base=r31.0      ElementSize=1  Type=ub
+
+
+.declare mudMSGHDR_UVCOPY   Base=r36      ElementSize=4  Type=ud
+.declare mdMSGHDR_UVCOPY    Base=r36      ElementSize=4  Type=d
+.declare mudMSGHDR_UCOPY    Base=r36       ElementSize=4  Type=ud
+.declare mudMSGHDR_VCOPY    Base=r38       ElementSize=4  Type=ud
+
+
+.declare mudMSGHDR_DI_OUT1  Base=r18.0      ElementSize=4     Type=ud
+.declare mubMSGHDR_DI_OUT1  Base=r18.0      ElementSize=1     Type=ub
+
+
+.declare mudMSGHDR_DI_OUT2  Base=r23.0      ElementSize=4     Type=ud
+.declare mubMSGHDR_DI_OUT2  Base=r23.0      ElementSize=1     Type=ub
+
+//r45
+// r45 is used as a message header: it sits directly below the response buffer (r46), so the data need not be moved.
+
+.declare mudDN_Y_OUT        Base=r45.0 ElementSize=4 SrcRegion=<8;8,1>   DstRegion=<1> Type=ud
+
+// Message response (denoised and deinterlaced pixels plus statistics), starting at r46; ud / uw / ub views
+.declare udDNDI_RESP        Base=r46.0 ElementSize=4 SrcRegion=<8;8,1>   DstRegion=<1> Type=ud
+.declare uwDNDI_RESP        Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare ubDNDI_RESP        Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+// Message response (UV copy), starting at r58, i.e. 12 registers after the start of the DNDI response
+.declare udDNDI_UV_RESP     Base=r58.0 ElementSize=4 SrcRegion=<8;8,1>  DstRegion=<1> Type=ud
+.declare ubDNDI_UV_RESP     Base=r58.0 ElementSize=1 SrcRegion=<16;16,1>    DstRegion=<1> Type=ub
+
+// Temporary GRFs for the 42X to 422 conversion, starting at r10; uw / ub views
+.declare uwDNDI_UVCOPY_TEMP Base=r10.0 ElementSize=2 SrcRegion=<16;16,1>    DstRegion=<1> Type=uw       //8 GRFs
+.declare ubDNDI_UVCOPY_TEMP Base=r10.0 ElementSize=1 SrcRegion=<16;16,1>    DstRegion=<1> Type=ub       //8 GRFs
+//---------------------------------------------------------------------------
+// Message descriptors
+//---------------------------------------------------------------------------
+// Extended message descriptor
+    // Message descriptor   for sampler read
+    //                    = 000 0010 (message len 2) 00000 (resp len - set later, 12 or 5 or 11)
+    //                      1 (header present 1) 0 11 (SIMD32/64 mode)
+    //                      1000 (message type) 0000 (DI state index)
+    //                      00000000 (binding table index - set later)
+    //                    = 0x040b8000
+
+
+// Attention: The Message Length is The Number of GRFs with Data Only, without the Header
+
+
+//---------------------------------------------------------------------------
+// VDI Return Data format
+//---------------------------------------------------------------------------
+// Defines for DI enabled
+
+
+// Defines for DI disabled
+
+
+
+// FileName:	DNDI_Command.asm 
+// Author:		Vivek Kumar
+// Description:	Sends a message to the VDI to process one DN (16x8) or DNDI (16x4) block
+
+// Prepare the DNDI send command: r18 = copy of the r0 thread header, r19 = payload carrying the block origin
+mov (8)		mudMSGHDR_DNDI(0)<1>			r0.0<8;8,1>:ud					// message header
+mov (1)		mwMSGHDR_DNDI(1,4)<1>			r7.0<0;1,0>:w		{ NoDDClr }		// horizontal origin	// Do we need to add offset here? -vK
+mov (1)		mwMSGHDR_DNDI(1,12)<1>			r7.1<0;1,0>:w		{ NoDDChk }		// vertical origin		// Can these 2 be combined? - vK
+// Issue the request starting at r18; the response is written starting at r46 (udDNDI_RESP)
+send (8)	udDNDI_RESP(0)<1>	r18	0x2	0x49E8003:ud
+
+// On Gen6, with the VDI walker, use the XY pair returned by the hardware rather than the one programmed above.
+// VDI_RETURNED_XY is ordered X,Y when the walker is enabled, and equals the programmed values when the walker is disabled.
+mov (2) 	r7.0<1>:w     uwDNDI_RESP(4,14)<2;2,1>	// horizontal/vertical origin in W.14 and W.15 of response register 4
+
+
+
+// FileName:	DN_Hist_Save.asm 
+// Author:		Vivek Kumar
+// Description:	Saves DN history data to statistics surface
+
+// Write denoise history to memory: the header is staged in r27, the payload goes to r23, then r22-r23 are sent
+mov (8)    r27<1>:ud				r0.0<8;8,1>:ud                   			// staging copy of the r0 message header
+
+
+	mov (2)    mudMSGHDR_HIST(1)<1>    	udDNDI_RESP(4,0)<2;2,1>    	// Move denoise history (2 dwords of response register 4) into the payload register r23 (4x2 block)
+
+// Overwrite header dwords 0..2 with the destination block origin and size
+shr (2)    r27.0<1>:ud		r7.0<2;2,1>:w				2:w                                 	// X,Y origin / 4
+add (1)    r27.0<1>:ud		r27.0<0;1,0>:ud			r1.12<0;1,0>:uw		{ NoDDClr }  	// Add pitch to X origin
+mov (1)    r27.2<1>:ud		0x10003:ud									{ NoDDChk }  	// block width and height
+// Copy the finished header into r22 and issue the write; no response is returned (null destination)
+mov (8)		mudMSGHDR_HIST(0)<1>		r27.0<8;8,1>:ud
+send (8)	null<1>:d	r22	0x5		0x40A8021:ud
+
+
+
+// FileName:	DNDI_Enc_Stats_Save.asm
+// Author:		Vivek Kumar
+// Description:	Saves Encoder Statistics data to statistics surface in case of DI enabled (for 16x4 block)
+
+// Write encoder statistics to memory: header in r24, payload in r25
+//Currently enable this only on Gen6 validation
+mov (8)		mudMSGHDR_ENC_STATS(1)<1>	0x0:ud						// Zero the payload register (r25)
+mov (8)		mudMSGHDR_ENC_STATS(0)<1>	r0.0<8;8,1>:ud				// message header
+// Overwrite header dwords 0..2 with the destination block origin and size
+shr (1)		mudMSGHDR_ENC_STATS(0,0)<1>		r7.0<0;1,0>:w            	1:w  	{ NoDDClr }			// X origin / 2
+mul (1)		acc0.1<1>:ud					r7.1<0;1,0>:w				3:w																							// Y origin * 3
+shr (1)		mudMSGHDR_ENC_STATS(0,1)<1>		acc0.1<0;1,0>:ud			2:w		{ NoDDClr, NoDDChk }		// Y origin * 3/4
+mov (1)		mudMSGHDR_ENC_STATS(0,2)<1>		0x50003:ud				{ NoDDChk }			// block width and height; NOTE(review): the old note said (8x3), but 0x50003 decodes as 4 bytes x 6 rows if the fields hold size-1 as in the other writes here -- confirm
+add (2)		mudMSGHDR_ENC_STATS(0,0)<1>		mudMSGHDR_ENC_STATS(0,0)<2;2,1>       	r1.12<2;2,1>:uw					// Add pitch to X,Y origin
+
+// NOTE(review): the moves below fill bytes 0, 6-7, 8-11, 18-19 and 20-23 of r25, i.e. byte columns 0-3 of the six table rows when rows are 4 bytes wide
+	//Data block for Encoder Statistics
+	//----------------------------------------------------
+	//|  0  |   1  |   2   |  3  |  4  |  5  |  6  |  7  | Bytes
+	//----------------------------------------------------
+	//| BNE |   X  |   X   |  X  |           X           |
+	//----------------------------------------------------
+	//|     X      |     SVCM    |           X           |
+	//----------------------------------------------------
+	//|   SHCM     |     STAD    |           X           |
+	//----------------------------------------------------
+	//|            X             |           X           |
+	//----------------------------------------------------
+	//|     X      |     SVCM    |           X           |
+	//----------------------------------------------------
+	//|   SHCM     |     STAD    |           X           |
+	//----------------------------------------------------
+	mov (1)		mubMSGHDR_ENC_STATS(1,0)<1>		ubDNDI_RESP(4,8)<0;1,0>    		{ NoDDClr }				// response byte 8 -> payload byte 0
+	mov (1)		muwMSGHDR_ENC_STATS(1,3)<1>		uwDNDI_RESP(4,11)<0;1,0>    	{ NoDDClr, NoDDChk }			// response word 11 -> payload word 3
+	mov (2)		muwMSGHDR_ENC_STATS(1,4)<1>		uwDNDI_RESP(4,12)<2;2,1>    	{ NoDDClr, NoDDChk }			// response words 12-13 -> payload words 4-5
+	mov (1)		muwMSGHDR_ENC_STATS(1,9)<1>		uwDNDI_RESP(4,8)<0;1,0>    		{ NoDDClr, NoDDChk }			// response word 8 -> payload word 9
+	mov (2)		muwMSGHDR_ENC_STATS(1,10)<1>	uwDNDI_RESP(4,9)<2;2,1>    		{ NoDDChk }				// response words 9-10 -> payload words 10-11
+
+// Issue the write starting at r24; no response is returned (null destination)
+send (8)   null<1>:d    r24    0x5    0x40A8021:ud
+
+
+
+// FileName:	DN_Save_PA.asm
+// Author:		Vivek Kumar
+// Description:	Save one 16x8 blocks of DN output in Packed format
+
+
+add (4)		a0.4<1>:uw   r2.28<4;4,1>:ub   1024:w    // a0.4..a0.7 = the four bytes r2.28..r2.31 (initial Y,U,V offsets in the YUV422 block) + 1024; assuming 32-byte GRFs, 1024 is the start of r32, the register after the r31 header (the old note said m14)
+
+mov (8)		mudMSGHDR_DN_OUT(0)<1>		r0<8;8,1>:ud            					// message header
+shl (1)     mdMSGHDR_DN_OUT(0,0)<1>		r7.0<0;1,0>:w     1:w  		{ NoDDClr }     // X origin * 2 (422 output)
+mov (1)     mdMSGHDR_DN_OUT(0,1)<1>		r7.1<0;1,0>:w          		{ NoDDClr, NoDDChk }  // Y origin
+mov (1)     mudMSGHDR_DN_OUT(0,2)<1>	0x7001F:ud	{ NoDDChk }     // block width and height (32x8)
+// Interleave into the packed output rows in place: 16 Y bytes land on every 2nd byte, 8 U and 8 V bytes on every 4th byte; one 32-byte output row per source row
+	mov (16)    r[a0.4,0]<2>:ub   ubDNDI_RESP(0,0)<16;16,1>    	{ NoDDClr }   	// Y row 0 -> output row 0
+	mov (16)    r[a0.4,32]<2>:ub   ubDNDI_RESP(0,16)<16;16,1>    	{ NoDDClr }   	// Y row 1 -> output row 1
+	mov (16)    r[a0.4,64]<2>:ub   ubDNDI_RESP(0,32)<16;16,1>    	{ NoDDClr }   	// Y row 2 -> output row 2
+	mov (16)    r[a0.4,96]<2>:ub   ubDNDI_RESP(0,48)<16;16,1>    	{ NoDDClr }   	// Y row 3 -> output row 3
+	mov (16)    r[a0.4,128]<2>:ub   ubDNDI_RESP(0,64)<16;16,1>    	{ NoDDClr }   	// Y row 4 -> output row 4
+	mov (16)    r[a0.4,160]<2>:ub   ubDNDI_RESP(0,80)<16;16,1>    	{ NoDDClr }   	// Y row 5 -> output row 5
+	mov (16)    r[a0.4,192]<2>:ub   ubDNDI_RESP(0,96)<16;16,1>    	{ NoDDClr }   	// Y row 6 -> output row 6
+	mov (16)    r[a0.4,224]<2>:ub   ubDNDI_RESP(0,112)<16;16,1>    	{ NoDDClr }   	// Y row 7 -> output row 7
+	mov (8)     r[a0.5,0]<4>:ub   ubDNDI_RESP(5,1)<16;8,2>  { NoDDClr, NoDDChk }  // U row 0: odd source bytes of response register 5
+	mov (8)     r[a0.6,0]<4>:ub   ubDNDI_RESP(5,0)<16;8,2>    { NoDDChk }  	// V row 0: even source bytes of response register 5
+	mov (8)     r[a0.5,32]<4>:ub   ubDNDI_RESP(5,17)<16;8,2>  { NoDDClr, NoDDChk }  // U row 1
+	mov (8)     r[a0.6,32]<4>:ub   ubDNDI_RESP(5,16)<16;8,2>    { NoDDChk }  	// V row 1
+	mov (8)     r[a0.5,64]<4>:ub   ubDNDI_RESP(5,33)<16;8,2>  { NoDDClr, NoDDChk }  // U row 2
+	mov (8)     r[a0.6,64]<4>:ub   ubDNDI_RESP(5,32)<16;8,2>    { NoDDChk }  	// V row 2
+	mov (8)     r[a0.5,96]<4>:ub   ubDNDI_RESP(5,49)<16;8,2>  { NoDDClr, NoDDChk }  // U row 3
+	mov (8)     r[a0.6,96]<4>:ub   ubDNDI_RESP(5,48)<16;8,2>    { NoDDChk }  	// V row 3
+	mov (8)     r[a0.5,128]<4>:ub   ubDNDI_RESP(5,65)<16;8,2>  { NoDDClr, NoDDChk }  // U row 4
+	mov (8)     r[a0.6,128]<4>:ub   ubDNDI_RESP(5,64)<16;8,2>    { NoDDChk }  	// V row 4
+	mov (8)     r[a0.5,160]<4>:ub   ubDNDI_RESP(5,81)<16;8,2>  { NoDDClr, NoDDChk }  // U row 5
+	mov (8)     r[a0.6,160]<4>:ub   ubDNDI_RESP(5,80)<16;8,2>    { NoDDChk }  	// V row 5
+	mov (8)     r[a0.5,192]<4>:ub   ubDNDI_RESP(5,97)<16;8,2>  { NoDDClr, NoDDChk }  // U row 6
+	mov (8)     r[a0.6,192]<4>:ub   ubDNDI_RESP(5,96)<16;8,2>    { NoDDChk }  	// V row 6
+	mov (8)     r[a0.5,224]<4>:ub   ubDNDI_RESP(5,113)<16;8,2>  { NoDDClr, NoDDChk }  // U row 7
+	mov (8)     r[a0.6,224]<4>:ub   ubDNDI_RESP(5,112)<16;8,2>    { NoDDChk }  	// V row 7
+
+// Send the data out through the data port, starting at the r31 header; no response is returned (null destination)
+send (8)    null<1>:d    r31.0		0x5    0x120A8018:ud
+
+
+
+//End of Thread message
+// r127 receives a copy of the r0 thread header and is then sent as a one-register message with no response
+mov (8) r127<1>:ud r0.0<8;8,1>:ud 
+ send (1) null<1>:d r127 0x27 0x02000010
+
+// Close the code section and the kernel
+.end_code 
+.end_kernel
diff --git a/src/shaders/post_processing/gen7/PL2_AVS_Buf_0.g4a b/src/shaders/post_processing/gen7/PL2_AVS_Buf_0.g4a
new file mode 100644
index 0000000..b92a5d0
--- /dev/null
+++ b/src/shaders/post_processing/gen7/PL2_AVS_Buf_0.g4a
@@ -0,0 +1,542 @@
+/*
+ *  Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+//   44    // Total instruction count
+//    1    // Total kernel count
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+// Assembler defaults, presumably applied when an instruction omits its execution size or operand type:
+.default_execution_size (16)
+.default_register_type  :ub
+// Register budget declared for this kernel: total register count, then payload register count
+.reg_count_total        128
+.reg_count_payload      7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+                                    //  e.g.            byte0   byte1  byte2
+                                    // YUYV               0       1      3
+                                    // YVYU               0       3      1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+                                    //  e.g.              byte0           byte1           byte2
+                                    // YUYV                 0               1               3
+                                    // YVYU                 0               3               1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+//                = 000 0001 (min message len 1 ) 0,0000 (resp len 0   -add later)
+//                  0000,0000,0000
+//                  0001(Spawn a root thread),0001 (Root thread spawn thread)
+//                = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+//                = 000 0110 (min message len 6 ) 0,0000 (resp len 0   -add later)
+//                  1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+//                  0000,0000 (Binding table index, added later)
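+//                = 0x0C098700 (value given by the bit fields above; this line previously repeated 0x02000011, the thread-spawn descriptor)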
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+        // Message Descriptors
+                //                = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+                //                  1 (header present 1) 0 1010 (media block write) 000000
+                //                  00000000 (binding table index - set later)
+                //                = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+                             // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT  r43    // Dummy Message Register for EOT
+
+// Message payload area starting at r30, viewed as ub / uw / ud / f
+.declare    mubMSGPAYLOAD  Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare    muwMSGPAYLOAD  Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare    mudMSGPAYLOAD  Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare    mfMSGPAYLOAD   Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// Temp space for rotation: five typed views of the same storage starting at r9.0
+// Contiguous views: f, ud, uw and ub
+.declare fROBUF		  Base=r9.0		ElementSize=4		SrcRegion=<8;8,1>		  DstRegion=<1>		Type=f
+
+.declare udROBUF		Base=r9.0		ElementSize=4		SrcRegion=<8;8,1>		  DstRegion=<1>		Type=ud
+
+.declare uwROBUF		Base=r9.0		ElementSize=2		SrcRegion=<16;16,1>		DstRegion=<1>		Type=uw
+
+.declare ubROBUF		Base=r9.0		ElementSize=1		SrcRegion=<16;16,1>		DstRegion=<1>		Type=ub
+// Strided byte view: every 4th byte (<32;8,4> source region, <4> destination stride)
+.declare ub4ROBUF 	Base=r9.0		ElementSize=1		SrcRegion=<32;8,4>		DstRegion=<4>		Type=ub
+
+
+// End of common.inc
+
+
+// FileName:		PL2_AVS_Buf_0.asm
+// Author:			Tatiya, Rupesh
+// Description:		Loads 8x8 AVS/IEF PL2 data into Buffer 0
+
+
+
+// FileName     :   PL2_AVS_Buf.asm
+// Author       :   Tatiya, Rupesh
+// Description  :   Loads 8x8 AVS/IEF PL2 data into Buffer N
+
+
+
+// Module name: Scaling.inc
+
+
+
+
+// Description: Includes all definitions explicit to Fast Composite.
+
+
+
+
+// End of common.inc
+
+
+//========== GRF partition ==========
+     // r0 header            :   r0          (1 GRF)
+     // Static parameters    :   r1 - r6     (6 GRFS)
+     // Inline parameters    :   r7 - r8     (2 GRFs)
+     // MSGSRC               :   r27         (1 GRF)
+//===================================
+
+//Interface:
+//========== Static Parameters (Explicit To Fast Composite) ==========
+//r1
+//CSC Set 0
+
+// Dword view of the static-parameter register r1 (presumably the CSC coefficients delivered through CURBE -- confirm)
+.declare udCSC_CURBE    Base=r1.0      ElementSize=4       Type=ud
+
+//Constant alpha
+
+
+//r2
+
+
+// Gen7 AVS WA
+
+
+// WiDi Definitions
+
+
+//Colorfill
+
+
+                                      // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise.	    
+// Byte view starting at r2.20; the <0;1,0> source region reads a single element as a scalar
+.declare ubCOLOR_PIXEL_VAL      Base=r2.20      ElementSize=1       SrcRegion=<0;1,0>       DstRegion=<1>       Type=ub
+
+//r3
+//Normalised Ratio of Horizontal step size with main video for all layers
+
+
+    //Normalised Ratio of Horizontal step size with main video for all layers becomes
+    //Normalised Horizontal step size for all layers in VP_Setup.asm
+
+
+//r4
+//Normalised Vertical step size for all layers
+
+
+//r5
+//Normalised Vertical Frame Origin for all layers
+
+
+//r6
+//Normalised Horizontal Frame Origin for all layers
+
+
+//========== Inline Parameters (Explicit To Fast Composite) ==========
+
+
+//Main video Step X
+
+
+//====================== Binding table (Explicit To Fast Composite)=========================================
+
+
+//Used by Interlaced Scaling Kernels
+
+
+//========== Sampler State Table Index (Explicit To Fast Composite)==========
+//Sampler Index for AVS/IEF messages
+
+
+//Sampler Index for SIMD16 sampler messages
+
+
+//=============================================================================
+// Six working buffers: BUFFER_0..3 start at r64, r80, r96 and r112 (16 registers apart); BUFFER_4 starts at r28 and BUFFER_5 at r46. Float views:
+.declare fBUFFER_0      Base=r64.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+.declare fBUFFER_1      Base=r80.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+.declare fBUFFER_2      Base=r96.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+.declare fBUFFER_3      Base=r112.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+.declare fBUFFER_4      Base=r28.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+.declare fBUFFER_5      Base=r46.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+// Unsigned dword views of the same buffers
+.declare udBUFFER_0     Base=r64.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_1     Base=r80.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_2     Base=r96.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_3     Base=r112.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_4     Base=r28.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_5     Base=r46.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+// Unsigned word views of the same buffers
+.declare uwBUFFER_0     Base=r64.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_1     Base=r80.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_2     Base=r96.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_3     Base=r112.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_4     Base=r28.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_5     Base=r46.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+// Unsigned byte views of the same buffers
+.declare ubBUFFER_0     Base=r64.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_1     Base=r80.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_2     Base=r96.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_3     Base=r112.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_4     Base=r28.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_5     Base=r46.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+// Strided byte views: every 4th byte (<32;8,4> source region, <4> destination stride)
+.declare ub4BUFFER_0    Base=r64.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_1    Base=r80.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_2    Base=r96.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_3    Base=r112.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_4    Base=r28.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_5    Base=r46.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+
+//Pointer to mask reg
+
+
+//r18
+
+
+//Always keep Channel Pointers and Offsets in the same GRF, so that we can use
+// the NoDDClr / NoDDChk flags. -rT
+
+
+.declare udCSC_COEFF_0  Base=r18.0    ElementSize=4 Type=ud       // 1 GRF
+
+//r19
+
+
+.declare udCSC_COEFF_1  Base=r19.0    ElementSize=4 Type=ud       // 1 GRF
+
+
+//r20
+
+.declare uwALPHA_MASK_REG_TEMP  Base=r20.0    ElementSize=2 SrcRegion=<16;16,1> Type=uw        // 1 GRF
+
+//r21
+
+.declare uwALPHA_MASK_REG       Base=r21.0         ElementSize=2 SrcRegion=<16;16,1> Type=uw        // 1 GRF
+
+//r22
+
+
+//Always keep Channel Pointers and Offsets in the same GRF, so that we can use
+// the NoDDClr / NoDDChk flags. -rT
+
+
+//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as
+//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT
+
+//r23
+
+
+//Lumakey
+
+
+//r24
+
+
+//r25
+
+
+//r26
+
+
+//defines to generate LABELS during compile time.
+
+
+        // Message Header
+        // m0.7         31:0    Debug
+        // m0.6         31:0    Debug
+        // m0.5         31:0    Ignored
+        // m0.4         31:0    Ignored
+        // m0.3         31:0    Ignored
+        // m0.2         31:16   Ignored
+        //              15      Alpha Write Channel Mask        enable=0, disable=1
+        //              14      Blue Write Channel Mask  (U)
+        //              13      Green Write Channel Mask (Y)
+        //              12      Red Write Channel Mask   (V)
+        //              11:0    Ignored
+        // m0.1                 Ignored
+        // m0.0                 Ignored
+
+
+        // AVS payload
+        // m1.7                 Group ID Number
+        // m1.6                 U 2nd Derivative        ---> NLAS dx
+        // m1.5                 Delta V                 ---> Step Y
+        // m1.4                 Delta U                 ---> Step X
+        // m1.3                 Pixel 0 V Address       ---> ORIY (Y0)
+        // m1.2                 Pixel 0 U Address       ---> ORIX (X0)
+        // m1.1                 Vertical Block Number
+        // m1.0                 Reserved
+
+        // Sampler Message Descriptor
+        // 31:29        Reserved                        000
+        // 28:25        Message length                  0010
+        // 24:20        Response length                 xxxxx   ---> 4GRFs for each enabled channel (AVS), 2GRFs for each enabled channel (sample unorm)
+        // 19           Header Present                  1
+        // 18:17        SIMD Mode                       11      ---> SIMD32/64
+        // 16:12        Message Type                    xxxxx   ---> 01011 sample_8x8, 01100 (sample_unorm), 01010 (sample_unorm+killpix)
+        // 11:8         Sampler Index                   xxxx
+        // 7:0          Binding Table Index             xxxxxxxx
+
+
+        // Msg Header M0.2
+        // 15:15        Alpha Write Channel Mask, 0: written back, 1: not written back
+        // 14:14        Blue  Write Channel Mask
+        // 13:13        Green Write Channel Mask
+        // 12:12        Red   Write Channel Mask
+
+
+//By design, Buffer 0,1,2,3 always have Layer 0 and Buffer 4,5 always have L1-L7
+
+
+//used to generate LABELS at compile time.
+
+
+        // 18:17        SIMD Mode                       10      ---> SIMD16
+        // 16:12        Message Type                    xxxxx   ---> 00000 (SIMD16)
+
+
+//r10-17  - 8 GRFs to load SIMD16 data (up to 4 channels)
+//r18-19  - 2 GRFs to store sampler ramp.
+// NOTE(review): the declarations below are based at r14 and r9, not at the r10-17 / r18-19 ranges listed above -- confirm which is current
+    .declare mfSCALING_0X_34X_PAYLOAD	Base=r14.0	ElementSize=4		SrcRegion=<8;8,1>		DstRegion=<1>		Type=f
+    .declare muwSCALING_0X_34X_PAYLOAD	Base=r14.0	ElementSize=2		SrcRegion=<16;16,1>		DstRegion=<1>		Type=uw
+    .declare mudCALING_0X_34X_PAYLOAD	Base=r14.0	ElementSize=4		SrcRegion=<8;8,1>		DstRegion=<1>		Type=ud
+    .declare mubCALING_0X_34X_PAYLOAD	Base=r14.0	ElementSize=1		SrcRegion=<32;32,1>		DstRegion=<1>		Type=ub
+// NOTE(review): mudCALING / mubCALING above look like typos for mudSCALING / mubSCALING; the names are left unchanged in case other sources reference them
+// NOTE(review): ub4SCALING_0X_34X_TEMP below has DstRegion=<1>, whereas the other ub4 views in this file (ub4ROBUF, ub4BUFFER_n) use <4> -- confirm
+	.declare fSCALING_0X_34X_TEMP		Base=r9.0	ElementSize=4		SrcRegion=<8;8,1>		DstRegion=<1>		Type=f
+	.declare udSCALING_0X_34X_TEMP		Base=r9.0	ElementSize=4		SrcRegion=<8;8,1>		DstRegion=<1>		Type=ud
+	.declare ub4SCALING_0X_34X_TEMP		Base=r9.0	ElementSize=1		SrcRegion=<32;8,4>		DstRegion=<1>		Type=ub
+	.declare uwSCALING_0X_34X_TEMP		Base=r9.0	ElementSize=2		SrcRegion=<16;16,1>		DstRegion=<1>		Type=uw
+
+	// Sampler ramp is used for Scaling 0X_0.34X; it has the same base register (r9) as the TEMP views above
+	.declare	fSAMPLER_RAMP  		Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> Type=f		// 1 GRFs, 8 elements
+
+
+	//#define rMSGDSC_UV    		r23.0
+
+
+//End of _SCALING_
+
+
+        //NOTE: We need offsets for the second half of LAYER 0 - even if we do not load it.
+        //Update the channel offset in the buffers for the lower 8x4 data for BUFFER_0.
+        mov (1)     r22.4<1>:ud       0x400040:ud
+
+
+    //Check if layer is to be skipped
+
+
+        // f0.1 pre-computed in Set_Layer_0; when it is clear, jump past both sampler loads
+        (-f0.1)  jmpi  (1)  SKIP_AVS_LOAD_L0_0_
+
+
+    //AVS_PAYLOAD already has all the data loaded at this point
+    add (1)     a0.0:ud     r23.5<0;1,0>:ud      0x44EB400:ud      // msg desc for the Y load = r23.5 + 0x44EB400, kept in a0.0 for the send
+
+    mov (1)     r16.2:ud      0x0000D000:ud                                        // header channel mask: bits 15, 14 and 12 set (Alpha, Blue/U, Red/V disabled), so only Green (Y) is written back
+
+    // Update the payload mirror in r25: dword 7 from r7.7, dword 1 from word r7.12
+        mov (1)   r25.7<1>:ud    r7.7:ud           { NoDDClr }
+        mov (1)   r25.1<1>:ud    r7.12:uw       { NoDDChk }
+
+
+    // set the vertical block number
+
+
+    mov (8)     r17.0:ud      r25.0<8;8,1>:ud                                     // Copy the message payload mirror (r25) into the payload register r17
+
+    // Gen7 AVS WA Only for YUV packed surfaces, NV12 and Y-channel only for Planar surfaces
+    //   if ((int)(u_left*width + 5.0/256) > (int)(u_left*width))
+    //   {
+    //       modified_u_coord = u_coord - 5.0/(256*width); //floating point
+    //   }
+    //   else if ((int)(u_left*width + 255.0/256) == (int)(u_left*width))
+    //   {
+    //       modified_u_coord = u_coord + 1.0/(256*width); //floating point
+    //   }
+    //   else{
+    //       modified_u_coord = u_coord;
+    //   }
+    //   Where u_left = u - 2*du + 3*ddu for IEF On
+    //   And u_left = u for IEF Off case
+    //   
+
+    // Check whether the Gen7 AVS WA is enabled (bit 1 of word r2.3); f0.0 and pixel 0 U are saved first because both are modified below
+    mov  (1)    r14.8:uw            f0.0:uw                           // save f0.0
+    mov  (1)    r14.5:f             r17.2<0;1,0>:f           // save pixel 0 U for chroma
+
+    and.nz.f0.0  (1)     null<1>:uw     r2.3:uw    0x2:uw
+    (-f0.0)jmpi  (1)     GEN7_PL2_AVS_WA_DONE_L0_0_
+
+    // Gen7 AVS WA, check if IEF is ON (bit 2 of word r2.3) for choosing Gen7 AVS WA formula
+    // IEF ON: acc0 = r17.2 - 2*r17.4 + 3*r17.6, i.e. u - 2*du + 3*ddu
+    and.nz.f0.0  (8)     null<1>:uw     r2.3<0;1,0>:uw             0x4:uw
+    (f0.0)mov (8)    acc0.0:f          r17.2<0;1,0>:f
+    (f0.0)mac (8)    acc0.0:f          r17.4<0;1,0>:f    -2.0:f
+    (f0.0)mac (8)    acc0.0:f          r17.6<0;1,0>:f     3.0:f
+    (f0.0)mov (1)    r14.2:f        acc0:f                                 // IEF ON,  rTEMP3.2 = u_left
+    (-f0.0)mov (1)   r14.2:f        r17.2<0;1,0>:f                // IEF OFF, rTEMP3.2 = u_left
+    // r14.1 = (word r2.3 & 0xFFF8) >> 3, converted to float; used as width below
+    and (1)         r14.1:ud     r2.3:uw    0xFFF8:uw
+    asr (1)         r14.1:ud     r14.1:ud    3:d
+    mov (1)         r14.1:f      r14.1:ud
+
+    // Gen7 AVS WA, if (int)(u_left*width + 5.0/256) > (int)(u_left*width)
+    mul (1)          r14.0:f        r14.2:f                  r14.1:f    // rTEMP3.0 = u_left*width
+    add (1)          r14.2:f        r14.0:f                  0.01953125:f  // rTEMP3.2 = u_left*width + 5.0/256
+    add (1)          r14.3:f        r14.0:f                  0.99609375:f  // rTEMP3.3 = u_left*width + 255.0/256
+
+    //Check if the values are < 0 and account for (int) cast of negative numbers
+    // Each group below converts float to int in place, then subtracts 1 if the float value was negative
+    //(int)(u_left*width)
+    cmp.l.f0.0 (1)   null<1>:f          r14.0:f                  0.00000000:f          
+    mov (1)          r14.0:d        r14.0:f
+    (f0.0)add (1)    r14.0:d        r14.0<0;1,0>:d           -1:d        
+
+    //(int)(u_left*width + 5.0/256)
+    cmp.l.f0.0 (1)   null<1>:f          r14.2:f                  0.00000000:f          
+    mov (1)          r14.2:d        r14.2:f        
+    (f0.0)add (1)    r14.2:d        r14.2<0;1,0>:d           -1:d        
+
+    //(int)(u_left*width + 255.0/256)
+    cmp.l.f0.0 (1)   null<1>:f          r14.3:f                  0.00000000:f          
+    mov (1)          r14.3:d        r14.3:f
+    (f0.0)add (1)    r14.3:d        r14.3<0;1,0>:d           -1:d        
+    // f0.0 is cleared first so that it stays 0 when the predicated compare below is not executed
+    mov  (1)    f0.0:uw                0:uw                   // clear flag
+    //if ((int)(u_left*width + 5.0/256) > (int)(u_left*width))
+    cmp.g.f1.0  (1)     null<1>:d        r14.2:d    r14.0:d
+    // modified_u_coord = u_coord - 5.0/(256*width); //floating point (r2.3:f presumably holds 5.0/(256*width) -- confirm)
+    (f1.0) add (1)     r17.2:f       r17.2<0;1,0>:f   -r2.3:f
+    //else if ((int)(u_left*width + 255.0/256) == (int)(u_left*width))
+    (-f1.0) cmp.e.f0.0  (1) null<1>:d    r14.3:d    r14.0:d
+    // modified_u_coord = u_coord + 1.0/(256*width); //floating point (r2.2:f presumably holds 1.0/(256*width) -- confirm)
+    (f0.0) add (1)     r17.2:f       r17.2<0;1,0>:f   r2.2:f
+
+GEN7_PL2_AVS_WA_DONE_L0_0_:
+    mov  (1)    f0.0:uw                r14.8:uw                   // restore f0.0
+
+    send (1)    uwBUFFER_0(0)<1>   r16    0x2    a0.0:ud
+    // Returns Y data in 4 GRFs in scrambled order, starting at BUFFER_0 register 0 (r64)
+
+    mov  (1)    r17.2:f       r14.5:f                    // restore pixel 0 U for chroma, No AVS WA for chroma
+    // Second request: same message registers (starting at r16) with a new descriptor and channel mask
+    add (1)     a0.0:ud                r23.5<0;1,0>:ud      0x48EB801:ud            // msg desc; 1 is added to change BI to UV
+    mov (1)     r16.2:ud      0x0000A000:ud                                               // channel mask: bits 15 and 13 set (Alpha, Green/Y disabled), so Red (V) and Blue (U) are written back
+
+    send (1)    uwBUFFER_0(4)<1>   r16    0x2    a0.0:ud
+    // Returns UV data in 8 GRFs in scrambled order, starting at BUFFER_0 register 4 (r68)
+
+SKIP_AVS_LOAD_L0_0_:
+    nop
+
+
diff --git a/src/shaders/post_processing/gen7/PL2_AVS_Buf_1.g4a b/src/shaders/post_processing/gen7/PL2_AVS_Buf_1.g4a
new file mode 100644
index 0000000..9609f60
--- /dev/null
+++ b/src/shaders/post_processing/gen7/PL2_AVS_Buf_1.g4a
@@ -0,0 +1,535 @@
+/*
+ *  Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+//   42    // Total instruction count
+//    1    // Total kernel count
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16)
+.default_register_type  :ub
+
+.reg_count_total        128
+.reg_count_payload      7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+                                    //  e.g.            byte0   byte1  byte2
+                                    // YUYV               0       1      3
+                                    // YVYU               0       3      1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+                                    //  e.g.              byte0           byte1           byte2
+                                    // YUYV                 0               1               3
+                                    // YVYU                 0               3               1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+//                = 000 0001 (min message len 1 ) 0,0000 (resp len 0   -add later)
+//                  0000,0000,0000
+//                  0001(Spawn a root thread),0001 (Root thread spawn thread)
+//                = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+//                = 000 0110 (min message len 6 ) 0,0000 (resp len 0   -add later)
+//                  1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+//                  0000,0000 (Binding table index, added later)
+//                = 0x0C098700
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+        // Message Descriptors
+                //                = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+                //                  1 (header present 1) 0 1010 (media block write) 000000
+                //                  00000000 (binding table index - set later)
+                //                = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+                             // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT  r43    // Dummy Message Register for EOT
+
+
+.declare    mubMSGPAYLOAD  Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare    muwMSGPAYLOAD  Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare    mudMSGPAYLOAD  Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare    mfMSGPAYLOAD   Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+
+.declare fROBUF		  Base=r9.0		ElementSize=4		SrcRegion=<8;8,1>		  DstRegion=<1>		Type=f
+
+.declare udROBUF		Base=r9.0		ElementSize=4		SrcRegion=<8;8,1>		  DstRegion=<1>		Type=ud
+
+.declare uwROBUF		Base=r9.0		ElementSize=2		SrcRegion=<16;16,1>		DstRegion=<1>		Type=uw
+
+.declare ubROBUF		Base=r9.0		ElementSize=1		SrcRegion=<16;16,1>		DstRegion=<1>		Type=ub
+
+.declare ub4ROBUF 	Base=r9.0		ElementSize=1		SrcRegion=<32;8,4>		DstRegion=<4>		Type=ub
+
+
+// End of common.inc
+
+
+// FileName:		PL2_AVS_Buf_1.asm
+// Author:			Tatiya, Rupesh
+// Description:		Loads 8x8 AVS/IEF PL2 data into Buffer 1
+
+
+
+// FileName     :   PL2_AVS_Buf.asm
+// Author       :   Tatiya, Rupesh
+// Description  :   Loads 8x8 AVS/IEF PL2 data into Buffer N
+
+
+
+// Module name: Scaling.inc
+
+
+
+
+// Description: Includes all definitions explicit to Fast Composite.
+
+
+
+
+// End of common.inc
+
+
+//========== GRF partition ==========
+     // r0 header            :   r0          (1 GRF)
+     // Static parameters    :   r1 - r6     (6 GRFS)
+     // Inline parameters    :   r7 - r8     (2 GRFs)
+     // MSGSRC               :   r27         (1 GRF)
+//===================================
+
+//Interface:
+//========== Static Parameters (Explicit To Fast Composite) ==========
+//r1
+//CSC Set 0
+
+
+.declare udCSC_CURBE    Base=r1.0      ElementSize=4       Type=ud
+
+//Constant alpha
+
+
+//r2
+
+
+// Gen7 AVS WA
+
+
+// WiDi Definitions
+
+
+//Colorfill
+
+
+                                      // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise.	    
+
+.declare ubCOLOR_PIXEL_VAL      Base=r2.20      ElementSize=1       SrcRegion=<0;1,0>       DstRegion=<1>       Type=ub
+
+//r3
+//Normalised Ratio of Horizontal step size with main video for all layers
+
+
+    //Normalised Ratio of Horizontal step size with main video for all layers becomes
+    //Normalised Horizontal step size for all layers in VP_Setup.asm
+
+
+//r4
+//Normalised Vertical step size for all layers
+
+
+//r5
+//Normalised Vertical Frame Origin for all layers
+
+
+//r6
+//Normalised Horizontal Frame Origin for all layers
+
+
+//========== Inline Parameters (Explicit To Fast Composite) ==========
+
+
+//Main video Step X
+
+
+//====================== Binding table (Explicit To Fast Composite)=========================================
+
+
+//Used by Interlaced Scaling Kernels
+
+
+//========== Sampler State Table Index (Explicit To Fast Composite)==========
+//Sampler Index for AVS/IEF messages
+
+
+//Sampler Index for SIMD16 sampler messages
+
+
+//=============================================================================
+
+.declare fBUFFER_0      Base=r64.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f       // float views; each xBUFFER_n below aliases the same base GRF with a different element type
+.declare fBUFFER_1      Base=r80.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f       // buffers 0-3 start 16 GRFs apart: r64, r80, r96, r112
+.declare fBUFFER_2      Base=r96.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+.declare fBUFFER_3      Base=r112.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+.declare fBUFFER_4      Base=r28.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f       // buffers 4 and 5 start at r28 and r46
+.declare fBUFFER_5      Base=r46.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+
+.declare udBUFFER_0     Base=r64.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud      // unsigned dword views of the same registers
+.declare udBUFFER_1     Base=r80.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_2     Base=r96.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_3     Base=r112.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_4     Base=r28.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_5     Base=r46.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+
+.declare uwBUFFER_0     Base=r64.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw      // unsigned word views
+.declare uwBUFFER_1     Base=r80.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw      // send destination in this file: Y at uwBUFFER_1(0), UV at uwBUFFER_1(4)
+.declare uwBUFFER_2     Base=r96.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_3     Base=r112.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_4     Base=r28.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_5     Base=r46.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+
+.declare ubBUFFER_0     Base=r64.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub      // unsigned byte views, unit stride
+.declare ubBUFFER_1     Base=r80.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_2     Base=r96.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_3     Base=r112.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_4     Base=r28.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_5     Base=r46.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+
+.declare ub4BUFFER_0    Base=r64.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub      // byte views with a stride of 4 bytes (source region <32;8,4>, destination stride <4>)
+.declare ub4BUFFER_1    Base=r80.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_2    Base=r96.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_3    Base=r112.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_4    Base=r28.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_5    Base=r46.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+
+//Pointer to mask reg
+
+
+//r18
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+.declare udCSC_COEFF_0  Base=r18.0    ElementSize=4 Type=ud       // 1 GRF
+
+//r19
+
+
+.declare udCSC_COEFF_1  Base=r19.0    ElementSize=4 Type=ud       // 1 GRF
+
+
+//r20
+
+.declare uwALPHA_MASK_REG_TEMP  Base=r20.0    ElementSize=2 SrcRegion=<16;16,1> Type=uw        // 1 GRF
+
+//r21
+
+.declare uwALPHA_MASK_REG       Base=r21.0         ElementSize=2 SrcRegion=<16;16,1> Type=uw        // 1 GRF
+
+//r22
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as
+//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT
+
+//r23
+
+
+//Lumakey
+
+
+//r24
+
+
+//r25
+
+
+//r26
+
+
+//defines to generate LABELS during compile time.
+
+
+        // Message Header
+        // m0.7         31:0    Debug
+        // m0.6         31:0    Debug
+        // m0.5         31:0    Ignored
+        // m0.4         31:0    Ignored
+        // m0.3         31:0    Ignored
+        // m0.2         31:16   Ignored
+        //              15      Alpha Write Channel Mask        enable=0, disable=1
+        //              14      Blue Write Channel Mask  (U)
+        //              13      Green Write Channel Mask (Y)
+        //              12      Red Write Channel Mask   (V)
+        //              11:0    Ignored
+        // m0.1                 Ignored
+        // m0.0                 Ignored
+
+
+        // AVS payload
+        // m1.7                 Group ID Number
+        // m1.6                 U 2nd Derivative        ---> NLAS dx
+        // m1.5                 Delta V                 ---> Step Y
+        // m1.4                 Delta U                 ---> Step X
+        // m1.3                 Pixel 0 V Address       ---> ORIY (Y0)
+        // m1.2                 Pixel 0 U Address       ---> ORIX (X0)
+        // m1.1                 Vertical Block Number
+        // m1.0                 Reserved
+
+        // Sampler Message Descriptor
+        // 31:29        Reserved                        000
+        // 28:25        Message length                  0010
+        // 24:20        Response length                 xxxxx   ---> 4GRFs for each enabled channel (AVS), 2GRFs for each enabled channel (sample unorm)
+        // 19           Header Present                  1
+        // 18:17        SIMD Mode                       11      ---> SIMD32/64
+        // 16:12        Message Type                    xxxxx   ---> 01011 sample_8x8, 01100 (sample_unorm), 01010 (sample_unorm+killpix)
+        // 11:8         Sampler Index                   xxxx
+        // 7:0          Binding Table Index             xxxxxxxx
+
+
+        // Msg Header M0.2
+        // 15:15        Alpha Write Channel Mask, 0: written back, 1: not written back
+        // 14:14        Blue  Write Channel Mask
+        // 13:13        Green Write Channel Mask
+        // 12:12        Red   Write Channel Mask
+
+
+//By design, Buffer 0,1,2,3 always have Layer 0 and Buffer 4,5 always have L1-L7
+
+
+//used to generate LABELS at compile time.
+
+
+        // 18:17        SIMD Mode                       10      ---> SIMD16
+        // 16:12        Message Type                    xxxxx   ---> 00000 (SIMD16)
+
+
+//r10-17  - 8 GRFs to load SIMD16 data (upto 4 channels)
+//r18-19  - 2 GRFs to store sampler ramp.
+
+    .declare mfSCALING_0X_34X_PAYLOAD	Base=r14.0	ElementSize=4		SrcRegion=<8;8,1>		DstRegion=<1>		Type=f
+    .declare muwSCALING_0X_34X_PAYLOAD	Base=r14.0	ElementSize=2		SrcRegion=<16;16,1>		DstRegion=<1>		Type=uw
+    .declare mudCALING_0X_34X_PAYLOAD	Base=r14.0	ElementSize=4		SrcRegion=<8;8,1>		DstRegion=<1>		Type=ud
+    .declare mubCALING_0X_34X_PAYLOAD	Base=r14.0	ElementSize=1		SrcRegion=<32;32,1>		DstRegion=<1>		Type=ub
+
+
+	.declare fSCALING_0X_34X_TEMP		Base=r9.0	ElementSize=4		SrcRegion=<8;8,1>		DstRegion=<1>		Type=f
+	.declare udSCALING_0X_34X_TEMP		Base=r9.0	ElementSize=4		SrcRegion=<8;8,1>		DstRegion=<1>		Type=ud
+	.declare ub4SCALING_0X_34X_TEMP		Base=r9.0	ElementSize=1		SrcRegion=<32;8,4>		DstRegion=<1>		Type=ub
+	.declare uwSCALING_0X_34X_TEMP		Base=r9.0	ElementSize=2		SrcRegion=<16;16,1>		DstRegion=<1>		Type=uw
+
+	// Sampler ramp is used for Scaling 0X_0.34X
+	.declare	fSAMPLER_RAMP  		Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> Type=f		// 1 GRFs, 8 elements
+
+
+	//#define rMSGDSC_UV    		r23.0
+
+
+//End of _SCALING_
+
+
+    // Load the Layer 0 PL2 block for Buffer 1 with two sampler sends: Y into uwBUFFER_1(0), then UV into uwBUFFER_1(4).
+    // Both sends are bypassed when the layer is flagged to be skipped.
+
+        // f0.1 pre-computed in Set_Layer_0; the predicate is inverted, so the jump is taken when f0.1 is clear
+        (-f0.1)  jmpi  (1)  SKIP_AVS_LOAD_L0_1_
+
+
+    //AVS_PAYLOAD already has all the data loaded at this point
+    add (1)     a0.0:ud     r23.5<0;1,0>:ud      0x44EB400:ud      // msg desc = r23.5 + constant (msg len 2, response len 4, sample_8x8, sampler index 4)
+
+    mov (1)     r16.2:ud      0x0000D000:ud                                        // Enable Green (Y) channel only: mask bits 15/14/12 (A/B/R) set = not written back
+
+
+    // set the vertical block number (r25.1 is copied into payload dword 1 by the mov (8) below)
+
+       add (1)   r25.1<1>:ud    r7.12:uw  1:ud                                   // r25.1 = r7.12:uw + 1
+
+
+    mov (8)     r17.0:ud      r25.0<8;8,1>:ud                                     // Copy msg payload mirrors (r25) to r17, the second register of the 2-register message
+
+    // Gen7 AVS WA Only for YUV packed surfaces, NV12 and Y-channel only for Planar surfaces
+    //   if (((int)(u_left*width + 5.0/256) > (int)(u_left*width)) 
+    //   {
+    //       modified_u_coord = u_coord - 5.0/(256*width); //floating point
+    //   }
+    //   else if(((int)(u_left*width + 255.0/256) == (int)(u_left*width)) 
+    //   {
+    //       modified_u_coord = u_coord + 1.0/(256*width); //floating point
+    //   }
+    //   else{
+    //       modified_u_coord = u_coord;
+    //   }
+    //   Where u_left = u - 2*du + 3*ddu for IEF On
+    //   And u_left = u for IEF Off case
+    //   
+
+    // check whether Gen7 AVS WA is enabled,
+    mov  (1)    r14.8:uw            f0.0:uw                           // save f0.0 (restored at GEN7_PL2_AVS_WA_DONE_L0_1_)
+    mov  (1)    r14.5:f             r17.2<0;1,0>:f           // save pixel 0 U for chroma
+
+    and.nz.f0.0  (1)     null<1>:uw     r2.3:uw    0x2:uw                       // f0.0 = (r2.3:uw & 0x2) != 0, i.e. WA enabled
+    (-f0.0)jmpi  (1)     GEN7_PL2_AVS_WA_DONE_L0_1_                             // WA disabled: r17.2 is left unmodified
+
+    // Gen7 AVS WA, check if IEF is ON for choosing Gen7 AVS WA formula
+
+    and.nz.f0.0  (8)     null<1>:uw     r2.3<0;1,0>:uw             0x4:uw       // f0.0 = (r2.3:uw & 0x4) != 0, i.e. IEF on
+    (f0.0)mov (8)    acc0.0:f          r17.2<0;1,0>:f                           // acc = u   (payload dword 2)
+    (f0.0)mac (8)    acc0.0:f          r17.4<0;1,0>:f    -2.0:f                 // acc += -2 * du  (payload dword 4)
+    (f0.0)mac (8)    acc0.0:f          r17.6<0;1,0>:f     3.0:f                 // acc += 3 * ddu  (payload dword 6)
+    (f0.0)mov (1)    r14.2:f        acc0:f                                 // IEF ON,  rTEMP3.2 = u_left
+    (-f0.0)mov (1)   r14.2:f        r17.2<0;1,0>:f                // IEF OFF, rTEMP3.2 = u_left
+
+    and (1)         r14.1:ud     r2.3:uw    0xFFF8:uw                           // keep bits 15:3 of r2.3:uw ...
+    asr (1)         r14.1:ud     r14.1:ud    3:d                                // ... shifted right by 3: the "width" used below
+    mov (1)         r14.1:f      r14.1:ud                                       // width converted to float
+
+    // Gen7 AVS WA, if (int)(u_left*width + 5.0/256) > (int)(u_left*width)
+    mul (1)          r14.0:f        r14.2:f                  r14.1:f    // rTEMP3.0 = u_left*width
+    add (1)          r14.2:f        r14.0:f                  0.01953125:f  // rTEMP3.2 = u_left*width + 5.0/256
+    add (1)          r14.3:f        r14.0:f                  0.99609375:f  // rTEMP3.3 = u_left*width + 255.0/256
+
+    //Check if the values are < 0 and account for (int) cast of negative numbers
+
+    //(int)(u_left*width)
+    cmp.l.f0.0 (1)   null<1>:f          r14.0:f                  0.00000000:f          
+    mov (1)          r14.0:d        r14.0:f
+    (f0.0)add (1)    r14.0:d        r14.0<0;1,0>:d           -1:d        // negative input: subtract 1 from the converted integer
+
+    //(int)(u_left*width + 5.0/256)
+    cmp.l.f0.0 (1)   null<1>:f          r14.2:f                  0.00000000:f          
+    mov (1)          r14.2:d        r14.2:f        
+    (f0.0)add (1)    r14.2:d        r14.2<0;1,0>:d           -1:d        // negative input: subtract 1 from the converted integer
+
+    //(int)(u_left*width + 255.0/256)
+    cmp.l.f0.0 (1)   null<1>:f          r14.3:f                  0.00000000:f          
+    mov (1)          r14.3:d        r14.3:f
+    (f0.0)add (1)    r14.3:d        r14.3<0;1,0>:d           -1:d        // negative input: subtract 1 from the converted integer
+
+    mov  (1)    f0.0:uw                0:uw                   // clear flag (presumably so f0.0 stays false when the predicated cmp.e below is skipped)
+    //if (((int)(u_left*width + 5.0/256) > (int)(u_left*width))
+    cmp.g.f1.0  (1)     null<1>:d        r14.2:d    r14.0:d
+    // modified_u_coord = u_coord - 5.0/(256*width); //floating point   (r2.3:f presumably holds 5.0/(256*width) -- confirm against CURBE layout)
+    (f1.0) add (1)     r17.2:f       r17.2<0;1,0>:f   -r2.3:f
+    //else if(((int)(u_left*width + 255.0/256) == (int)(u_left*width)) 
+    (-f1.0) cmp.e.f0.0  (1) null<1>:d    r14.3:d    r14.0:d
+    // modified_u_coord = u_coord + 1.0/(256*width); //floating point   (r2.2:f presumably holds 1.0/(256*width) -- confirm against CURBE layout)
+    (f0.0) add (1)     r17.2:f       r17.2<0;1,0>:f   r2.2:f
+
+GEN7_PL2_AVS_WA_DONE_L0_1_:
+    mov  (1)    f0.0:uw                r14.8:uw                   // restore f0.0
+
+    send (1)    uwBUFFER_1(0)<1>   r16    0x2    a0.0:ud
+    // Returns Y data in 4 GRFs in scrambled order
+
+    mov  (1)    r17.2:f       r14.5:f                    // restore pixel 0 U for chroma, No AVS WA for chroma
+
+    add (1)     a0.0:ud                r23.5<0;1,0>:ud      0x48EB801:ud            // msg desc (response len 8, sampler index 8); 1 is added to change BI to UV
+    mov (1)     r16.2:ud      0x0000A000:ud                                               // Enable Red+Blue channel (V and U per the header mask table)
+
+    send (1)    uwBUFFER_1(4)<1>   r16    0x2    a0.0:ud
+    // Returns UV data in 8 GRFs in scrambled order
+
+SKIP_AVS_LOAD_L0_1_:
+    nop
+
+
diff --git a/src/shaders/post_processing/gen7/PL2_AVS_Buf_2.g4a b/src/shaders/post_processing/gen7/PL2_AVS_Buf_2.g4a
new file mode 100644
index 0000000..d07d1f2
--- /dev/null
+++ b/src/shaders/post_processing/gen7/PL2_AVS_Buf_2.g4a
@@ -0,0 +1,536 @@
+/*
+ *  Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+//   42    // Total instruction count
+//    1    // Total kernel count
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16)
+.default_register_type  :ub
+
+.reg_count_total        128
+.reg_count_payload      7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+                                    //  e.g.            byte0   byte1  byte2
+                                    // YUYV               0       1      3
+                                    // YVYU               0       3      1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+                                    //  e.g.              byte0           byte1           byte2
+                                    // YUYV                 0               1               3
+                                    // YVYU                 0               3               1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+//                = 000 0001 (min message len 1 ) 0,0000 (resp len 0   -add later)
+//                  0000,0000,0000
+//                  0001(Spawn a root thread),0001 (Root thread spawn thread)
+//                = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+//                = 000 0110 (min message len 6 ) 0,0000 (resp len 0   -add later)
+//                  1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+//                  0000,0000 (Binding table index, added later)
+//                = 0x0C098700
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+        // Message Descriptors
+                //                = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+                //                  1 (header present 1) 0 1010 (media block write) 000000
+                //                  00000000 (binding table index - set later)
+                //                = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+                             // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT  r43    // Dummy Message Register for EOT
+
+
+.declare    mubMSGPAYLOAD  Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare    muwMSGPAYLOAD  Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare    mudMSGPAYLOAD  Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare    mfMSGPAYLOAD   Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+
+.declare fROBUF		  Base=r9.0		ElementSize=4		SrcRegion=<8;8,1>		  DstRegion=<1>		Type=f
+
+.declare udROBUF		Base=r9.0		ElementSize=4		SrcRegion=<8;8,1>		  DstRegion=<1>		Type=ud
+
+.declare uwROBUF		Base=r9.0		ElementSize=2		SrcRegion=<16;16,1>		DstRegion=<1>		Type=uw
+
+.declare ubROBUF		Base=r9.0		ElementSize=1		SrcRegion=<16;16,1>		DstRegion=<1>		Type=ub
+
+.declare ub4ROBUF 	Base=r9.0		ElementSize=1		SrcRegion=<32;8,4>		DstRegion=<4>		Type=ub
+
+
+// End of common.inc
+
+
+// FileName:		PL2_AVS_Buf_2.asm
+// Author:			Tatiya, Rupesh
+// Description:		Loads 8x8 AVS/IEF PL2 data into Buffer 2
+
+
+
+// FileName     :   PL2_AVS_Buf.asm
+// Author       :   Tatiya, Rupesh
+// Description  :   Loads 8x8 AVS/IEF PL2 data into Buffer N
+
+
+
+// Module name: Scaling.inc
+
+
+
+
+// Description: Includes all definitions explicit to Fast Composite.
+
+
+
+
+// End of common.inc
+
+
+//========== GRF partition ==========
+     // r0 header            :   r0          (1 GRF)
+     // Static parameters    :   r1 - r6     (6 GRFS)
+     // Inline parameters    :   r7 - r8     (2 GRFs)
+     // MSGSRC               :   r27         (1 GRF)
+//===================================
+
+//Interface:
+//========== Static Parameters (Explicit To Fast Composite) ==========
+//r1
+//CSC Set 0
+
+
+.declare udCSC_CURBE    Base=r1.0      ElementSize=4       Type=ud
+
+//Constant alpha
+
+
+//r2
+
+
+// Gen7 AVS WA
+
+
+// WiDi Definitions
+
+
+//Colorfill
+
+
+                                      // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise.	    
+
+.declare ubCOLOR_PIXEL_VAL      Base=r2.20      ElementSize=1       SrcRegion=<0;1,0>       DstRegion=<1>       Type=ub
+
+//r3
+//Normalised Ratio of Horizontal step size with main video for all layers
+
+
+    //Normalised Ratio of Horizontal step size with main video for all layers becomes
+    //Normalised Horizontal step size for all layers in VP_Setup.asm
+
+
+//r4
+//Normalised Vertical step size for all layers
+
+
+//r5
+//Normalised Vertical Frame Origin for all layers
+
+
+//r6
+//Normalised Horizontal Frame Origin for all layers
+
+
+//========== Inline Parameters (Explicit To Fast Composite) ==========
+
+
+//Main video Step X
+
+
+//====================== Binding table (Explicit To Fast Composite)=========================================
+
+
+//Used by Interlaced Scaling Kernels
+
+
+//========== Sampler State Table Index (Explicit To Fast Composite)==========
+//Sampler Index for AVS/IEF messages
+
+
+//Sampler Index for SIMD16 sampler messages
+
+
+//=============================================================================
+
+.declare fBUFFER_0      Base=r64.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+.declare fBUFFER_1      Base=r80.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+.declare fBUFFER_2      Base=r96.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+.declare fBUFFER_3      Base=r112.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+.declare fBUFFER_4      Base=r28.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+.declare fBUFFER_5      Base=r46.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+
+.declare udBUFFER_0     Base=r64.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_1     Base=r80.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_2     Base=r96.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_3     Base=r112.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_4     Base=r28.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_5     Base=r46.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+
+.declare uwBUFFER_0     Base=r64.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_1     Base=r80.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_2     Base=r96.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_3     Base=r112.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_4     Base=r28.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_5     Base=r46.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+
+.declare ubBUFFER_0     Base=r64.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_1     Base=r80.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_2     Base=r96.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_3     Base=r112.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_4     Base=r28.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_5     Base=r46.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+
+.declare ub4BUFFER_0    Base=r64.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_1    Base=r80.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_2    Base=r96.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_3    Base=r112.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_4    Base=r28.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_5    Base=r46.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+
+//Pointer to mask reg
+
+
+//r18
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+.declare udCSC_COEFF_0  Base=r18.0    ElementSize=4 Type=ud       // 1 GRF
+
+//r19
+
+
+.declare udCSC_COEFF_1  Base=r19.0    ElementSize=4 Type=ud       // 1 GRF
+
+
+//r20
+
+.declare uwALPHA_MASK_REG_TEMP  Base=r20.0    ElementSize=2 SrcRegion=<16;16,1> Type=uw        // 1 GRF
+
+//r21
+
+.declare uwALPHA_MASK_REG       Base=r21.0         ElementSize=2 SrcRegion=<16;16,1> Type=uw        // 1 GRF
+
+//r22
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as
+//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT
+
+//r23
+
+
+//Lumakey
+
+
+//r24
+
+
+//r25
+
+
+//r26
+
+
+//defines to generate LABELS during compile time.
+
+
+        // Message Header
+        // m0.7         31:0    Debug
+        // m0.6         31:0    Debug
+        // m0.5         31:0    Ignored
+        // m0.4         31:0    Ignored
+        // m0.3         31:0    Ignored
+        // m0.2         31:16   Ignored
+        //              15      Alpha Write Channel Mask        enable=0, disable=1
+        //              14      Blue Write Channel Mask  (U)
+        //              13      Green Write Channel Mask (Y)
+        //              12      Red Write Channel Mask   (V)
+        //              11:0    Ignored
+        // m0.1                 Ignored
+        // m0.0                 Ignored
+
+
+        // AVS payload
+        // m1.7                 Group ID Number
+        // m1.6                 U 2nd Derivative        ---> NLAS dx
+        // m1.5                 Delta V                 ---> Step Y
+        // m1.4                 Delta U                 ---> Step X
+        // m1.3                 Pixel 0 V Address       ---> ORIY (Y0)
+        // m1.2                 Pixel 0 U Address       ---> ORIX (X0)
+        // m1.1                 Vertical Block Number
+        // m1.0                 Reserved
+
+        // Sampler Message Descriptor
+        // 31:29        Reserved                        000
+        // 28:25        Message length                  0010
+        // 24:20        Response length                 xxxxx   ---> 4GRFs for each enabled channel (AVS), 2GRFs for each enabled channel (sample unorm)
+        // 19           Header Present                  1
+        // 18:17        SIMD Mode                       11      ---> SIMD32/64
+        // 16:12        Message Type                    xxxxx   ---> 01011 sample_8x8, 01100 (sample_unorm), 01010 (sample_unorm+killpix)
+        // 11:8         Sampler Index                   xxxx
+        // 7:0          Binding Table Index             xxxxxxxx
+
+
+        // Msg Header M0.2
+        // 15:15        Alpha Write Channel Mask, 0: written back, 1: not written back
+        // 14:14        Blue  Write Channel Mask
+        // 13:13        Green Write Channel Mask
+        // 12:12        Red   Write Channel Mask
+
+
+//By design, Buffer 0,1,2,3 always have Layer 0 and Buffer 4,5 always have L1-L7
+
+
+//used to generate LABELS at compile time.
+
+
+        // 18:17        SIMD Mode                       10      ---> SIMD16
+        // 16:12        Message Type                    xxxxx   ---> 00000 (SIMD16)
+
+
+//r10-17  - 8 GRFs to load SIMD16 data (up to 4 channels)
+//r18-19  - 2 GRFs to store sampler ramp.
+
+    .declare mfSCALING_0X_34X_PAYLOAD	Base=r14.0	ElementSize=4		SrcRegion=<8;8,1>		DstRegion=<1>		Type=f	// float alias of the payload register r14
+    .declare muwSCALING_0X_34X_PAYLOAD	Base=r14.0	ElementSize=2		SrcRegion=<16;16,1>		DstRegion=<1>		Type=uw	// word alias of the same r14 storage
+    .declare mudCALING_0X_34X_PAYLOAD	Base=r14.0	ElementSize=4		SrcRegion=<8;8,1>		DstRegion=<1>		Type=ud	// dword alias of r14; NOTE(review): name lacks the 'S' of "SCALING" used by the mf/muw aliases - confirm no user depends on this spelling before renaming
+    .declare mubCALING_0X_34X_PAYLOAD	Base=r14.0	ElementSize=1		SrcRegion=<32;32,1>		DstRegion=<1>		Type=ub	// byte alias of r14; NOTE(review): same missing 'S' in the name as the ud alias
+
+
+	.declare fSCALING_0X_34X_TEMP		Base=r9.0	ElementSize=4		SrcRegion=<8;8,1>		DstRegion=<1>		Type=f	// float alias of the temp register r9
+	.declare udSCALING_0X_34X_TEMP		Base=r9.0	ElementSize=4		SrcRegion=<8;8,1>		DstRegion=<1>		Type=ud	// dword alias of r9
+	.declare ub4SCALING_0X_34X_TEMP		Base=r9.0	ElementSize=1		SrcRegion=<32;8,4>		DstRegion=<1>		Type=ub	// byte alias of r9 whose source region steps 4 bytes between elements (one byte per dword)
+	.declare uwSCALING_0X_34X_TEMP		Base=r9.0	ElementSize=2		SrcRegion=<16;16,1>		DstRegion=<1>		Type=uw	// word alias of r9
+
+	// Sampler ramp is used for Scaling 0X_0.34X
+	.declare	fSAMPLER_RAMP  		Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> Type=f		// 1 GRF, 8 float elements; shares Base=r9.0 with the *_TEMP aliases. NOTE(review): the register-map comment before these declares places the ramp in r18-19 - confirm which is current
+
+
+	//#define rMSGDSC_UV    		r23.0
+
+
+//End of _SCALING_
+
+
+    //Kernel body: unless the layer is skipped, issue two sampler sends (Y, then UV) whose results land in uwBUFFER_2. First check if layer is to be skipped
+
+
+        // f0.1 pre-computed in Set_Layer_0
+        (-f0.1)  jmpi  (1)  SKIP_AVS_LOAD_L0_2_                       // f0.1 clear: jump past both sampler loads
+
+
+    //AVS_PAYLOAD already has all the data loaded at this point
+    add (1)     a0.0:ud     r23.5<0;1,0>:ud      0x44EB400:ud      //msg desc = r23.5 + 0x044EB400; per the descriptor layout in this file: msg length 2, response length 4, header present, SIMD mode 11, message type 01011 (sample_8x8), sampler index 4
+
+    mov (1)     r16.2:ud      0x0000D000:ud                                        // Header M0.2 write mask: bits 15,14,12 set (not written back), bit 13 clear -> only the Green (Y) channel is enabled
+
+
+    // set the vertical block number (dword 1 of the payload mirror r25) to r7.12:uw + 2
+
+
+      add (1)   r25.1<1>:ud    r7.12:uw  2:ud
+
+
+    mov (8)     r17.0:ud      r25.0<8;8,1>:ud                                     // Copy msg payload mirror (r25) to r17, the second register of the 2-register message that starts at r16
+
+    // Gen7 AVS WA Only for YUV packed surfaces, NV12 and Y-channel only for Planar surfaces
+    //   if ((int)(u_left*width + 5.0/256) > (int)(u_left*width))
+    //   {
+    //       modified_u_coord = u_coord - 5.0/(256*width); //floating point
+    //   }
+    //   else if ((int)(u_left*width + 255.0/256) == (int)(u_left*width))
+    //   {
+    //       modified_u_coord = u_coord + 1.0/(256*width); //floating point
+    //   }
+    //   else{
+    //       modified_u_coord = u_coord;
+    //   }
+    //   Where u_left = u - 2*du + 3*ddu for IEF On
+    //   And u_left = u for IEF Off case
+    //   In the code below: u = r17.2, du = r17.4, ddu = r17.6, r14 is scratch
+
+    // check whether Gen7 AVS WA is enabled,
+    mov  (1)    r14.8:uw            f0.0:uw                           // save f0.0
+    mov  (1)    r14.5:f             r17.2<0;1,0>:f           // save pixel 0 U for chroma
+
+    and.nz.f0.0  (1)     null<1>:uw     r2.3:uw    0x2:uw             // f0.0 = (r2.3:uw & 0x2) != 0, the WA-enable bit
+    (-f0.0)jmpi  (1)     GEN7_PL2_AVS_WA_DONE_L0_2_                   // WA bit clear: leave the U coordinate untouched
+
+    // Gen7 AVS WA, check if IEF is ON for choosing Gen7 AVS WA formula
+
+    and.nz.f0.0  (8)     null<1>:uw     r2.3<0;1,0>:uw             0x4:uw      // f0.0 (8 channels) = (r2.3:uw & 0x4) != 0, the IEF-on bit
+    (f0.0)mov (8)    acc0.0:f          r17.2<0;1,0>:f                          // acc0  = u
+    (f0.0)mac (8)    acc0.0:f          r17.4<0;1,0>:f    -2.0:f                // acc0 += du  * -2.0
+    (f0.0)mac (8)    acc0.0:f          r17.6<0;1,0>:f     3.0:f                // acc0 += ddu *  3.0
+    (f0.0)mov (1)    r14.2:f        acc0:f                                 // IEF ON,  rTEMP3.2 = u_left
+    (-f0.0)mov (1)   r14.2:f        r17.2<0;1,0>:f                // IEF OFF, rTEMP3.2 = u_left
+
+    and (1)         r14.1:ud     r2.3:uw    0xFFF8:uw             // keep bits 15:3 of r2.3:uw
+    asr (1)         r14.1:ud     r14.1:ud    3:d                  // shift them down by 3
+    mov (1)         r14.1:f      r14.1:ud                         // convert to float: the "width" factor of the formula
+
+    // Gen7 AVS WA, if (int)(u_left*width + 5.0/256) > (int)(u_left*width)
+    mul (1)          r14.0:f        r14.2:f                  r14.1:f    // rTEMP3.0 = u_left*width
+    add (1)          r14.2:f        r14.0:f                  0.01953125:f  // rTEMP3.2 = u_left*width + 5.0/256
+    add (1)          r14.3:f        r14.0:f                  0.99609375:f  // rTEMP3.3 = u_left*width + 255.0/256
+
+    //Check if the values are < 0 and account for (int) cast of negative numbers
+
+    //(int)(u_left*width)
+    cmp.l.f0.0 (1)   null<1>:f          r14.0:f                  0.00000000:f          // f0.0 = value < 0
+    mov (1)          r14.0:d        r14.0:f                                            // float -> signed int, in place
+    (f0.0)add (1)    r14.0:d        r14.0<0;1,0>:d           -1:d                      // negative input: subtract 1 from the converted value
+
+    //(int)(u_left*width + 5.0/256)
+    cmp.l.f0.0 (1)   null<1>:f          r14.2:f                  0.00000000:f          // f0.0 = value < 0
+    mov (1)          r14.2:d        r14.2:f                                            // float -> signed int, in place
+    (f0.0)add (1)    r14.2:d        r14.2<0;1,0>:d           -1:d                      // negative input: subtract 1 from the converted value
+
+    //(int)(u_left*width + 255.0/256)
+    cmp.l.f0.0 (1)   null<1>:f          r14.3:f                  0.00000000:f          // f0.0 = value < 0
+    mov (1)          r14.3:d        r14.3:f                                            // float -> signed int, in place
+    (f0.0)add (1)    r14.3:d        r14.3<0;1,0>:d           -1:d                      // negative input: subtract 1 from the converted value
+
+    mov  (1)    f0.0:uw                0:uw                   // clear flag, so the last add only fires if the predicated cmp.e below runs and sets it
+    //if ((int)(u_left*width + 5.0/256) > (int)(u_left*width))
+    cmp.g.f1.0  (1)     null<1>:d        r14.2:d    r14.0:d
+    // modified_u_coord = u_coord - 5.0/(256*width); //floating point (r2.3:f is presumably that precomputed term - confirm against the host setup code)
+    (f1.0) add (1)     r17.2:f       r17.2<0;1,0>:f   -r2.3:f
+    //else if ((int)(u_left*width + 255.0/256) == (int)(u_left*width)) - the compare only executes when f1.0 is clear
+    (-f1.0) cmp.e.f0.0  (1) null<1>:d    r14.3:d    r14.0:d
+    // modified_u_coord = u_coord + 1.0/(256*width); //floating point (r2.2:f is presumably that precomputed term - confirm against the host setup code)
+    (f0.0) add (1)     r17.2:f       r17.2<0;1,0>:f   r2.2:f
+
+GEN7_PL2_AVS_WA_DONE_L0_2_:
+    mov  (1)    f0.0:uw                r14.8:uw                   // restore f0.0
+
+    send (1)    uwBUFFER_2(0)<1>   r16    0x2    a0.0:ud          // Y load: message starts at r16, descriptor from a0.0, response written at uwBUFFER_2(0)
+    // Returns Y data in 4 GRFs in scrambled order
+
+    mov  (1)    r17.2:f       r14.5:f                    // restore pixel 0 U for chroma, No AVS WA for chroma
+
+    add (1)     a0.0:ud                r23.5<0;1,0>:ud      0x48EB801:ud            // msg desc; 1 is added to change BI to UV (0x048EB801: msg length 2, response length 8, sample_8x8, sampler index 8)
+    mov (1)     r16.2:ud      0x0000A000:ud                                               // Enable Red+Blue channel (bits 15 and 13 set: Alpha and Green not written back)
+
+    send (1)    uwBUFFER_2(4)<1>   r16    0x2    a0.0:ud          // UV load: response written at uwBUFFER_2(4), presumably the GRF right after the 4 GRFs of Y data
+    // Returns UV data in 8 GRFs in scrambled order
+
+SKIP_AVS_LOAD_L0_2_:
+    nop
+
+
diff --git a/src/shaders/post_processing/gen7/PL2_AVS_Buf_3.g4a b/src/shaders/post_processing/gen7/PL2_AVS_Buf_3.g4a
new file mode 100644
index 0000000..3500df1
--- /dev/null
+++ b/src/shaders/post_processing/gen7/PL2_AVS_Buf_3.g4a
@@ -0,0 +1,536 @@
+/*
+ *  Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+//   42    // Total instruction count
+//    1    // Total kernel count
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16)
+.default_register_type  :ub
+
+.reg_count_total        128
+.reg_count_payload      7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+                                    //  e.g.            byte0   byte1  byte2
+                                    // YUYV               0       1      3
+                                    // YVYU               0       3      1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+                                    //  e.g.              byte0           byte1           byte2
+                                    // YUYV                 0               1               3
+                                    // YVYU                 0               3               1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+//                = 000 0001 (min message len 1 ) 0,0000 (resp len 0   -add later)
+//                  0000,0000,0000
+//                  0001(Spawn a root thread),0001 (Root thread spawn thread)
+//                = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+//                = 000 0110 (min message len 6 ) 0,0000 (resp len 0   -add later)
+//                  1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+//                  0000,0000 (Binding table index, added later)
+//                = 0x0C098700
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+        // Message Descriptors
+                //                = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+                //                  1 (header present 1) 0 1010 (media block write) 000000
+                //                  00000000 (binding table index - set later)
+                //                = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+                             // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT  r43    // Dummy Message Register for EOT
+
+
+.declare    mubMSGPAYLOAD  Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub    // byte alias of the message payload area starting at r30
+.declare    muwMSGPAYLOAD  Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw    // word alias of the same r30 storage
+.declare    mudMSGPAYLOAD  Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud      // dword alias of the same r30 storage
+.declare    mfMSGPAYLOAD   Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f       // float alias; NOTE(review): r30 lies between the BUFFER_4 (r28) and BUFFER_5 (r46) bases declared later in this file - presumably it overlays BUFFER_4, confirm
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+
+.declare fROBUF		  Base=r9.0		ElementSize=4		SrcRegion=<8;8,1>		  DstRegion=<1>		Type=f	// float alias of the rotation temp space at r9
+
+.declare udROBUF		Base=r9.0		ElementSize=4		SrcRegion=<8;8,1>		  DstRegion=<1>		Type=ud	// dword alias of the same r9 storage
+
+.declare uwROBUF		Base=r9.0		ElementSize=2		SrcRegion=<16;16,1>		DstRegion=<1>		Type=uw	// word alias of the same r9 storage
+
+.declare ubROBUF		Base=r9.0		ElementSize=1		SrcRegion=<16;16,1>		DstRegion=<1>		Type=ub	// byte alias, contiguous bytes
+
+.declare ub4ROBUF 	Base=r9.0		ElementSize=1		SrcRegion=<32;8,4>		DstRegion=<4>		Type=ub	// byte alias with stride 4 on source and destination (one byte per dword)
+
+
+// End of common.inc
+
+
+// FileName:		PL2_AVS_Buf_3.asm
+// Author:			Tatiya, Rupesh
+// Description:		Loads 8x8 AVS/IEF PL2 data into Buffer 3
+
+
+
+// FileName     :   PL2_AVS_Buf.asm
+// Author       :   Tatiya, Rupesh
+// Description  :   Loads 8x8 AVS/IEF PL2 data into Buffer N
+
+
+
+// Module name: Scaling.inc
+
+
+
+
+// Description: Includes all definitions explicit to Fast Composite.
+
+
+
+
+// End of common.inc
+
+
+//========== GRF partition ==========
+     // r0 header            :   r0          (1 GRF)
+     // Static parameters    :   r1 - r6     (6 GRFS)
+     // Inline parameters    :   r7 - r8     (2 GRFs)
+     // MSGSRC               :   r27         (1 GRF)
+//===================================
+
+//Interface:
+//========== Static Parameters (Explicit To Fast Composite) ==========
+//r1
+//CSC Set 0
+
+
+.declare udCSC_CURBE    Base=r1.0      ElementSize=4       Type=ud       // dword alias of static-parameter register r1 (the "CSC Set 0" section)
+
+//Constant alpha
+
+
+//r2
+
+
+// Gen7 AVS WA
+
+
+// WiDi Definitions
+
+
+//Colorfill
+
+
+                                      // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise.	    
+
+.declare ubCOLOR_PIXEL_VAL      Base=r2.20      ElementSize=1       SrcRegion=<0;1,0>       DstRegion=<1>       Type=ub       // byte alias starting at r2.20 (Colorfill section); source region <0;1,0> has zero strides, so reads repeat a single byte
+
+//r3
+//Normalised Ratio of Horizontal step size with main video for all layers
+
+
+    //Normalised Ratio of Horizontal step size with main video for all layers becomes
+    //Normalised Horizontal step size for all layers in VP_Setup.asm
+
+
+//r4
+//Normalised Vertical step size for all layers
+
+
+//r5
+//Normalised Vertical Frame Origin for all layers
+
+
+//r6
+//Normalised Horizontal Frame Origin for all layers
+
+
+//========== Inline Parameters (Explicit To Fast Composite) ==========
+
+
+//Main video Step X
+
+
+//====================== Binding table (Explicit To Fast Composite)=========================================
+
+
+//Used by Interlaced Scaling Kernels
+
+
+//========== Sampler State Table Index (Explicit To Fast Composite)==========
+//Sampler Index for AVS/IEF messages
+
+
+//Sampler Index for SIMD16 sampler messages
+
+
+//=============================================================================
+
+.declare fBUFFER_0      Base=r64.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f       // float aliases of the six data buffers; buffers 0-3 start 16 GRFs apart (r64, r80, r96, r112)
+.declare fBUFFER_1      Base=r80.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+.declare fBUFFER_2      Base=r96.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+.declare fBUFFER_3      Base=r112.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+.declare fBUFFER_4      Base=r28.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f       // buffers 4 and 5 sit lower in the register file, at r28 and r46
+.declare fBUFFER_5      Base=r46.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+
+.declare udBUFFER_0     Base=r64.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud       // dword aliases of the same six base registers
+.declare udBUFFER_1     Base=r80.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_2     Base=r96.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_3     Base=r112.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_4     Base=r28.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_5     Base=r46.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+
+.declare uwBUFFER_0     Base=r64.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw       // word aliases; the sampler sends in this file name their response destination through these
+.declare uwBUFFER_1     Base=r80.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_2     Base=r96.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_3     Base=r112.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_4     Base=r28.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_5     Base=r46.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+
+.declare ubBUFFER_0     Base=r64.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub       // byte aliases, contiguous bytes
+.declare ubBUFFER_1     Base=r80.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_2     Base=r96.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_3     Base=r112.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_4     Base=r28.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_5     Base=r46.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+
+.declare ub4BUFFER_0    Base=r64.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub       // byte aliases with stride 4 on source and destination (one byte per dword)
+.declare ub4BUFFER_1    Base=r80.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_2    Base=r96.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_3    Base=r112.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_4    Base=r28.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_5    Base=r46.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+
+//Pointer to mask reg
+
+
+//r18
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+.declare udCSC_COEFF_0  Base=r18.0    ElementSize=4 Type=ud       // 1 GRF: dword (ud) alias of r18
+
+//r19
+
+
+.declare udCSC_COEFF_1  Base=r19.0    ElementSize=4 Type=ud       // 1 GRF: dword (ud) alias of r19
+
+
+//r20
+
+.declare uwALPHA_MASK_REG_TEMP  Base=r20.0    ElementSize=2 SrcRegion=<16;16,1> Type=uw        // 1 GRF: word (uw) alias of r20, default source region covers 16 contiguous words
+
+//r21
+
+.declare uwALPHA_MASK_REG       Base=r21.0         ElementSize=2 SrcRegion=<16;16,1> Type=uw        // 1 GRF: word (uw) alias of r21, default source region covers 16 contiguous words
+
+//r22
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as
+//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT
+
+//r23
+
+
+//Lumakey
+
+
+//r24
+
+
+//r25
+
+
+//r26
+
+
+//defines to generate LABELS during compile time.
+
+
+        // Message Header
+        // m0.7         31:0    Debug
+        // m0.6         31:0    Debug
+        // m0.5         31:0    Ignored
+        // m0.4         31:0    Ignored
+        // m0.3         31:0    Ignored
+        // m0.2         31:16   Ignored
+        //              15      Alpha Write Channel Mask        enable=0, disable=1
+        //              14      Blue Write Channel Mask  (U)
+        //              13      Green Write Channel Mask (Y)
+        //              12      Red Write Channel Mask   (V)
+        //              11:0    Ignored
+        // m0.1                 Ignored
+        // m0.0                 Ignored
+
+
+        // AVS payload
+        // m1.7                 Group ID Number
+        // m1.6                 U 2nd Derivative        ---> NLAS dx
+        // m1.5                 Delta V                 ---> Step Y
+        // m1.4                 Delta U                 ---> Step X
+        // m1.3                 Pixel 0 V Address       ---> ORIY (Y0)
+        // m1.2                 Pixel 0 U Address       ---> ORIX (X0)
+        // m1.1                 Vertical Block Number
+        // m1.0                 Reserved
+
+        // Sampler Message Descriptor
+        // 31:29        Reserved                        000
+        // 28:25        Message length                  0010
+        // 24:20        Response length                 xxxxx   ---> 4GRFs for each enabled channel (AVS), 2GRFs for each enabled channel (sample unorm)
+        // 19           Header Present                  1
+        // 18:17        SIMD Mode                       11      ---> SIMD32/64
+        // 16:12        Message Type                    xxxxx   ---> 01011 sample_8x8, 01100 (sample_unorm), 01010 (sample_unorm+killpix)
+        // 11:8         Sampler Index                   xxxx
+        // 7:0          Binding Table Index             xxxxxxxx
+
+
+        // Msg Header M0.2
+        // 15:15        Alpha Write Channel Mask, 0: written back, 1: not written back
+        // 14:14        Blue  Write Channel Mask
+        // 13:13        Green Write Channel Mask
+        // 12:12        Red   Write Channel Mask
+
+
+//By design, Buffer 0,1,2,3 always have Layer 0 and Buffer 4,5 always have L1-L7
+
+
+//used to generate LABELS at compile time.
+
+
+        // 18:17        SIMD Mode                       10      ---> SIMD16
+        // 16:12        Message Type                    xxxxx   ---> 00000 (SIMD16)
+
+
+//r10-17  - 8 GRFs to load SIMD16 data (up to 4 channels)
+//r18-19  - 2 GRFs to store sampler ramp.
+
+    .declare mfSCALING_0X_34X_PAYLOAD	Base=r14.0	ElementSize=4		SrcRegion=<8;8,1>		DstRegion=<1>		Type=f	// float alias of the payload register r14
+    .declare muwSCALING_0X_34X_PAYLOAD	Base=r14.0	ElementSize=2		SrcRegion=<16;16,1>		DstRegion=<1>		Type=uw	// word alias of the same r14 storage
+    .declare mudCALING_0X_34X_PAYLOAD	Base=r14.0	ElementSize=4		SrcRegion=<8;8,1>		DstRegion=<1>		Type=ud	// dword alias of r14; NOTE(review): name lacks the 'S' of "SCALING" used by the mf/muw aliases - confirm no user depends on this spelling before renaming
+    .declare mubCALING_0X_34X_PAYLOAD	Base=r14.0	ElementSize=1		SrcRegion=<32;32,1>		DstRegion=<1>		Type=ub	// byte alias of r14; NOTE(review): same missing 'S' in the name as the ud alias
+
+
+	.declare fSCALING_0X_34X_TEMP		Base=r9.0	ElementSize=4		SrcRegion=<8;8,1>		DstRegion=<1>		Type=f	// float alias of the temp register r9
+	.declare udSCALING_0X_34X_TEMP		Base=r9.0	ElementSize=4		SrcRegion=<8;8,1>		DstRegion=<1>		Type=ud	// dword alias of r9
+	.declare ub4SCALING_0X_34X_TEMP		Base=r9.0	ElementSize=1		SrcRegion=<32;8,4>		DstRegion=<1>		Type=ub	// byte alias of r9 whose source region steps 4 bytes between elements (one byte per dword)
+	.declare uwSCALING_0X_34X_TEMP		Base=r9.0	ElementSize=2		SrcRegion=<16;16,1>		DstRegion=<1>		Type=uw	// word alias of r9
+
+	// Sampler ramp is used for Scaling 0X_0.34X
+	.declare	fSAMPLER_RAMP  		Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> Type=f		// 1 GRF, 8 float elements; shares Base=r9.0 with the *_TEMP aliases. NOTE(review): the register-map comment before these declares places the ramp in r18-19 - confirm which is current
+
+
+	//#define rMSGDSC_UV    		r23.0
+
+
+//End of _SCALING_
+
+
+    //Kernel body: unless the layer is skipped, issue two sampler sends (Y, then UV) whose results land in uwBUFFER_3. First check if layer is to be skipped
+
+
+        // f0.1 pre-computed in Set_Layer_0
+        (-f0.1)  jmpi  (1)  SKIP_AVS_LOAD_L0_3_                       // f0.1 clear: jump past both sampler loads
+
+
+    //AVS_PAYLOAD already has all the data loaded at this point
+    add (1)     a0.0:ud     r23.5<0;1,0>:ud      0x44EB400:ud      //msg desc = r23.5 + 0x044EB400; per the descriptor layout in this file: msg length 2, response length 4, header present, SIMD mode 11, message type 01011 (sample_8x8), sampler index 4
+
+    mov (1)     r16.2:ud      0x0000D000:ud                                        // Header M0.2 write mask: bits 15,14,12 set (not written back), bit 13 clear -> only the Green (Y) channel is enabled
+
+
+    // set the vertical block number (dword 1 of the payload mirror r25) to r7.12:uw + 3
+
+
+      add (1)   r25.1<1>:ud    r7.12:uw  3:ud
+
+
+    mov (8)     r17.0:ud      r25.0<8;8,1>:ud                                     // Copy msg payload mirror (r25) to r17, the second register of the 2-register message that starts at r16
+
+    // Gen7 AVS WA Only for YUV packed surfaces, NV12 and Y-channel only for Planar surfaces
+    //   if ((int)(u_left*width + 5.0/256) > (int)(u_left*width))
+    //   {
+    //       modified_u_coord = u_coord - 5.0/(256*width); //floating point
+    //   }
+    //   else if ((int)(u_left*width + 255.0/256) == (int)(u_left*width))
+    //   {
+    //       modified_u_coord = u_coord + 1.0/(256*width); //floating point
+    //   }
+    //   else{
+    //       modified_u_coord = u_coord;
+    //   }
+    //   Where u_left = u - 2*du + 3*ddu for IEF On
+    //   And u_left = u for IEF Off case
+    //   In the code below: u = r17.2, du = r17.4, ddu = r17.6, r14 is scratch
+
+    // check whether Gen7 AVS WA is enabled,
+    mov  (1)    r14.8:uw            f0.0:uw                           // save f0.0
+    mov  (1)    r14.5:f             r17.2<0;1,0>:f           // save pixel 0 U for chroma
+
+    and.nz.f0.0  (1)     null<1>:uw     r2.3:uw    0x2:uw             // f0.0 = (r2.3:uw & 0x2) != 0, the WA-enable bit
+    (-f0.0)jmpi  (1)     GEN7_PL2_AVS_WA_DONE_L0_3_                   // WA bit clear: leave the U coordinate untouched
+
+    // Gen7 AVS WA, check if IEF is ON for choosing Gen7 AVS WA formula
+
+    and.nz.f0.0  (8)     null<1>:uw     r2.3<0;1,0>:uw             0x4:uw      // f0.0 (8 channels) = (r2.3:uw & 0x4) != 0, the IEF-on bit
+    (f0.0)mov (8)    acc0.0:f          r17.2<0;1,0>:f                          // acc0  = u
+    (f0.0)mac (8)    acc0.0:f          r17.4<0;1,0>:f    -2.0:f                // acc0 += du  * -2.0
+    (f0.0)mac (8)    acc0.0:f          r17.6<0;1,0>:f     3.0:f                // acc0 += ddu *  3.0
+    (f0.0)mov (1)    r14.2:f        acc0:f                                 // IEF ON,  rTEMP3.2 = u_left
+    (-f0.0)mov (1)   r14.2:f        r17.2<0;1,0>:f                // IEF OFF, rTEMP3.2 = u_left
+
+    and (1)         r14.1:ud     r2.3:uw    0xFFF8:uw             // keep bits 15:3 of r2.3:uw
+    asr (1)         r14.1:ud     r14.1:ud    3:d                  // shift them down by 3
+    mov (1)         r14.1:f      r14.1:ud                         // convert to float: the "width" factor of the formula
+
+    // Gen7 AVS WA, if (int)(u_left*width + 5.0/256) > (int)(u_left*width)
+    mul (1)          r14.0:f        r14.2:f                  r14.1:f    // rTEMP3.0 = u_left*width
+    add (1)          r14.2:f        r14.0:f                  0.01953125:f  // rTEMP3.2 = u_left*width + 5.0/256
+    add (1)          r14.3:f        r14.0:f                  0.99609375:f  // rTEMP3.3 = u_left*width + 255.0/256
+
+    //Check if the values are < 0 and account for (int) cast of negative numbers
+
+    //(int)(u_left*width)
+    cmp.l.f0.0 (1)   null<1>:f          r14.0:f                  0.00000000:f          // f0.0 = value < 0
+    mov (1)          r14.0:d        r14.0:f                                            // float -> signed int, in place
+    (f0.0)add (1)    r14.0:d        r14.0<0;1,0>:d           -1:d                      // negative input: subtract 1 from the converted value
+
+    //(int)(u_left*width + 5.0/256)
+    cmp.l.f0.0 (1)   null<1>:f          r14.2:f                  0.00000000:f          // f0.0 = value < 0
+    mov (1)          r14.2:d        r14.2:f                                            // float -> signed int, in place
+    (f0.0)add (1)    r14.2:d        r14.2<0;1,0>:d           -1:d                      // negative input: subtract 1 from the converted value
+
+    //(int)(u_left*width + 255.0/256)
+    cmp.l.f0.0 (1)   null<1>:f          r14.3:f                  0.00000000:f          // f0.0 = value < 0
+    mov (1)          r14.3:d        r14.3:f                                            // float -> signed int, in place
+    (f0.0)add (1)    r14.3:d        r14.3<0;1,0>:d           -1:d                      // negative input: subtract 1 from the converted value
+
+    mov  (1)    f0.0:uw                0:uw                   // clear flag, so the last add only fires if the predicated cmp.e below runs and sets it
+    //if ((int)(u_left*width + 5.0/256) > (int)(u_left*width))
+    cmp.g.f1.0  (1)     null<1>:d        r14.2:d    r14.0:d
+    // modified_u_coord = u_coord - 5.0/(256*width); //floating point (r2.3:f is presumably that precomputed term - confirm against the host setup code)
+    (f1.0) add (1)     r17.2:f       r17.2<0;1,0>:f   -r2.3:f
+    //else if ((int)(u_left*width + 255.0/256) == (int)(u_left*width)) - the compare only executes when f1.0 is clear
+    (-f1.0) cmp.e.f0.0  (1) null<1>:d    r14.3:d    r14.0:d
+    // modified_u_coord = u_coord + 1.0/(256*width); //floating point (r2.2:f is presumably that precomputed term - confirm against the host setup code)
+    (f0.0) add (1)     r17.2:f       r17.2<0;1,0>:f   r2.2:f
+
+GEN7_PL2_AVS_WA_DONE_L0_3_:
+    mov  (1)    f0.0:uw                r14.8:uw                   // restore f0.0
+
+    send (1)    uwBUFFER_3(0)<1>   r16    0x2    a0.0:ud          // Y load: message starts at r16, descriptor from a0.0, response written at uwBUFFER_3(0)
+    // Returns Y data in 4 GRFs in scrambled order
+
+    mov  (1)    r17.2:f       r14.5:f                    // restore pixel 0 U for chroma, No AVS WA for chroma
+
+    add (1)     a0.0:ud                r23.5<0;1,0>:ud      0x48EB801:ud            // msg desc; 1 is added to change BI to UV (0x048EB801: msg length 2, response length 8, sample_8x8, sampler index 8)
+    mov (1)     r16.2:ud      0x0000A000:ud                                               // Enable Red+Blue channel (bits 15 and 13 set: Alpha and Green not written back)
+
+    send (1)    uwBUFFER_3(4)<1>   r16    0x2    a0.0:ud          // UV load: response written at uwBUFFER_3(4), presumably the GRF right after the 4 GRFs of Y data
+    // Returns UV data in 8 GRFs in scrambled order
+
+SKIP_AVS_LOAD_L0_3_:
+    nop
+
+
diff --git a/src/shaders/post_processing/gen7/PL3_AVS_Buf_0.g4a b/src/shaders/post_processing/gen7/PL3_AVS_Buf_0.g4a
new file mode 100644
index 0000000..8d487aa
--- /dev/null
+++ b/src/shaders/post_processing/gen7/PL3_AVS_Buf_0.g4a
@@ -0,0 +1,549 @@
+/*
+ *  Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+//   47    // Total instruction count
+//    1    // Total kernel count
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16)
+.default_register_type  :ub
+
+.reg_count_total        128
+.reg_count_payload      7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+                                    //  e.g.            byte0   byte1  byte2
+                                    // YUYV               0       1      3
+                                    // YVYU               0       3      1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+                                    //  e.g.              byte0           byte1           byte2
+                                    // YUYV                 0               1               3
+                                    // YVYU                 0               3               1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+//                = 000 0001 (min message len 1 ) 0,0000 (resp len 0   -add later)
+//                  0000,0000,0000
+//                  0001(Spawn a root thread),0001 (Root thread spawn thread)
+//                = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+//                = 000 0110 (min message len 6 ) 0,0000 (resp len 0   -add later)
+//                  1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+//                  0000,0000 (Binding table index, added later)
+//                = 0x0C098700
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+        // Message Descriptors
+                //                = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+                //                  1 (header present 1) 0 1010 (media block write) 000000
+                //                  00000000 (binding table index - set later)
+                //                = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+                             // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT  r43    // Dummy Message Register for EOT
+
+// Four typed views (ub, uw, ud, f) of the same register space, all based at r30
+.declare    mubMSGPAYLOAD  Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare    muwMSGPAYLOAD  Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare    mudMSGPAYLOAD  Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare    mfMSGPAYLOAD   Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+// Five typed views (f, ud, uw, ub, ub4) of the same storage, all based at r9.0
+.declare fROBUF		  Base=r9.0		ElementSize=4		SrcRegion=<8;8,1>		  DstRegion=<1>		Type=f
+
+.declare udROBUF		Base=r9.0		ElementSize=4		SrcRegion=<8;8,1>		  DstRegion=<1>		Type=ud
+
+.declare uwROBUF		Base=r9.0		ElementSize=2		SrcRegion=<16;16,1>		DstRegion=<1>		Type=uw
+
+.declare ubROBUF		Base=r9.0		ElementSize=1		SrcRegion=<16;16,1>		DstRegion=<1>		Type=ub
+// ub4 view: byte elements with source region <32;8,4> and destination stride <4>, i.e. every 4th byte
+.declare ub4ROBUF 	Base=r9.0		ElementSize=1		SrcRegion=<32;8,4>		DstRegion=<4>		Type=ub
+
+
+// End of common.inc
+
+
+// FileName:		PL3_AVS_Buf_0.asm
+// Author:			Tatiya, Rupesh
+// Description:		Loads 8x8 AVS/IEF PL3 data into Buffer 0
+
+
+
+// FileName     :   PL3_AVS_Buf.asm
+// Author       :   Tatiya, Rupesh
+// Description  :   Loads 8x8 AVS/IEF PL3 data into Buffer N
+
+
+
+// Module name: Scaling.inc
+
+
+
+
+// Description: Includes all definitions explicit to Fast Composite.
+
+
+
+
+// End of common.inc
+
+
+//========== GRF partition ==========
+     // r0 header            :   r0          (1 GRF)
+     // Static parameters    :   r1 - r6     (6 GRFS)
+     // Inline parameters    :   r7 - r8     (2 GRFs)
+     // MSGSRC               :   r27         (1 GRF)
+//===================================
+
+//Interface:
+//========== Static Parameters (Explicit To Fast Composite) ==========
+//r1
+//CSC Set 0
+
+
+.declare udCSC_CURBE    Base=r1.0      ElementSize=4       Type=ud       // dword view starting at r1.0
+
+//Constant alpha
+
+
+//r2
+
+
+// Gen7 AVS WA
+
+
+// WiDi Definitions
+
+
+//Colorfill
+
+
+                                      // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise.	    
+
+.declare ubCOLOR_PIXEL_VAL      Base=r2.20      ElementSize=1       SrcRegion=<0;1,0>       DstRegion=<1>       Type=ub       // byte view at r2.20; <0;1,0> is a scalar (replicated) source region
+
+//r3
+//Normalised Ratio of Horizontal step size with main video for all layers
+
+
+    //Normalised Ratio of Horizontal step size with main video for all layers becomes
+    //Normalised Horizontal step size for all layers in VP_Setup.asm
+
+
+//r4
+//Normalised Vertical step size for all layers
+
+
+//r5
+//Normalised Vertical Frame Origin for all layers
+
+
+//r6
+//Normalised Horizontal Frame Origin for all layers
+
+
+//========== Inline Parameters (Explicit To Fast Composite) ==========
+
+
+//Main video Step X
+
+
+//====================== Binding table (Explicit To Fast Composite)=========================================
+
+
+//Used by Interlaced Scaling Kernels
+
+
+//========== Sampler State Table Index (Explicit To Fast Composite)==========
+//Sampler Index for AVS/IEF messages
+
+
+//Sampler Index for SIMD16 sampler messages
+
+
+//=============================================================================
+// Buffer register map: BUFFER_0..3 are based at r64/r80/r96/r112 (16 GRFs apart), BUFFER_4 at r28, BUFFER_5 at r46. Each buffer has f, ud, uw, ub and ub4 views of the same base.
+.declare fBUFFER_0      Base=r64.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+.declare fBUFFER_1      Base=r80.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+.declare fBUFFER_2      Base=r96.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+.declare fBUFFER_3      Base=r112.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+.declare fBUFFER_4      Base=r28.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+.declare fBUFFER_5      Base=r46.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+
+.declare udBUFFER_0     Base=r64.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_1     Base=r80.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_2     Base=r96.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_3     Base=r112.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_4     Base=r28.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_5     Base=r46.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+
+.declare uwBUFFER_0     Base=r64.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_1     Base=r80.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_2     Base=r96.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_3     Base=r112.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_4     Base=r28.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_5     Base=r46.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+
+.declare ubBUFFER_0     Base=r64.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_1     Base=r80.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_2     Base=r96.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_3     Base=r112.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_4     Base=r28.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_5     Base=r46.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+// ub4 views: byte elements with source region <32;8,4> and destination stride <4>, i.e. every 4th byte
+.declare ub4BUFFER_0    Base=r64.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_1    Base=r80.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_2    Base=r96.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_3    Base=r112.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_4    Base=r28.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_5    Base=r46.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+
+//Pointer to mask reg
+
+
+//r18
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+.declare udCSC_COEFF_0  Base=r18.0    ElementSize=4 Type=ud       // 1 GRF
+
+//r19
+
+
+.declare udCSC_COEFF_1  Base=r19.0    ElementSize=4 Type=ud       // 1 GRF
+
+
+//r20
+
+.declare uwALPHA_MASK_REG_TEMP  Base=r20.0    ElementSize=2 SrcRegion=<16;16,1> Type=uw        // 1 GRF
+
+//r21
+
+.declare uwALPHA_MASK_REG       Base=r21.0         ElementSize=2 SrcRegion=<16;16,1> Type=uw        // 1 GRF
+
+//r22
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as
+//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT
+
+//r23
+
+
+//Lumakey
+
+
+//r24
+
+
+//r25
+
+
+//r26
+
+
+//defines to generate LABELS during compile time.
+
+
+        // Message Header
+        // m0.7         31:0    Debug
+        // m0.6         31:0    Debug
+        // m0.5         31:0    Ignored
+        // m0.4         31:0    Ignored
+        // m0.3         31:0    Ignored
+        // m0.2         31:16   Ignored
+        //              15      Alpha Write Channel Mask        enable=0, disable=1
+        //              14      Blue Write Channel Mask  (U)
+        //              13      Green Write Channel Mask (Y)
+        //              12      Red Write Channel Mask   (V)
+        //              11:0    Ignored
+        // m0.1                 Ignored
+        // m0.0                 Ignored
+
+
+        // AVS payload
+        // m1.7                 Group ID Number
+        // m1.6                 U 2nd Derivative        ---> NLAS dx
+        // m1.5                 Delta V                 ---> Step Y
+        // m1.4                 Delta U                 ---> Step X
+        // m1.3                 Pixel 0 V Address       ---> ORIY (Y0)
+        // m1.2                 Pixel 0 U Address       ---> ORIX (X0)
+        // m1.1                 Vertical Block Number
+        // m1.0                 Reserved
+
+        // Sampler Message Descriptor
+        // 31:29        Reserved                        000
+        // 28:25        Message length                  0010
+        // 24:20        Response length                 xxxxx   ---> 4GRFs for each enabled channel (AVS), 2GRFs for each enabled channel (sample unorm)
+        // 19           Header Present                  1
+        // 18:17        SIMD Mode                       11      ---> SIMD32/64
+        // 16:12        Message Type                    xxxxx   ---> 01011 sample_8x8, 01100 (sample_unorm), 01010 (sample_unorm+killpix)
+        // 11:8         Sampler Index                   xxxx
+        // 7:0          Binding Table Index             xxxxxxxx
+
+
+        // Msg Header M0.2
+        // 15:15        Alpha Write Channel Mask, 0: written back, 1: not written back
+        // 14:14        Blue  Write Channel Mask
+        // 13:13        Green Write Channel Mask
+        // 12:12        Red   Write Channel Mask
+
+
+//By design, Buffer 0,1,2,3 always have Layer 0 and Buffer 4,5 always have L1-L7
+
+
+//used to generate LABELS at compile time.
+
+
+        // 18:17        SIMD Mode                       10      ---> SIMD16
+        // 16:12        Message Type                    xxxxx   ---> 00000 (SIMD16)
+
+
+//r10-17  - 8 GRFs to load SIMD16 data (up to 4 channels)
+//r18-19  - 2 GRFs to store sampler ramp.
+// NOTE(review): the register numbers in the two comments above do not match the bases declared below (payload views at r14.0, temp views and sampler ramp at r9.0) - confirm which is current
+    .declare mfSCALING_0X_34X_PAYLOAD	Base=r14.0	ElementSize=4		SrcRegion=<8;8,1>		DstRegion=<1>		Type=f
+    .declare muwSCALING_0X_34X_PAYLOAD	Base=r14.0	ElementSize=2		SrcRegion=<16;16,1>		DstRegion=<1>		Type=uw
+    .declare mudCALING_0X_34X_PAYLOAD	Base=r14.0	ElementSize=4		SrcRegion=<8;8,1>		DstRegion=<1>		Type=ud
+    .declare mubCALING_0X_34X_PAYLOAD	Base=r14.0	ElementSize=1		SrcRegion=<32;32,1>		DstRegion=<1>		Type=ub
+// NOTE(review): the ud/ub payload views above are spelled "...CALING..." (no "S"), unlike the f/uw views; names left unchanged because they may be referenced elsewhere
+// Temp views (f, ud, ub4, uw) all based at r9.0, the same base as the ROBUF views and fSAMPLER_RAMP
+	.declare fSCALING_0X_34X_TEMP		Base=r9.0	ElementSize=4		SrcRegion=<8;8,1>		DstRegion=<1>		Type=f
+	.declare udSCALING_0X_34X_TEMP		Base=r9.0	ElementSize=4		SrcRegion=<8;8,1>		DstRegion=<1>		Type=ud
+	.declare ub4SCALING_0X_34X_TEMP		Base=r9.0	ElementSize=1		SrcRegion=<32;8,4>		DstRegion=<1>		Type=ub
+	.declare uwSCALING_0X_34X_TEMP		Base=r9.0	ElementSize=2		SrcRegion=<16;16,1>		DstRegion=<1>		Type=uw
+
+	// Sampler ramp is used for Scaling 0X_0.34X
+	.declare	fSAMPLER_RAMP  		Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> Type=f		// 1 GRFs, 8 elements
+
+
+	//#define rMSGDSC_UV    		r23.0
+
+
+//End of _SCALING_
+
+
+        //NOTE: We need offsets for second half of LAYER 0 - even if we do not load it.
+        //Update the channel offset in the buffers for the lower 8x4 data for BUFFER_0.
+        mov (1)     r22.4<1>:ud       0x400040:ud
+
+
+    //Check if layer is to be skipped
+
+
+        // f0.1 pre-computed in Set_Layer_0; when it is clear, jump over all three AVS loads below
+        (-f0.1)  jmpi  (1)  SKIP_AVS_LOAD_L0_0_
+    // Descriptor constant 0x44EB400, decoded with the "Sampler Message Descriptor" layout documented earlier in this file:
+    // message length 2, response length 4, header present, SIMD mode 11, message type 01011 (sample_8x8), sampler index 4, binding table index bits 0
+    //AVS_PAYLOAD already has all the data loaded at this point
+    add (1)     a0.0:ud     r23.5<0;1,0>:ud      0x44EB400:ud      //msg desc = r23.5 + constant
+
+    mov (1)     r16.2:ud      0x0000D000:ud            // Enable Green (Y) channel only: mask bits 15:12 = 1101, and 0 means written back
+
+        // r25 is copied into r17 below; dwords 7 and 1 are the Group ID Number and Vertical Block Number slots of the AVS payload layout
+        mov (1)   r25.7<1>:ud    r7.7:ud           { NoDDClr }
+        mov (1)   r25.1<1>:ud    r7.12:uw       { NoDDChk }
+
+
+    // set the vertical block number
+
+
+    mov (8)     r17.0:ud      r25.0<8;8,1>:ud         // Copy msg payload mirror (r25) into r17, the register after the r16 header
+
+
+    // Gen7 AVS WA Only for YUV packed surfaces, NV12 and Y-channel only for Planar surfaces
+    //   if ((int)(u_left*width + 5.0/256) > (int)(u_left*width))
+    //   {
+    //       modified_u_coord = u_coord - 5.0/(256*width); //floating point
+    //   }
+    //   else if ((int)(u_left*width + 255.0/256) == (int)(u_left*width))
+    //   {
+    //       modified_u_coord = u_coord + 1.0/(256*width); //floating point
+    //   }
+    //   else{
+    //       modified_u_coord = u_coord;
+    //   }
+    //   Where u_left = u - 2*du + 3*ddu for IEF On
+    //   And u_left = u for IEF Off case
+    //   
+
+    // check whether Gen7 AVS WA is enabled,
+    mov  (1)    r14.8:uw            f0.0:uw                           // save f0.0
+    mov  (1)    r14.5:f             r17.2<0;1,0>:f           // save pixel 0 U for chroma
+    // WA enable is bit 1 (0x2) of r2.3:uw; when it is clear, skip to the done label, where f0.0 is restored
+    and.nz.f0.0  (1)     null<1>:uw     r2.3:uw    0x2:uw
+    (-f0.0)jmpi  (1)     GEN7_PL3_AVS_WA_DONE_L0_0_
+
+    // Gen7 AVS WA, check if IEF is ON for choosing Gen7 AVS WA formula
+    // IEF flag is bit 2 (0x4) of r2.3:uw; when set, acc0 = r17.2 - 2*r17.4 + 3*r17.6 (u - 2*du + 3*ddu per the AVS payload layout)
+    and.nz.f0.0  (8)     null<1>:uw     r2.3<0;1,0>:uw             0x4:uw
+    (f0.0)mov (8)    acc0.0:f          r17.2<0;1,0>:f
+    (f0.0)mac (8)    acc0.0:f          r17.4<0;1,0>:f    -2.0:f
+    (f0.0)mac (8)    acc0.0:f          r17.6<0;1,0>:f     3.0:f
+    (f0.0)mov (1)    r14.2:f        acc0:f                                 // IEF ON,  rTEMP3.2 = u_left
+    (-f0.0)mov (1)   r14.2:f        r17.2<0;1,0>:f                // IEF OFF, rTEMP3.2 = u_left
+    // r14.1 = bits 15:3 of r2.3:uw shifted down by 3 and converted to float; used as "width" by the multiply below
+    and (1)         r14.1:ud     r2.3:uw    0xFFF8:uw
+    asr (1)         r14.1:ud     r14.1:ud    3:d
+    mov (1)         r14.1:f      r14.1:ud
+
+    // Gen7 AVS WA, if (int)(u_left*width + 5.0/256) > (int)(u_left*width)
+    mul (1)          r14.0:f        r14.2:f                  r14.1:f    // rTEMP3.0 = u_left*width
+    add (1)          r14.2:f        r14.0:f                  0.01953125:f  // rTEMP3.2 = u_left*width + 5.0/256
+    add (1)          r14.3:f        r14.0:f                  0.99609375:f  // rTEMP3.3 = u_left*width + 255.0/256
+
+    //Check if the values are < 0 and account for (int) cast of negative numbers
+    // Pattern used three times below: compare the float against 0, convert it to :d in place, then add -1 when it compared below zero
+    //(int)(u_left*width)
+    cmp.l.f0.0 (1)   null<1>:f          r14.0:f                  0.00000000:f          
+    mov (1)          r14.0:d        r14.0:f
+    (f0.0)add (1)    r14.0:d        r14.0<0;1,0>:d           -1:d        
+
+    //(int)(u_left*width + 5.0/256)
+    cmp.l.f0.0 (1)   null<1>:f          r14.2:f                  0.00000000:f          
+    mov (1)          r14.2:d        r14.2:f        
+    (f0.0)add (1)    r14.2:d        r14.2<0;1,0>:d           -1:d        
+
+    //(int)(u_left*width + 255.0/256)
+    cmp.l.f0.0 (1)   null<1>:f          r14.3:f                  0.00000000:f          
+    mov (1)          r14.3:d        r14.3:f
+    (f0.0)add (1)    r14.3:d        r14.3<0;1,0>:d           -1:d        
+
+    mov  (1)    f0.0:uw                0:uw                   // clear flag so f0.0 stays 0 when the predicated cmp.e below is skipped
+    //if ((int)(u_left*width + 5.0/256) > (int)(u_left*width))
+    cmp.g.f1.0  (1)     null<1>:d        r14.2:d    r14.0:d
+    // modified_u_coord = u_coord - 5.0/(256*width); //floating point  (r2.3:f presumably holds 5.0/(256*width) - confirm against the CURBE setup)
+    (f1.0) add (1)     r17.2:f       r17.2<0;1,0>:f   -r2.3:f
+    //else if ((int)(u_left*width + 255.0/256) == (int)(u_left*width))
+    (-f1.0) cmp.e.f0.0  (1) null<1>:d    r14.3:d    r14.0:d
+    // modified_u_coord = u_coord + 1.0/(256*width); //floating point  (r2.2:f presumably holds 1.0/(256*width) - confirm against the CURBE setup)
+    (f0.0) add (1)     r17.2:f       r17.2<0;1,0>:f   r2.2:f
+
+GEN7_PL3_AVS_WA_DONE_L0_0_:
+    mov  (1)    f0.0:uw                r14.8:uw                   // restore f0.0
+
+
+    send (1)    uwBUFFER_0(0)<1>   r16    0x2    a0.0:ud
+    // Returns Y data in 4 GRFs in scrambled order
+
+    mov  (1)    r17.2:f       r14.5:f                    // restore pixel 0 U for chroma, No AVS WA for chroma
+
+    add (1)     a0.0:ud     r23.5<0;1,0>:ud      0x44EB801:ud     // msg desc; 1 is added to change BI to U; sampler index field is 8
+    mov (1)     r16.2:ud      0x0000E000:ud                           // Enable Red channel only: mask bits 15:12 = 1110, and 0 means written back
+
+    send (1)    uwBUFFER_0(4)<1>   r16    0x2    a0.0:ud
+    // Returns U data in 4 GRFs in scrambled order
+
+    add (1)     a0.0:ud     r23.5<0;1,0>:ud      0x44EBC02:ud     // msg desc; 2 is added to change BI to V; sampler index field is 12
+    mov (1)     r16.2:ud      0x0000E000:ud                           // Enable Red channel only (same mask value as already written for the U load)
+
+    send (1)    uwBUFFER_0(8)<1>   r16    0x2    a0.0:ud
+    // Returns V data in 4 GRFs in scrambled order
+
+SKIP_AVS_LOAD_L0_0_:
+        nop
+
+
diff --git a/src/shaders/post_processing/gen7/PL3_AVS_Buf_1.g4a b/src/shaders/post_processing/gen7/PL3_AVS_Buf_1.g4a
new file mode 100644
index 0000000..eb26775
--- /dev/null
+++ b/src/shaders/post_processing/gen7/PL3_AVS_Buf_1.g4a
@@ -0,0 +1,542 @@
+/*
+ *  Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+//   45    // Total instruction count
+//    1    // Total kernel count
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16)
+.default_register_type  :ub
+
+.reg_count_total        128
+.reg_count_payload      7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+                                    //  e.g.            byte0   byte1  byte2
+                                    // YUYV               0       1      3
+                                    // YVYU               0       3      1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+                                    //  e.g.              byte0           byte1           byte2
+                                    // YUYV                 0               1               3
+                                    // YVYU                 0               3               1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+//                = 000 0001 (min message len 1 ) 0,0000 (resp len 0   -add later)
+//                  0000,0000,0000
+//                  0001(Spawn a root thread),0001 (Root thread spawn thread)
+//                = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+//                = 000 0110 (min message len 6 ) 0,0000 (resp len 0   -add later)
+//                  1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+//                  0000,0000 (Binding table index, added later)
+//                = 0x0C098700
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+        // Message Descriptors
+                //                = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+                //                  1 (header present 1) 0 1010 (media block write) 000000
+                //                  00000000 (binding table index - set later)
+                //                = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+                             // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT  r43    // Dummy Message Register for EOT
+
+
+.declare    mubMSGPAYLOAD  Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare    muwMSGPAYLOAD  Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare    mudMSGPAYLOAD  Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare    mfMSGPAYLOAD   Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+
+.declare fROBUF		  Base=r9.0		ElementSize=4		SrcRegion=<8;8,1>		  DstRegion=<1>		Type=f
+
+.declare udROBUF		Base=r9.0		ElementSize=4		SrcRegion=<8;8,1>		  DstRegion=<1>		Type=ud
+
+.declare uwROBUF		Base=r9.0		ElementSize=2		SrcRegion=<16;16,1>		DstRegion=<1>		Type=uw
+
+.declare ubROBUF		Base=r9.0		ElementSize=1		SrcRegion=<16;16,1>		DstRegion=<1>		Type=ub
+
+.declare ub4ROBUF 	Base=r9.0		ElementSize=1		SrcRegion=<32;8,4>		DstRegion=<4>		Type=ub
+
+
+// End of common.inc
+
+
+// FileName:		PL3_AVS_Buf_1.asm
+// Author:			Tatiya, Rupesh
+// Description:		Loads 8x8 AVS/IEF PL3 data into Buffer 1
+
+
+
+// FileName     :   PL3_AVS_Buf.asm
+// Author       :   Tatiya, Rupesh
+// Description  :   Loads 8x8 AVS/IEF PL3 data into Buffer N
+
+
+
+// Module name: Scaling.inc
+
+
+
+
+// Description: Includes all definitions explicit to Fast Composite.
+
+
+
+
+// End of common.inc
+
+
+//========== GRF partition ==========
+     // r0 header            :   r0          (1 GRF)
+     // Static parameters    :   r1 - r6     (6 GRFS)
+     // Inline parameters    :   r7 - r8     (2 GRFs)
+     // MSGSRC               :   r27         (1 GRF)
+//===================================
+
+//Interface:
+//========== Static Parameters (Explicit To Fast Composite) ==========
+//r1
+//CSC Set 0
+
+
+.declare udCSC_CURBE    Base=r1.0      ElementSize=4       Type=ud
+
+//Constant alpha
+
+
+//r2
+
+
+// Gen7 AVS WA
+
+
+// WiDi Definitions
+
+
+//Colorfill
+
+
+                                      // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise.	    
+
+.declare ubCOLOR_PIXEL_VAL      Base=r2.20      ElementSize=1       SrcRegion=<0;1,0>       DstRegion=<1>       Type=ub
+
+//r3
+//Normalised Ratio of Horizontal step size with main video for all layers
+
+
+    //Normalised Ratio of Horizontal step size with main video for all layers becomes
+    //Normalised Horizontal step size for all layers in VP_Setup.asm
+
+
+//r4
+//Normalised Vertical step size for all layers
+
+
+//r5
+//Normalised Vertical Frame Origin for all layers
+
+
+//r6
+//Normalised Horizontal Frame Origin for all layers
+
+
+//========== Inline Parameters (Explicit To Fast Composite) ==========
+
+
+//Main video Step X
+
+
+//====================== Binding table (Explicit To Fast Composite)=========================================
+
+
+//Used by Interlaced Scaling Kernels
+
+
+//========== Sampler State Table Index (Explicit To Fast Composite)==========
+//Sampler Index for AVS/IEF messages
+
+
+//Sampler Index for SIMD16 sampler messages
+
+
+//=============================================================================
+
+.declare fBUFFER_0      Base=r64.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+.declare fBUFFER_1      Base=r80.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+.declare fBUFFER_2      Base=r96.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+.declare fBUFFER_3      Base=r112.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+.declare fBUFFER_4      Base=r28.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+.declare fBUFFER_5      Base=r46.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+
+.declare udBUFFER_0     Base=r64.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_1     Base=r80.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_2     Base=r96.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_3     Base=r112.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_4     Base=r28.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_5     Base=r46.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+
+.declare uwBUFFER_0     Base=r64.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_1     Base=r80.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_2     Base=r96.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_3     Base=r112.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_4     Base=r28.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_5     Base=r46.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+
+.declare ubBUFFER_0     Base=r64.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_1     Base=r80.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_2     Base=r96.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_3     Base=r112.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_4     Base=r28.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_5     Base=r46.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+
+.declare ub4BUFFER_0    Base=r64.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_1    Base=r80.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_2    Base=r96.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_3    Base=r112.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_4    Base=r28.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_5    Base=r46.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+
+//Pointer to mask reg
+
+
+//r18
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+.declare udCSC_COEFF_0  Base=r18.0    ElementSize=4 Type=ud       // 1 GRF
+
+//r19
+
+
+.declare udCSC_COEFF_1  Base=r19.0    ElementSize=4 Type=ud       // 1 GRF
+
+
+//r20
+
+.declare uwALPHA_MASK_REG_TEMP  Base=r20.0    ElementSize=2 SrcRegion=<16;16,1> Type=uw        // 1 GRF
+
+//r21
+
+.declare uwALPHA_MASK_REG       Base=r21.0         ElementSize=2 SrcRegion=<16;16,1> Type=uw        // 1 GRF
+
+//r22
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as
+//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT
+
+//r23
+
+
+//Lumakey
+
+
+//r24
+
+
+//r25
+
+
+//r26
+
+
+//defines to generate LABELS during compile time.
+
+
+        // Message Header
+        // m0.7         31:0    Debug
+        // m0.6         31:0    Debug
+        // m0.5         31:0    Ignored
+        // m0.4         31:0    Ignored
+        // m0.3         31:0    Ignored
+        // m0.2         31:16   Ignored
+        //              15      Alpha Write Channel Mask        enable=0, disable=1
+        //              14      Blue Write Channel Mask  (U)
+        //              13      Green Write Channel Mask (Y)
+        //              12      Red Write Channel Mask   (V)
+        //              11:0    Ignored
+        // m0.1                 Ignored
+        // m0.0                 Ignored
+
+
+        // AVS payload
+        // m1.7                 Group ID Number
+        // m1.6                 U 2nd Derivative        ---> NLAS dx
+        // m1.5                 Delta V                 ---> Step Y
+        // m1.4                 Delta U                 ---> Step X
+        // m1.3                 Pixel 0 V Address       ---> ORIY (Y0)
+        // m1.2                 Pixel 0 U Address       ---> ORIX (X0)
+        // m1.1                 Vertical Block Number
+        // m1.0                 Reserved
+
+        // Sampler Message Descriptor
+        // 31:29        Reserved                        000
+        // 28:25        Message length                  0010
+        // 24:20        Response length                 xxxxx   ---> 4GRFs for each enabled channel (AVS), 2GRFs for each enabled channel (sample unorm)
+        // 19           Header Present                  1
+        // 18:17        SIMD Mode                       11      ---> SIMD32/64
+        // 16:12        Message Type                    xxxxx   ---> 01011 sample_8x8, 01100 (sample_unorm), 01010 (sample_unorm+killpix)
+        // 11:8         Sampler Index                   xxxx
+        // 7:0          Binding Table Index             xxxxxxxx
+
+
+        // Msg Header M0.2
+        // 15:15        Alpha Write Channel Mask, 0: written back, 1: not written back
+        // 14:14        Blue  Write Channel Mask
+        // 13:13        Green Write Channel Mask
+        // 12:12        Red   Write Channel Mask
+
+
+//By design, Buffer 0,1,2,3 always have Layer 0 and Buffer 4,5 always have L1-L7
+
+
+//used to generate LABELS at compile time.
+
+
+        // 18:17        SIMD Mode                       10      ---> SIMD16
+        // 16:12        Message Type                    xxxxx   ---> 00000 (SIMD16)
+
+
+//r10-17  - 8 GRFs to load SIMD16 data (up to 4 channels)
+//r18-19  - 2 GRFs to store sampler ramp.
+// NOTE(review): the ranges above do not match the declares below (payload views at r14, fSAMPLER_RAMP at r9) - confirm against Scaling.inc.
+    .declare mfSCALING_0X_34X_PAYLOAD	Base=r14.0	ElementSize=4		SrcRegion=<8;8,1>		DstRegion=<1>		Type=f
+    .declare muwSCALING_0X_34X_PAYLOAD	Base=r14.0	ElementSize=2		SrcRegion=<16;16,1>		DstRegion=<1>		Type=uw
+    .declare mudCALING_0X_34X_PAYLOAD	Base=r14.0	ElementSize=4		SrcRegion=<8;8,1>		DstRegion=<1>		Type=ud
+    .declare mubCALING_0X_34X_PAYLOAD	Base=r14.0	ElementSize=1		SrcRegion=<32;32,1>		DstRegion=<1>		Type=ub
+// NOTE(review): the ud/ub views above are spelled "CALING" (no 'S'), unlike the f/uw views; names kept as-is because users outside this listing may reference them.
+// Typed views (f, ud, strided ub, uw) of the temporary register r9.
+	.declare fSCALING_0X_34X_TEMP		Base=r9.0	ElementSize=4		SrcRegion=<8;8,1>		DstRegion=<1>		Type=f
+	.declare udSCALING_0X_34X_TEMP		Base=r9.0	ElementSize=4		SrcRegion=<8;8,1>		DstRegion=<1>		Type=ud
+	.declare ub4SCALING_0X_34X_TEMP		Base=r9.0	ElementSize=1		SrcRegion=<32;8,4>		DstRegion=<1>		Type=ub
+	.declare uwSCALING_0X_34X_TEMP		Base=r9.0	ElementSize=2		SrcRegion=<16;16,1>		DstRegion=<1>		Type=uw
+
+	// Sampler ramp is used for Scaling 0X_0.34X; it shares r9 with the *_TEMP views above.
+	.declare	fSAMPLER_RAMP  		Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> Type=f		// 1 GRF, 8 elements
+
+
+	//#define rMSGDSC_UV    		r23.0
+
+
+//End of _SCALING_
+
+
+    // Load Y, U and V for this block into BUFFER_1 with three sampler sends; the whole load is skipped when f0.1 is clear.
+
+
+        // f0.1 pre-computed in Set_Layer_0
+        (-f0.1)  jmpi  (1)  SKIP_AVS_LOAD_L0_1_
+
+
+    //AVS_PAYLOAD already has all the data loaded at this point
+    add (1)     a0.0:ud     r23.5<0;1,0>:ud      0x44EB400:ud      // msg desc for Y = r23.5 + (msg len 2, resp len 4, header, sample_8x8, sampler idx 4)
+
+    mov (1)     r16.2:ud      0x0000D000:ud            // Channel masks 15:12 = 1101: only bit 13 (Green/Y) is 0, i.e. written back
+
+
+    // set the vertical block number (payload dword 1) to r7.12:uw + 1
+
+       add (1)   r25.1<1>:ud    r7.12:uw  1:ud
+
+
+    mov (8)     r17.0:ud      r25.0<8;8,1>:ud         // Copy msg payload mirror (r25) into r17, which follows the header r16 in the 2-register message
+
+    // Gen7 AVS WA Only for YUV packed surfaces, NV12 and Y-channel only for Planar surfaces
+    //   if (((int)(u_left*width + 5.0/256) > (int)(u_left*width)) 
+    //   {
+    //       modified_u_coord = u_coord – 5.0/(256*width); //floating point
+    //   }
+    //   else if(((int)(u_left*width + 255.0/256) == (int)(u_left*width)) 
+    //   {
+    //       modified_u_coord = u_coord + 1.0/(256*width); //floating point
+    //   }
+    //   else{
+    //       modified_u_coord = u_coord;
+    //   }
+    //   Where u_left = u – 2*du + 3*ddu for IEF On
+    //   And u_left = u for IEF Off case
+    //   
+
+    // check whether Gen7 AVS WA is enabled (bit 1 of r2.3:uw); f0.0 and the unmodified U origin are saved first
+    mov  (1)    r14.8:uw            f0.0:uw                           // save f0.0
+    mov  (1)    r14.5:f             r17.2<0;1,0>:f           // save pixel 0 U for chroma
+
+    and.nz.f0.0  (1)     null<1>:uw     r2.3:uw    0x2:uw
+    (-f0.0)jmpi  (1)     GEN7_PL3_AVS_WA_DONE_L0_1_
+
+    // Gen7 AVS WA, check if IEF is ON (bit 2 of r2.3:uw) for choosing Gen7 AVS WA formula
+    // IEF ON: acc0 = u + du*(-2.0) + ddu*3.0, taken from payload dwords 2 (u), 4 (du) and 6 (ddu)
+    and.nz.f0.0  (8)     null<1>:uw     r2.3<0;1,0>:uw             0x4:uw
+    (f0.0)mov (8)    acc0.0:f          r17.2<0;1,0>:f
+    (f0.0)mac (8)    acc0.0:f          r17.4<0;1,0>:f    -2.0:f
+    (f0.0)mac (8)    acc0.0:f          r17.6<0;1,0>:f     3.0:f
+    (f0.0)mov (1)    r14.2:f        acc0:f                                 // IEF ON,  rTEMP3.2 = u_left
+    (-f0.0)mov (1)   r14.2:f        r17.2<0;1,0>:f                // IEF OFF, rTEMP3.2 = u_left
+    // r14.1 = (float)(bits 15:3 of r2.3:uw); used below as "width" in the WA formula
+    and (1)         r14.1:ud     r2.3:uw    0xFFF8:uw
+    asr (1)         r14.1:ud     r14.1:ud    3:d
+    mov (1)         r14.1:f      r14.1:ud
+
+    // Gen7 AVS WA, if (int)(u_left*width + 5.0/256) > (int)(u_left*width)
+    mul (1)          r14.0:f        r14.2:f                  r14.1:f    // rTEMP3.0 = u_left*width
+    add (1)          r14.2:f        r14.0:f                  0.01953125:f  // rTEMP3.2 = u_left*width + 5.0/256
+    add (1)          r14.3:f        r14.0:f                  0.99609375:f  // rTEMP3.3 = u_left*width + 255.0/256
+
+    //Check if the values are < 0 and account for (int) cast of negative numbers
+    // Each group below: flag = (value < 0), convert float to int in place, then subtract 1 when the flag is set.
+    //(int)(u_left*width)
+    cmp.l.f0.0 (1)   null<1>:f          r14.0:f                  0.00000000:f          
+    mov (1)          r14.0:d        r14.0:f
+    (f0.0)add (1)    r14.0:d        r14.0<0;1,0>:d           -1:d        
+
+    //(int)(u_left*width + 5.0/256)
+    cmp.l.f0.0 (1)   null<1>:f          r14.2:f                  0.00000000:f          
+    mov (1)          r14.2:d        r14.2:f        
+    (f0.0)add (1)    r14.2:d        r14.2<0;1,0>:d           -1:d        
+
+    //(int)(u_left*width + 255.0/256)
+    cmp.l.f0.0 (1)   null<1>:f          r14.3:f                  0.00000000:f          
+    mov (1)          r14.3:d        r14.3:f
+    (f0.0)add (1)    r14.3:d        r14.3<0;1,0>:d           -1:d        
+
+    mov  (1)    f0.0:uw                0:uw                   // clear flag so the last add is skipped when the predicated compare below does not execute
+    //if (((int)(u_left*width + 5.0/256) > (int)(u_left*width))
+    cmp.g.f1.0  (1)     null<1>:d        r14.2:d    r14.0:d
+    // modified_u_coord = u_coord – 5.0/(256*width); //floating point
+    (f1.0) add (1)     r17.2:f       r17.2<0;1,0>:f   -r2.3:f      // presumably r2.3:f holds 5.0/(256*width) - confirm against the CURBE layout
+    //else if(((int)(u_left*width + 255.0/256) == (int)(u_left*width)) 
+    (-f1.0) cmp.e.f0.0  (1) null<1>:d    r14.3:d    r14.0:d
+    // modified_u_coord = u_coord + 1.0/(256*width); //floating point
+    (f0.0) add (1)     r17.2:f       r17.2<0;1,0>:f   r2.2:f       // presumably r2.2:f holds 1.0/(256*width) - confirm against the CURBE layout
+
+GEN7_PL3_AVS_WA_DONE_L0_1_:
+    mov  (1)    f0.0:uw                r14.8:uw                   // restore f0.0
+
+    // Y load: response is written starting at GRF offset 0 of BUFFER_1
+    send (1)    uwBUFFER_1(0)<1>   r16    0x2    a0.0:ud
+    // Returns Y data in 4 GRFs in scrambled order
+
+    mov  (1)    r17.2:f       r14.5:f                    // restore pixel 0 U for chroma, No AVS WA for chroma
+
+    add (1)     a0.0:ud     r23.5<0;1,0>:ud      0x44EB801:ud     // msg desc for U: binding index +1, sampler idx field 8
+    mov (1)     r16.2:ud      0x0000E000:ud                           // Enable Red channel (masks 15:12 = 1110, bit 12 = 0)
+
+    send (1)    uwBUFFER_1(4)<1>   r16    0x2    a0.0:ud
+    // Returns U data in 4 GRFs in scrambled order
+
+    add (1)     a0.0:ud     r23.5<0;1,0>:ud      0x44EBC02:ud     // msg desc for V: binding index +2, sampler idx field 0xC
+    mov (1)     r16.2:ud      0x0000E000:ud                           // Enable Red channel (masks 15:12 = 1110, bit 12 = 0)
+
+    send (1)    uwBUFFER_1(8)<1>   r16    0x2    a0.0:ud
+    // Returns V data in 4 GRFs in scrambled order
+
+SKIP_AVS_LOAD_L0_1_:
+        nop
+
+
diff --git a/src/shaders/post_processing/gen7/PL3_AVS_Buf_2.g4a b/src/shaders/post_processing/gen7/PL3_AVS_Buf_2.g4a
new file mode 100644
index 0000000..aa96383
--- /dev/null
+++ b/src/shaders/post_processing/gen7/PL3_AVS_Buf_2.g4a
@@ -0,0 +1,543 @@
+/*
+ *  Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+//   45    // Total instruction count
+//    1    // Total kernel count
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+// Assembler defaults: execution size 16 and register type :ub apply where an instruction or operand does not specify its own.
+.default_execution_size (16)
+.default_register_type  :ub
+// Register budget: 128 GRFs in total; the payload count of 7 matches r0 plus the static parameters r1-r6 listed under "GRF partition".
+.reg_count_total        128
+.reg_count_payload      7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+                                    //  e.g.            byte0   byte1  byte2
+                                    // YUYV               0       1      3
+                                    // YVYU               0       3      1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+                                    //  e.g.              byte0           byte1           byte2
+                                    // YUYV                 0               1               3
+                                    // YVYU                 0               3               1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+//                = 000 0001 (min message len 1 ) 0,0000 (resp len 0   -add later)
+//                  0000,0000,0000
+//                  0001(Spawn a root thread),0001 (Root thread spawn thread)
+//                = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+//                = 000 0110 (min message len 6 ) 0,0000 (resp len 0   -add later)
+//                  1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+//                  0000,0000 (Binding table index, added later)
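+//                = 0x0C098700   (NOTE(review): previously 0x02000011, the thread-spawner value; recomputed from the bit fields listed above - confirm)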
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+        // Message Descriptors
+                //                = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+                //                  1 (header present 1) 0 1010 (media block write) 000000
+                //                  00000000 (binding table index - set later)
+                //                = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+                             // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT  r43    // Dummy Message Register for EOT
+
+// Four typed views (ub, uw, ud, f) of the message payload area that starts at r30.
+.declare    mubMSGPAYLOAD  Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare    muwMSGPAYLOAD  Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare    mudMSGPAYLOAD  Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare    mfMSGPAYLOAD   Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+// Five typed views of r9 (f, ud, uw, ub, and ub4 with a 4-byte element stride); all alias the same storage.
+.declare fROBUF		  Base=r9.0		ElementSize=4		SrcRegion=<8;8,1>		  DstRegion=<1>		Type=f
+
+.declare udROBUF		Base=r9.0		ElementSize=4		SrcRegion=<8;8,1>		  DstRegion=<1>		Type=ud
+
+.declare uwROBUF		Base=r9.0		ElementSize=2		SrcRegion=<16;16,1>		DstRegion=<1>		Type=uw
+
+.declare ubROBUF		Base=r9.0		ElementSize=1		SrcRegion=<16;16,1>		DstRegion=<1>		Type=ub
+
+.declare ub4ROBUF 	Base=r9.0		ElementSize=1		SrcRegion=<32;8,4>		DstRegion=<4>		Type=ub
+
+
+// End of common.inc
+
+
+// FileName:		PL3_AVS_Buf_2.asm
+// Author:			Tatiya, Rupesh
+// Description:		Loads 8x8 AVS/IEF PL3 data into Buffer 2
+
+
+
+// FileName     :   PL3_AVS_Buf.asm
+// Author       :   Tatiya, Rupesh
+// Description  :   Loads 8x8 AVS/IEF PL3 data into Buffer N
+
+
+
+// Module name: Scaling.inc
+
+
+
+
+// Description: Includes all definitions explicit to Fast Composite.
+
+
+
+
+// End of common.inc
+
+
+//========== GRF partition ==========
+     // r0 header            :   r0          (1 GRF)
+     // Static parameters    :   r1 - r6     (6 GRFS)
+     // Inline parameters    :   r7 - r8     (2 GRFs)
+     // MSGSRC               :   r27         (1 GRF)
+//===================================
+
+//Interface:
+//========== Static Parameters (Explicit To Fast Composite) ==========
+//r1
+//CSC Set 0
+
+// Dword view of r1, the first static-parameter GRF (headed "CSC Set 0" above).
+.declare udCSC_CURBE    Base=r1.0      ElementSize=4       Type=ud
+
+//Constant alpha
+
+
+//r2
+
+
+// Gen7 AVS WA
+
+
+// WiDi Definitions
+
+
+//Colorfill
+
+
+                                      // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise.	    
+// Byte view starting at r2.20; the scalar source region <0;1,0> replicates one byte to every channel.
+.declare ubCOLOR_PIXEL_VAL      Base=r2.20      ElementSize=1       SrcRegion=<0;1,0>       DstRegion=<1>       Type=ub
+
+//r3
+//Normalised Ratio of Horizontal step size with main video for all layers
+
+
+    //Normalised Ratio of Horizontal step size with main video for all layers becomes
+    //Normalised Horizontal step size for all layers in VP_Setup.asm
+
+
+//r4
+//Normalised Vertical step size for all layers
+
+
+//r5
+//Normalised Vertical Frame Origin for all layers
+
+
+//r6
+//Normalised Horizontal Frame Origin for all layers
+
+
+//========== Inline Parameters (Explicit To Fast Composite) ==========
+
+
+//Main video Step X
+
+
+//====================== Binding table (Explicit To Fast Composite)=========================================
+
+
+//Used by Interlaced Scaling Kernels
+
+
+//========== Sampler State Table Index (Explicit To Fast Composite)==========
+//Sampler Index for AVS/IEF messages
+
+
+//Sampler Index for SIMD16 sampler messages
+
+
+//=============================================================================
+// Working buffers BUFFER_0..3 start at r64, r80, r96, r112 (16 GRFs apart); BUFFER_4/5 start at r28 and r46. Float views:
+.declare fBUFFER_0      Base=r64.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+.declare fBUFFER_1      Base=r80.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+.declare fBUFFER_2      Base=r96.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+.declare fBUFFER_3      Base=r112.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+.declare fBUFFER_4      Base=r28.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+.declare fBUFFER_5      Base=r46.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+// Same six buffers as unsigned dwords.
+.declare udBUFFER_0     Base=r64.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_1     Base=r80.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_2     Base=r96.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_3     Base=r112.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_4     Base=r28.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_5     Base=r46.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+// Same six buffers as unsigned words.
+.declare uwBUFFER_0     Base=r64.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_1     Base=r80.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_2     Base=r96.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_3     Base=r112.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_4     Base=r28.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_5     Base=r46.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+// Same six buffers as contiguous unsigned bytes.
+.declare ubBUFFER_0     Base=r64.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_1     Base=r80.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_2     Base=r96.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_3     Base=r112.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_4     Base=r28.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_5     Base=r46.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+// Same six buffers as strided bytes: the <32;8,4> source and <4> destination regions step 4 bytes per element.
+.declare ub4BUFFER_0    Base=r64.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_1    Base=r80.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_2    Base=r96.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_3    Base=r112.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_4    Base=r28.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_5    Base=r46.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+
+//Pointer to mask reg
+
+
+//r18
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+// r18 and r19: one dword-typed GRF each, named for the CSC coefficients.
+.declare udCSC_COEFF_0  Base=r18.0    ElementSize=4 Type=ud       // 1 GRF
+
+//r19
+
+
+.declare udCSC_COEFF_1  Base=r19.0    ElementSize=4 Type=ud       // 1 GRF
+
+
+//r20
+// r20 and r21: one word-typed GRF each (16 contiguous words), named for the alpha mask and its temporary copy.
+.declare uwALPHA_MASK_REG_TEMP  Base=r20.0    ElementSize=2 SrcRegion=<16;16,1> Type=uw        // 1 GRF
+
+//r21
+
+.declare uwALPHA_MASK_REG       Base=r21.0         ElementSize=2 SrcRegion=<16;16,1> Type=uw        // 1 GRF
+
+//r22
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as
+//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT
+
+//r23
+
+
+//Lumakey
+
+
+//r24
+
+
+//r25
+
+
+//r26
+
+
+//defines to generate LABELS during compile time.
+
+
+        // Message Header
+        // m0.7         31:0    Debug
+        // m0.6         31:0    Debug
+        // m0.5         31:0    Ignored
+        // m0.4         31:0    Ignored
+        // m0.3         31:0    Ignored
+        // m0.2         31:16   Ignored
+        //              15      Alpha Write Channel Mask        enable=0, disable=1
+        //              14      Blue Write Channel Mask  (U)
+        //              13      Green Write Channel Mask (Y)
+        //              12      Red Write Channel Mask   (V)
+        //              11:0    Ignored
+        // m0.1                 Ignored
+        // m0.0                 Ignored
+
+
+        // AVS payload
+        // m1.7                 Group ID Number
+        // m1.6                 U 2nd Derivative        ---> NLAS dx
+        // m1.5                 Delta V                 ---> Step Y
+        // m1.4                 Delta U                 ---> Step X
+        // m1.3                 Pixel 0 V Address       ---> ORIY (Y0)
+        // m1.2                 Pixel 0 U Address       ---> ORIX (X0)
+        // m1.1                 Vertical Block Number
+        // m1.0                 Reserved
+
+        // Sampler Message Descriptor
+        // 31:29        Reserved                        000
+        // 28:25        Message length                  0010
+        // 24:20        Response length                 xxxxx   ---> 4GRFs for each enabled channel (AVS), 2GRFs for each enabled channel (sample unorm)
+        // 19           Header Present                  1
+        // 18:17        SIMD Mode                       11      ---> SIMD32/64
+        // 16:12        Message Type                    xxxxx   ---> 01011 sample_8x8, 01100 (sample_unorm), 01010 (sample_unorm+killpix)
+        // 11:8         Sampler Index                   xxxx
+        // 7:0          Binding Table Index             xxxxxxxx
+
+
+        // Msg Header M0.2
+        // 15:15        Alpha Write Channel Mask, 0: written back, 1: not written back
+        // 14:14        Blue  Write Channel Mask
+        // 13:13        Green Write Channel Mask
+        // 12:12        Red   Write Channel Mask
+
+
+//By design, Buffer 0,1,2,3 always have Layer 0 and Buffer 4,5 always have L1-L7
+
+
+//used to generate LABELS at compile time.
+
+
+        // 18:17        SIMD Mode                       10      ---> SIMD16
+        // 16:12        Message Type                    xxxxx   ---> 00000 (SIMD16)
+
+
+//r10-17  - 8 GRFs to load SIMD16 data (up to 4 channels)
+//r18-19  - 2 GRFs to store sampler ramp.
+// NOTE(review): the ranges above do not match the declares below (payload views at r14, fSAMPLER_RAMP at r9) - confirm against Scaling.inc.
+    .declare mfSCALING_0X_34X_PAYLOAD	Base=r14.0	ElementSize=4		SrcRegion=<8;8,1>		DstRegion=<1>		Type=f
+    .declare muwSCALING_0X_34X_PAYLOAD	Base=r14.0	ElementSize=2		SrcRegion=<16;16,1>		DstRegion=<1>		Type=uw
+    .declare mudCALING_0X_34X_PAYLOAD	Base=r14.0	ElementSize=4		SrcRegion=<8;8,1>		DstRegion=<1>		Type=ud
+    .declare mubCALING_0X_34X_PAYLOAD	Base=r14.0	ElementSize=1		SrcRegion=<32;32,1>		DstRegion=<1>		Type=ub
+// NOTE(review): the ud/ub views above are spelled "CALING" (no 'S'), unlike the f/uw views; names kept as-is because users outside this listing may reference them.
+// Typed views (f, ud, strided ub, uw) of the temporary register r9.
+	.declare fSCALING_0X_34X_TEMP		Base=r9.0	ElementSize=4		SrcRegion=<8;8,1>		DstRegion=<1>		Type=f
+	.declare udSCALING_0X_34X_TEMP		Base=r9.0	ElementSize=4		SrcRegion=<8;8,1>		DstRegion=<1>		Type=ud
+	.declare ub4SCALING_0X_34X_TEMP		Base=r9.0	ElementSize=1		SrcRegion=<32;8,4>		DstRegion=<1>		Type=ub
+	.declare uwSCALING_0X_34X_TEMP		Base=r9.0	ElementSize=2		SrcRegion=<16;16,1>		DstRegion=<1>		Type=uw
+
+	// Sampler ramp is used for Scaling 0X_0.34X; it shares r9 with the *_TEMP views above.
+	.declare	fSAMPLER_RAMP  		Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> Type=f		// 1 GRF, 8 elements
+
+
+	//#define rMSGDSC_UV    		r23.0
+
+
+//End of _SCALING_
+
+
+    // Load Y, U and V for this block into BUFFER_2 (base r96) with three sampler sends; the whole load is skipped when f0.1 is clear.
+
+
+        // f0.1 pre-computed in Set_Layer_0
+        (-f0.1)  jmpi  (1)  SKIP_AVS_LOAD_L0_2_
+
+
+    //AVS_PAYLOAD already has all the data loaded at this point
+    add (1)     a0.0:ud     r23.5<0;1,0>:ud      0x44EB400:ud      // msg desc for Y = r23.5 + (msg len 2, resp len 4, header, sample_8x8, sampler idx 4)
+
+    mov (1)     r16.2:ud      0x0000D000:ud            // Channel masks 15:12 = 1101: only bit 13 (Green/Y) is 0, i.e. written back
+
+
+    // set the vertical block number (payload dword 1) to r7.12:uw + 2
+
+
+      add (1)   r25.1<1>:ud    r7.12:uw  2:ud
+
+
+    mov (8)     r17.0:ud      r25.0<8;8,1>:ud         // Copy msg payload mirror (r25) into r17, which follows the header r16 in the 2-register message
+
+    // Gen7 AVS WA Only for YUV packed surfaces, NV12 and Y-channel only for Planar surfaces
+    //   if (((int)(u_left*width + 5.0/256) > (int)(u_left*width)) 
+    //   {
+    //       modified_u_coord = u_coord – 5.0/(256*width); //floating point
+    //   }
+    //   else if(((int)(u_left*width + 255.0/256) == (int)(u_left*width)) 
+    //   {
+    //       modified_u_coord = u_coord + 1.0/(256*width); //floating point
+    //   }
+    //   else{
+    //       modified_u_coord = u_coord;
+    //   }
+    //   Where u_left = u – 2*du + 3*ddu for IEF On
+    //   And u_left = u for IEF Off case
+    //   
+
+    // check whether Gen7 AVS WA is enabled (bit 1 of r2.3:uw); f0.0 and the unmodified U origin are saved first
+    mov  (1)    r14.8:uw            f0.0:uw                           // save f0.0
+    mov  (1)    r14.5:f             r17.2<0;1,0>:f           // save pixel 0 U for chroma
+
+    and.nz.f0.0  (1)     null<1>:uw     r2.3:uw    0x2:uw
+    (-f0.0)jmpi  (1)     GEN7_PL3_AVS_WA_DONE_L0_2_
+
+    // Gen7 AVS WA, check if IEF is ON (bit 2 of r2.3:uw) for choosing Gen7 AVS WA formula
+    // IEF ON: acc0 = u + du*(-2.0) + ddu*3.0, taken from payload dwords 2 (u), 4 (du) and 6 (ddu)
+    and.nz.f0.0  (8)     null<1>:uw     r2.3<0;1,0>:uw             0x4:uw
+    (f0.0)mov (8)    acc0.0:f          r17.2<0;1,0>:f
+    (f0.0)mac (8)    acc0.0:f          r17.4<0;1,0>:f    -2.0:f
+    (f0.0)mac (8)    acc0.0:f          r17.6<0;1,0>:f     3.0:f
+    (f0.0)mov (1)    r14.2:f        acc0:f                                 // IEF ON,  rTEMP3.2 = u_left
+    (-f0.0)mov (1)   r14.2:f        r17.2<0;1,0>:f                // IEF OFF, rTEMP3.2 = u_left
+    // r14.1 = (float)(bits 15:3 of r2.3:uw); used below as "width" in the WA formula
+    and (1)         r14.1:ud     r2.3:uw    0xFFF8:uw
+    asr (1)         r14.1:ud     r14.1:ud    3:d
+    mov (1)         r14.1:f      r14.1:ud
+
+    // Gen7 AVS WA, if (int)(u_left*width + 5.0/256) > (int)(u_left*width)
+    mul (1)          r14.0:f        r14.2:f                  r14.1:f    // rTEMP3.0 = u_left*width
+    add (1)          r14.2:f        r14.0:f                  0.01953125:f  // rTEMP3.2 = u_left*width + 5.0/256
+    add (1)          r14.3:f        r14.0:f                  0.99609375:f  // rTEMP3.3 = u_left*width + 255.0/256
+
+    //Check if the values are < 0 and account for (int) cast of negative numbers
+    // Each group below: flag = (value < 0), convert float to int in place, then subtract 1 when the flag is set.
+    //(int)(u_left*width)
+    cmp.l.f0.0 (1)   null<1>:f          r14.0:f                  0.00000000:f          
+    mov (1)          r14.0:d        r14.0:f
+    (f0.0)add (1)    r14.0:d        r14.0<0;1,0>:d           -1:d        
+
+    //(int)(u_left*width + 5.0/256)
+    cmp.l.f0.0 (1)   null<1>:f          r14.2:f                  0.00000000:f          
+    mov (1)          r14.2:d        r14.2:f        
+    (f0.0)add (1)    r14.2:d        r14.2<0;1,0>:d           -1:d        
+
+    //(int)(u_left*width + 255.0/256)
+    cmp.l.f0.0 (1)   null<1>:f          r14.3:f                  0.00000000:f          
+    mov (1)          r14.3:d        r14.3:f
+    (f0.0)add (1)    r14.3:d        r14.3<0;1,0>:d           -1:d        
+
+    mov  (1)    f0.0:uw                0:uw                   // clear flag so the last add is skipped when the predicated compare below does not execute
+    //if (((int)(u_left*width + 5.0/256) > (int)(u_left*width))
+    cmp.g.f1.0  (1)     null<1>:d        r14.2:d    r14.0:d
+    // modified_u_coord = u_coord – 5.0/(256*width); //floating point
+    (f1.0) add (1)     r17.2:f       r17.2<0;1,0>:f   -r2.3:f      // presumably r2.3:f holds 5.0/(256*width) - confirm against the CURBE layout
+    //else if(((int)(u_left*width + 255.0/256) == (int)(u_left*width)) 
+    (-f1.0) cmp.e.f0.0  (1) null<1>:d    r14.3:d    r14.0:d
+    // modified_u_coord = u_coord + 1.0/(256*width); //floating point
+    (f0.0) add (1)     r17.2:f       r17.2<0;1,0>:f   r2.2:f       // presumably r2.2:f holds 1.0/(256*width) - confirm against the CURBE layout
+
+GEN7_PL3_AVS_WA_DONE_L0_2_:
+    mov  (1)    f0.0:uw                r14.8:uw                   // restore f0.0
+
+    // Y load: response is written starting at GRF offset 0 of BUFFER_2
+    send (1)    uwBUFFER_2(0)<1>   r16    0x2    a0.0:ud
+    // Returns Y data in 4 GRFs in scrambled order
+
+    mov  (1)    r17.2:f       r14.5:f                    // restore pixel 0 U for chroma, No AVS WA for chroma
+
+    add (1)     a0.0:ud     r23.5<0;1,0>:ud      0x44EB801:ud     // msg desc for U: binding index +1, sampler idx field 8
+    mov (1)     r16.2:ud      0x0000E000:ud                           // Enable Red channel (masks 15:12 = 1110, bit 12 = 0)
+
+    send (1)    uwBUFFER_2(4)<1>   r16    0x2    a0.0:ud
+    // Returns U data in 4 GRFs in scrambled order
+
+    add (1)     a0.0:ud     r23.5<0;1,0>:ud      0x44EBC02:ud     // msg desc for V: binding index +2, sampler idx field 0xC
+    mov (1)     r16.2:ud      0x0000E000:ud                           // Enable Red channel (masks 15:12 = 1110, bit 12 = 0)
+
+    send (1)    uwBUFFER_2(8)<1>   r16    0x2    a0.0:ud
+    // Returns V data in 4 GRFs in scrambled order
+
+SKIP_AVS_LOAD_L0_2_:
+        nop
+
+
diff --git a/src/shaders/post_processing/gen7/PL3_AVS_Buf_3.g4a b/src/shaders/post_processing/gen7/PL3_AVS_Buf_3.g4a
new file mode 100644
index 0000000..dfacd42
--- /dev/null
+++ b/src/shaders/post_processing/gen7/PL3_AVS_Buf_3.g4a
@@ -0,0 +1,543 @@
+/*
+ *  Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+//   45    // Total instruction count
+//    1    // Total kernel count
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16)
+.default_register_type  :ub
+
+.reg_count_total        128
+.reg_count_payload      7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+                                    //  e.g.            byte0   byte1  byte2
+                                    // YUYV               0       1      3
+                                    // YVYU               0       3      1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+                                    //  e.g.              byte0           byte1           byte2
+                                    // YUYV                 0               1               3
+                                    // YVYU                 0               3               1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+//                = 000 0001 (min message len 1 ) 0,0000 (resp len 0   -add later)
+//                  0000,0000,0000
+//                  0001(Spawn a root thread),0001 (Root thread spawn thread)
+//                = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+//                = 000 0110 (min message len 6 ) 0,0000 (resp len 0   -add later)
+//                  1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+//                  0000,0000 (Binding table index, added later)
+//                = 0x0C098700
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+        // Message Descriptors
+                //                = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+                //                  1 (header present 1) 0 1010 (media block write) 000000
+                //                  00000000 (binding table index - set later)
+                //                = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+                             // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT  r43    // Dummy Message Register for EOT
+
+
+.declare    mubMSGPAYLOAD  Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare    muwMSGPAYLOAD  Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare    mudMSGPAYLOAD  Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare    mfMSGPAYLOAD   Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+
+.declare fROBUF		  Base=r9.0		ElementSize=4		SrcRegion=<8;8,1>		  DstRegion=<1>		Type=f
+
+.declare udROBUF		Base=r9.0		ElementSize=4		SrcRegion=<8;8,1>		  DstRegion=<1>		Type=ud
+
+.declare uwROBUF		Base=r9.0		ElementSize=2		SrcRegion=<16;16,1>		DstRegion=<1>		Type=uw
+
+.declare ubROBUF		Base=r9.0		ElementSize=1		SrcRegion=<16;16,1>		DstRegion=<1>		Type=ub
+
+.declare ub4ROBUF 	Base=r9.0		ElementSize=1		SrcRegion=<32;8,4>		DstRegion=<4>		Type=ub
+
+
+// End of common.inc
+
+
+// FileName:		PL3_AVS_Buf_3.asm
+// Author:			Tatiya, Rupesh
+// Description:		Loads 8x8 AVS/IEF PL3 data into Buffer 3
+
+
+
+// FileName     :   PL3_AVS_Buf.asm
+// Author       :   Tatiya, Rupesh
+// Description  :   Loads 8x8 AVS/IEF PL3 data into Buffer N
+
+
+
+// Module name: Scaling.inc
+
+
+
+
+// Description: Includes all definitions explicit to Fast Composite.
+
+
+
+
+// End of common.inc
+
+
+//========== GRF partition ==========
+     // r0 header            :   r0          (1 GRF)
+     // Static parameters    :   r1 - r6     (6 GRFS)
+     // Inline parameters    :   r7 - r8     (2 GRFs)
+     // MSGSRC               :   r27         (1 GRF)
+//===================================
+
+//Interface:
+//========== Static Parameters (Explicit To Fast Composite) ==========
+//r1
+//CSC Set 0
+
+
+.declare udCSC_CURBE    Base=r1.0      ElementSize=4       Type=ud
+
+//Constant alpha
+
+
+//r2
+
+
+// Gen7 AVS WA
+
+
+// WiDi Definitions
+
+
+//Colorfill
+
+
+                                      // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise.	    
+
+.declare ubCOLOR_PIXEL_VAL      Base=r2.20      ElementSize=1       SrcRegion=<0;1,0>       DstRegion=<1>       Type=ub
+
+//r3
+//Normalised Ratio of Horizontal step size with main video for all layers
+
+
+    //Normalised Ratio of Horizontal step size with main video for all layers becomes
+    //Normalised Horizontal step size for all layers in VP_Setup.asm
+
+
+//r4
+//Normalised Vertical step size for all layers
+
+
+//r5
+//Normalised Vertical Frame Origin for all layers
+
+
+//r6
+//Normalised Horizontal Frame Origin for all layers
+
+
+//========== Inline Parameters (Explicit To Fast Composite) ==========
+
+
+//Main video Step X
+
+
+//====================== Binding table (Explicit To Fast Composite)=========================================
+
+
+//Used by Interlaced Scaling Kernels
+
+
+//========== Sampler State Table Index (Explicit To Fast Composite)==========
+//Sampler Index for AVS/IEF messages
+
+
+//Sampler Index for SIMD16 sampler messages
+
+
+//=============================================================================
+
+.declare fBUFFER_0      Base=r64.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+.declare fBUFFER_1      Base=r80.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+.declare fBUFFER_2      Base=r96.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+.declare fBUFFER_3      Base=r112.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+.declare fBUFFER_4      Base=r28.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+.declare fBUFFER_5      Base=r46.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+
+.declare udBUFFER_0     Base=r64.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_1     Base=r80.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_2     Base=r96.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_3     Base=r112.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_4     Base=r28.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_5     Base=r46.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+
+.declare uwBUFFER_0     Base=r64.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_1     Base=r80.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_2     Base=r96.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_3     Base=r112.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_4     Base=r28.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_5     Base=r46.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+
+.declare ubBUFFER_0     Base=r64.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_1     Base=r80.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_2     Base=r96.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_3     Base=r112.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_4     Base=r28.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_5     Base=r46.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+
+.declare ub4BUFFER_0    Base=r64.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_1    Base=r80.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_2    Base=r96.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_3    Base=r112.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_4    Base=r28.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_5    Base=r46.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+
+//Pointer to mask reg
+
+
+//r18
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+.declare udCSC_COEFF_0  Base=r18.0    ElementSize=4 Type=ud       // 1 GRF
+
+//r19
+
+
+.declare udCSC_COEFF_1  Base=r19.0    ElementSize=4 Type=ud       // 1 GRF
+
+
+//r20
+
+.declare uwALPHA_MASK_REG_TEMP  Base=r20.0    ElementSize=2 SrcRegion=<16;16,1> Type=uw        // 1 GRF
+
+//r21
+
+.declare uwALPHA_MASK_REG       Base=r21.0         ElementSize=2 SrcRegion=<16;16,1> Type=uw        // 1 GRF
+
+//r22
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as
+//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT
+
+//r23
+
+
+//Lumakey
+
+
+//r24
+
+
+//r25
+
+
+//r26
+
+
+//defines to generate LABELS during compile time.
+
+
+        // Message Header
+        // m0.7         31:0    Debug
+        // m0.6         31:0    Debug
+        // m0.5         31:0    Ignored
+        // m0.4         31:0    Ignored
+        // m0.3         31:0    Ignored
+        // m0.2         31:16   Ignored
+        //              15      Alpha Write Channel Mask        enable=0, disable=1
+        //              14      Blue Write Channel Mask  (U)
+        //              13      Green Write Channel Mask (Y)
+        //              12      Red Write Channel Mask   (V)
+        //              11:0    Ignored
+        // m0.1                 Ignored
+        // m0.0                 Ignored
+
+
+        // AVS payload
+        // m1.7                 Group ID Number
+        // m1.6                 U 2nd Derivative        ---> NLAS dx
+        // m1.5                 Delta V                 ---> Step Y
+        // m1.4                 Delta U                 ---> Step X
+        // m1.3                 Pixel 0 V Address       ---> ORIY (Y0)
+        // m1.2                 Pixel 0 U Address       ---> ORIX (X0)
+        // m1.1                 Vertical Block Number
+        // m1.0                 Reserved
+
+        // Sampler Message Descriptor
+        // 31:29        Reserved                        000
+        // 28:25        Message length                  0010
+        // 24:20        Response length                 xxxxx   ---> 4GRFs for each enabled channel (AVS), 2GRFs for each enabled channel (sample unorm)
+        // 19           Header Present                  1
+        // 18:17        SIMD Mode                       11      ---> SIMD32/64
+        // 16:12        Message Type                    xxxxx   ---> 01011 sample_8x8, 01100 (sample_unorm), 01010 (sample_unorm+killpix)
+        // 11:8         Sampler Index                   xxxx
+        // 7:0          Binding Table Index             xxxxxxxx
+
+
+        // Msg Header M0.2
+        // 15:15        Alpha Write Channel Mask, 0: written back, 1: not written back
+        // 14:14        Blue  Write Channel Mask
+        // 13:13        Green Write Channel Mask
+        // 12:12        Red   Write Channel Mask
+
+
+//By design, Buffer 0,1,2,3 always have Layer 0 and Buffer 4,5 always have L1-L7
+
+
+//used to generate LABELS at compile time.
+
+
+        // 18:17        SIMD Mode                       10      ---> SIMD16
+        // 16:12        Message Type                    xxxxx   ---> 00000 (SIMD16)
+
+
+//r10-17  - 8 GRFs to load SIMD16 data (upto 4 channels)
+//r18-19  - 2 GRFs to store sampler ramp.
+
+    .declare mfSCALING_0X_34X_PAYLOAD	Base=r14.0	ElementSize=4		SrcRegion=<8;8,1>		DstRegion=<1>		Type=f
+    .declare muwSCALING_0X_34X_PAYLOAD	Base=r14.0	ElementSize=2		SrcRegion=<16;16,1>		DstRegion=<1>		Type=uw
+    .declare mudCALING_0X_34X_PAYLOAD	Base=r14.0	ElementSize=4		SrcRegion=<8;8,1>		DstRegion=<1>		Type=ud
+    .declare mubCALING_0X_34X_PAYLOAD	Base=r14.0	ElementSize=1		SrcRegion=<32;32,1>		DstRegion=<1>		Type=ub
+
+
+	.declare fSCALING_0X_34X_TEMP		Base=r9.0	ElementSize=4		SrcRegion=<8;8,1>		DstRegion=<1>		Type=f
+	.declare udSCALING_0X_34X_TEMP		Base=r9.0	ElementSize=4		SrcRegion=<8;8,1>		DstRegion=<1>		Type=ud
+	.declare ub4SCALING_0X_34X_TEMP		Base=r9.0	ElementSize=1		SrcRegion=<32;8,4>		DstRegion=<1>		Type=ub
+	.declare uwSCALING_0X_34X_TEMP		Base=r9.0	ElementSize=2		SrcRegion=<16;16,1>		DstRegion=<1>		Type=uw
+
+	// Sampler ramp is used for Scaling 0X_0.34X
+	.declare	fSAMPLER_RAMP  		Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> Type=f		// 1 GRFs, 8 elements
+
+
+	//#define rMSGDSC_UV    		r23.0
+
+
+//End of _SCALING_
+
+
+    //Check if layer is to be skipped; when skipped, none of the three AVS reads below is issued
+
+
+        // f0.1 pre-computed in Set_Layer_0
+        (-f0.1)  jmpi  (1)  SKIP_AVS_LOAD_L0_3_
+
+
+    //AVS_PAYLOAD already has all the data loaded at this point
+    add (1)     a0.0:ud     r23.5<0;1,0>:ud      0x44EB400:ud      // msg desc for Y: immediate adds msg len 2, resp len 4, header present, sample_8x8, sampler index 4 to r23.5
+
+    mov (1)     r16.2:ud      0x0000D000:ud            // Enable Green (Y) channel only: mask bit 13 is the only one left clear (0 = written back)
+
+
+    // set the vertical block number
+
+
+      add (1)   r25.1<1>:ud    r7.12:uw  3:ud      // r25 is copied to the payload GRF (r17) below, so this becomes payload dword 1
+
+
+    mov (8)     r17.0:ud      r25.0<8;8,1>:ud         // Copy msg payload mirrors to MRFs
+
+    // Gen7 AVS WA Only for YUV packed surfaces, NV12 and Y-channel only for Planar surfaces
+    //   if (((int)(u_left*width + 5.0/256) > (int)(u_left*width)) 
+    //   {
+    //       modified_u_coord = u_coord – 5.0/(256*width); //floating point
+    //   }
+    //   else if(((int)(u_left*width + 255.0/256) == (int)(u_left*width)) 
+    //   {
+    //       modified_u_coord = u_coord + 1.0/(256*width); //floating point
+    //   }
+    //   else{
+    //       modified_u_coord = u_coord;
+    //   }
+    //   Where u_left = u – 2*du + 3*ddu for IEF On
+    //   And u_left = u for IEF Off case
+    //   
+
+    // check whether Gen7 AVS WA is enabled,
+    mov  (1)    r14.8:uw            f0.0:uw                           // save f0.0 (the WA below reuses f0.0; it is restored at GEN7_PL3_AVS_WA_DONE_L0_3_)
+    mov  (1)    r14.5:f             r17.2<0;1,0>:f           // save pixel 0 U for chroma (the WA may modify r17.2 for the Y read only)
+
+    and.nz.f0.0  (1)     null<1>:uw     r2.3:uw    0x2:uw    // bit 1 of r2.3:uw gates the WA
+    (-f0.0)jmpi  (1)     GEN7_PL3_AVS_WA_DONE_L0_3_
+
+    // Gen7 AVS WA, check if IEF is ON for choosing Gen7 AVS WA formula
+
+    and.nz.f0.0  (8)     null<1>:uw     r2.3<0;1,0>:uw             0x4:uw    // bit 2 of r2.3:uw selects the IEF-on formula
+    (f0.0)mov (8)    acc0.0:f          r17.2<0;1,0>:f
+    (f0.0)mac (8)    acc0.0:f          r17.4<0;1,0>:f    -2.0:f
+    (f0.0)mac (8)    acc0.0:f          r17.6<0;1,0>:f     3.0:f
+    (f0.0)mov (1)    r14.2:f        acc0:f                                 // IEF ON,  rTEMP3.2 = u_left
+    (-f0.0)mov (1)   r14.2:f        r17.2<0;1,0>:f                // IEF OFF, rTEMP3.2 = u_left
+
+    and (1)         r14.1:ud     r2.3:uw    0xFFF8:uw    // width is taken from bits 15:3 of r2.3:uw ...
+    asr (1)         r14.1:ud     r14.1:ud    3:d    // ... shifted down to bit 0 ...
+    mov (1)         r14.1:f      r14.1:ud    // ... and converted to float for the multiply below
+
+    // Gen7 AVS WA, if (int)(u_left*width + 5.0/256) > (int)(u_left*width)
+    mul (1)          r14.0:f        r14.2:f                  r14.1:f    // rTEMP3.0 = u_left*width
+    add (1)          r14.2:f        r14.0:f                  0.01953125:f  // rTEMP3.2 = u_left*width + 5.0/256
+    add (1)          r14.3:f        r14.0:f                  0.99609375:f  // rTEMP3.3 = u_left*width + 255.0/256
+
+    //Check if the values are < 0 and account for (int) cast of negative numbers
+    // (each sequence below: test the float for < 0, convert to int in place, then subtract 1 if it was negative)
+    //(int)(u_left*width)
+    cmp.l.f0.0 (1)   null<1>:f          r14.0:f                  0.00000000:f          
+    mov (1)          r14.0:d        r14.0:f
+    (f0.0)add (1)    r14.0:d        r14.0<0;1,0>:d           -1:d        
+
+    //(int)(u_left*width + 5.0/256)
+    cmp.l.f0.0 (1)   null<1>:f          r14.2:f                  0.00000000:f          
+    mov (1)          r14.2:d        r14.2:f        
+    (f0.0)add (1)    r14.2:d        r14.2<0;1,0>:d           -1:d        
+
+    //(int)(u_left*width + 255.0/256)
+    cmp.l.f0.0 (1)   null<1>:f          r14.3:f                  0.00000000:f          
+    mov (1)          r14.3:d        r14.3:f
+    (f0.0)add (1)    r14.3:d        r14.3<0;1,0>:d           -1:d        
+
+    mov  (1)    f0.0:uw                0:uw                   // clear flag, so the final (f0.0) add cannot fire when the predicated cmp.e below is skipped
+    //if (((int)(u_left*width + 5.0/256) > (int)(u_left*width))
+    cmp.g.f1.0  (1)     null<1>:d        r14.2:d    r14.0:d
+    // modified_u_coord = u_coord – 5.0/(256*width); //floating point
+    (f1.0) add (1)     r17.2:f       r17.2<0;1,0>:f   -r2.3:f
+    //else if(((int)(u_left*width + 255.0/256) == (int)(u_left*width)) 
+    (-f1.0) cmp.e.f0.0  (1) null<1>:d    r14.3:d    r14.0:d
+    // modified_u_coord = u_coord + 1.0/(256*width); //floating point
+    (f0.0) add (1)     r17.2:f       r17.2<0;1,0>:f   r2.2:f
+
+GEN7_PL3_AVS_WA_DONE_L0_3_:
+    mov  (1)    f0.0:uw                r14.8:uw                   // restore f0.0
+
+
+    send (1)    uwBUFFER_3(0)<1>   r16    0x2    a0.0:ud
+    // Returns Y data in 4 GRFs in scrambled order
+
+    mov  (1)    r17.2:f       r14.5:f                    // restore pixel 0 U for chroma, No AVS WA for chroma
+
+    add (1)     a0.0:ud     r23.5<0;1,0>:ud      0x44EB801:ud     // msg desc for U (immediate carries sampler index 8); 1 is added to change BI to U
+    mov (1)     r16.2:ud      0x0000E000:ud                           // Enable Red channel only: mask bit 12 is the only one left clear
+
+    send (1)    uwBUFFER_3(4)<1>   r16    0x2    a0.0:ud
+    // Returns U data in 4 GRFs in scrambled order
+
+    add (1)     a0.0:ud     r23.5<0;1,0>:ud      0x44EBC02:ud     // msg desc for V (immediate carries sampler index 12); 2 is added to change BI to V
+    mov (1)     r16.2:ud      0x0000E000:ud                           // Enable Red channel only: mask bit 12 is the only one left clear
+
+    send (1)    uwBUFFER_3(8)<1>   r16    0x2    a0.0:ud
+    // Returns V data in 4 GRFs in scrambled order
+
+SKIP_AVS_LOAD_L0_3_:
+        nop
+
+
diff --git a/src/shaders/post_processing/gen7/PL3_DNDI_422CP.g4a b/src/shaders/post_processing/gen7/PL3_DNDI_422CP.g4a
new file mode 100644
index 0000000..16a0fc9
--- /dev/null
+++ b/src/shaders/post_processing/gen7/PL3_DNDI_422CP.g4a
@@ -0,0 +1,562 @@
+/*
+ *  Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+//  120    // Total instruction count
+//    1    // Total kernel count
+
+.kernel PL3_DNDI_422CP
+.code
+
+
+
+// FileName:	DNDI_PL_Core.asm
+// Author:		Tatiya, Rupesh
+
+
+
+// FileName:	DNDI_Core.asm 
+// Author:		Vivek Kumar
+// Description:	Tasks for DN+DI case (16x4 block)
+
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16)
+.default_register_type  :ub
+
+.reg_count_total        128
+.reg_count_payload      7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+                                    //  e.g.            byte0   byte1  byte2
+                                    // YUYV               0       1      3
+                                    // YVYU               0       3      1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+                                    //  e.g.              byte0           byte1           byte2
+                                    // YUYV                 0               1               3
+                                    // YVYU                 0               3               1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+//                = 000 0001 (min message len 1 ) 0,0000 (resp len 0   -add later)
+//                  0000,0000,0000
+//                  0001(Spawn a root thread),0001 (Root thread spawn thread)
+//                = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+//                = 000 0110 (min message len 6 ) 0,0000 (resp len 0   -add later)
+//                  1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+//                  0000,0000 (Binding table index, added later)
+//                = 0x0C098700
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+        // Message Descriptors
+                //                = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+                //                  1 (header present 1) 0 1010 (media block write) 000000
+                //                  00000000 (binding table index - set later)
+                //                = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+                             // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT  r43    // Dummy Message Register for EOT
+
+
+.declare    mubMSGPAYLOAD  Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare    muwMSGPAYLOAD  Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare    mudMSGPAYLOAD  Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare    mfMSGPAYLOAD   Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+
+.declare fROBUF		  Base=r9.0		ElementSize=4		SrcRegion=<8;8,1>		  DstRegion=<1>		Type=f
+
+.declare udROBUF		Base=r9.0		ElementSize=4		SrcRegion=<8;8,1>		  DstRegion=<1>		Type=ud
+
+.declare uwROBUF		Base=r9.0		ElementSize=2		SrcRegion=<16;16,1>		DstRegion=<1>		Type=uw
+
+.declare ubROBUF		Base=r9.0		ElementSize=1		SrcRegion=<16;16,1>		DstRegion=<1>		Type=ub
+
+.declare ub4ROBUF 	Base=r9.0		ElementSize=1		SrcRegion=<32;8,4>		DstRegion=<4>		Type=ub
+
+
+// End of common.inc
+
+
+// FileName:    DNDI.inc
+// Author:      Vivek Kumar
+// Description: Include file for DN, DI and DNDI
+// Inputs:      DI_ENABLE, DN_ENABLE, DN_PLANAR, DN_PACKED
+
+
+
+
+// End of common.inc
+
+
+//Interface:
+//Static Parameters:
+//r1
+
+
+//====================== Binding table (Explicit To DNDI)=========================================
+
+
+.declare mudMSGHDR_DNDI     Base=r18      ElementSize=4    Type=ud
+.declare mdMSGHDR_DNDI      Base=r18      ElementSize=4    Type=d
+.declare mwMSGHDR_DNDI      Base=r18      ElementSize=2    Type=w
+
+
+.declare mudMSGHDR_STMM     Base=r20      ElementSize=4    Type=ud
+
+
+.declare mudMSGHDR_HIST     Base=r22      ElementSize=4    Type=ud
+
+
+.declare mudMSGHDR_ENC_STATS Base=r24 ElementSize=4   Type=ud
+.declare muwMSGHDR_ENC_STATS Base=r24 ElementSize=2   Type=uw
+.declare mubMSGHDR_ENC_STATS Base=r24 ElementSize=1   Type=ub
+
+
+.declare mudMSGHDR_DN_OUT   Base=r31.0      ElementSize=4  Type=ud
+.declare mdMSGHDR_DN_OUT    Base=r31.0      ElementSize=4  Type=d
+.declare mubMSGHDR_DN_OUT   Base=r31.0      ElementSize=1  Type=ub
+
+
+.declare mudMSGHDR_UVCOPY   Base=r36      ElementSize=4  Type=ud
+.declare mdMSGHDR_UVCOPY    Base=r36      ElementSize=4  Type=d
+.declare mudMSGHDR_UCOPY    Base=r36       ElementSize=4  Type=ud
+.declare mudMSGHDR_VCOPY    Base=r38       ElementSize=4  Type=ud
+
+
+.declare mudMSGHDR_DI_OUT1  Base=r18.0      ElementSize=4     Type=ud
+.declare mubMSGHDR_DI_OUT1  Base=r18.0      ElementSize=1     Type=ub
+
+
+.declare mudMSGHDR_DI_OUT2  Base=r23.0      ElementSize=4     Type=ud
+.declare mubMSGHDR_DI_OUT2  Base=r23.0      ElementSize=1     Type=ub
+
+//r45
+//Use r45 as message header, so no need to "mov" the data.
+
+.declare mudDN_Y_OUT        Base=r45.0 ElementSize=4 SrcRegion=<8;8,1>   DstRegion=<1> Type=ud
+
+// Message response (Denoised & DI-ed pixels & statistics); Use buffer 5
+.declare udDNDI_RESP        Base=r46.0 ElementSize=4 SrcRegion=<8;8,1>   DstRegion=<1> Type=ud
+.declare uwDNDI_RESP        Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare ubDNDI_RESP        Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+// Message response (UV Copy); Use buffer 5
+.declare udDNDI_UV_RESP     Base=r58.0 ElementSize=4 SrcRegion=<8;8,1>  DstRegion=<1> Type=ud
+.declare ubDNDI_UV_RESP     Base=r58.0 ElementSize=1 SrcRegion=<16;16,1>    DstRegion=<1> Type=ub
+
+//Temp GRFs: For 42X to 422 Conversion
+.declare uwDNDI_UVCOPY_TEMP Base=r10.0 ElementSize=2 SrcRegion=<16;16,1>    DstRegion=<1> Type=uw       //8 GRFs
+.declare ubDNDI_UVCOPY_TEMP Base=r10.0 ElementSize=1 SrcRegion=<16;16,1>    DstRegion=<1> Type=ub       //8 GRFs
+//---------------------------------------------------------------------------
+// Message descriptors
+//---------------------------------------------------------------------------
+// Extended message descriptor
+    // Message descriptor   for sampler read
+    //                    = 000 0010 (message len 2) 00000 (resp len - set later, 12 or 5 or 11)
+    //                      1 (header present 1) 0 11 (SIMD32/64 mode)
+    //                      1000 (message type) 0000 (DI state index)
+    //                      00000000 (binding table index - set later)
+    //                    = 0x040b8000
+
+
+// Attention: The Message Length is The Number of GRFs with Data Only, without the Header
+
+
+//---------------------------------------------------------------------------
+// VDI Return Data format
+//---------------------------------------------------------------------------
+// Defines for DI enabled
+
+
+// Defines for DI disabled
+
+
+
+// FileName:	DNDI_Command.asm 
+// Author:		Vivek Kumar
+// Description:	Sends a message to the VDI to process one DN (16x8) or DNDI (16x4) block
+
+// Prepare the DNDI send command
+mov (8)		mudMSGHDR_DNDI(0)<1>			r0.0<8;8,1>:ud					// message header
+mov (1)		mwMSGHDR_DNDI(1,4)<1>			r7.0<0;1,0>:w		{ NoDDClr }		// horizontal origin	// Do we need to add offset here? -vK
+mov (1)		mwMSGHDR_DNDI(1,12)<1>			r7.1<0;1,0>:w		{ NoDDChk }		// vertical origin		// Can these 2 be combined? - vK
+
+send (8)	udDNDI_RESP(0)<1>	r18	0x2	0x4BE8003:ud
+
+// On Gen6, with VDI walker, use the XY pair returned rather than programmed above
+// VDI_RETURNED_XY is ordered XY in case of walker enables and the same as programmed in case of walker disabled
+mov (2) 	r7.0<1>:w     uwDNDI_RESP(9,14)<2;2,1>	// horizontal/Vertial origin in W.14 and W.15    
+
+
+
+// FileName:	DI_STMM_Save.asm 
+// Author:		Vivek Kumar
+// Description:	Saves DI STMM Data to statistics surface in case of DI enabled (for 16x4 block)
+
+// Write STMM to memory
+mov (8)     mudMSGHDR_STMM(0)<1>	r0.0<8;8,1>:ud               							// message header   
+mov (8)     mudMSGHDR_STMM(1)<1>   	udDNDI_RESP(8,0)         					// Move STMM to MRF 
+
+shr (1)     mudMSGHDR_STMM(0,0)<1>	r7.0<0;1,0>:w            1:w     { NoDDClr } 		// X origin / 2
+mov (1)     mudMSGHDR_STMM(0,1)<1>	r7.1<0;1,0>:w                    { NoDDClr, NoDDChk }  	// Y origin
+mov (1)     mudMSGHDR_STMM(0,2)<1>	0x30007:ud           { NoDDChk } 		// block width and height (8x4)
+
+send (8)	null<1>:d	r20	0x5		0x40A8021:ud      
+
+
+
+// FileName:	DN_Hist_Save.asm 
+// Author:		Vivek Kumar
+// Description:	Saves DN history data to statistics surface
+
+// Write denoise history to memory
+mov (8)    r27<1>:ud				r0.0<8;8,1>:ud                   			// message header   
+
+	mov (1)    mudMSGHDR_HIST(1)<1>		udDNDI_RESP(9,0)<0;1,0>		// Move denoise history to MRF (4x1)
+
+
+shr (2)    r27.0<1>:ud		r7.0<2;2,1>:w				2:w                                 	// X,Y origin / 4
+add (1)    r27.0<1>:ud		r27.0<0;1,0>:ud			r1.12<0;1,0>:uw		{ NoDDClr }  	// Add pitch to X origin
+mov (1)    r27.2<1>:ud		0x3:ud									{ NoDDChk }  	// block width and height
+
+mov (8)		mudMSGHDR_HIST(0)<1>		r27.0<8;8,1>:ud
+send (8)	null<1>:d	r22	0x5		0x40A8021:ud
+
+
+
+// FileName:	DNDI_Enc_Stats_Save.asm
+// Author:		Vivek Kumar
+// Description:	Saves Encoder Statistics data to statistics surface in case of DI enabled (for 16x4 block)
+
+// Write encoder statistics to memory
+//Currently enable this only on Gen6 validation
+mov (8)		mudMSGHDR_ENC_STATS(1)<1>	0x0:ud						// Init payload MRF
+mov (8)		mudMSGHDR_ENC_STATS(0)<1>	r0.0<8;8,1>:ud				// message header
+
+shr (1)		mudMSGHDR_ENC_STATS(0,0)<1>		r7.0<0;1,0>:w            	1:w  	{ NoDDClr }			//enable the flag after testing on si           			{ NoDDClr }	// X origin / 2
+mul (1)		acc0.1<1>:ud					r7.1<0;1,0>:w				3:w																							// Y origin * 3
+shr (1)		mudMSGHDR_ENC_STATS(0,1)<1>		acc0.1<0;1,0>:ud			2:w		{ NoDDClr, NoDDChk }		//enable the flag after testing on si			   { NoDDClr, NoDDChk } // Y origin * 3/4
+mov (1)		mudMSGHDR_ENC_STATS(0,2)<1>		0x20007:ud				{ NoDDChk }			//enable the flag after testing on si						{ NoDDChk } // block width and height (8x3)
+add (2)		mudMSGHDR_ENC_STATS(0,0)<1>		mudMSGHDR_ENC_STATS(0,0)<2;2,1>       	r1.12<2;2,1>:uw					// Add pitch to X,Y origin
+
+
+	//Data block for Encoder Statistics
+	//----------------------------------------------------
+	//|  0  |   1  |   2   |  3  |  4  |  5  |  6  |  7  | Bytes
+	//----------------------------------------------------
+	//| BNE | MCNT | FCNT | TCNT |  X  |  X  |  X  |  X  |
+	//----------------------------------------------------
+	//|   DcTpT    |     SVCM    |   DcBpT   |   DcTpB   |
+	//----------------------------------------------------
+	//|   SHCM     |     STAD    |   DcTcB   |   DcBpB   |
+	//----------------------------------------------------
+	mov (1)		mudMSGHDR_ENC_STATS(1,0)<1>		udDNDI_RESP(9,1)<0;1,0>    		{ NoDDClr }			// Move encoder statistics to MRF
+	mov (2)		mudMSGHDR_ENC_STATS(1,3)<2>		udDNDI_RESP(9,3)<2;2,1>    		{ NoDDClr, NoDDChk }		// Move encoder statistics to MRF
+	mov (2)		mudMSGHDR_ENC_STATS(1,2)<2>		udDNDI_RESP(9,5)<2;2,1>    		{ NoDDChk }			// Move encoder statistics to MRF
+
+
+send (8)   null<1>:d    r24    0x5    0x40A8021:ud
+
+
+
+// FileName:	DN_Load_UV_IMC3_16x4.asm 
+// Author:		Vivek Kumar
+// Description:	Read UV for 16x4 block through DATAPORT 
+
+
+
+// FileName:	UVCopy_Load_16x4.asm 
+// Author:		Vivek Kumar
+// Description:	Read UV for 16x4 block through DATAPORT 
+
+
+	add (2)		r27.0<1>:d			r7.0<2;2,1>:w				r4.4<2;2,1>:w				// Source Y Block origin
+	asr (2)  	r27.0<1>:d     		r27.0<2;2,1>:d       	1:w   						{ NoDDClr }		// U/V block origin should be half of Y's
+	mov (1)		r27.2<1>:ud			0x10007:ud  					 	{ NoDDChk }		// U/V block width and height (8x2)
+    mov (8)     r36<1>:ud    	r27.0<8;8,1>:ud
+    mov (8)     r38<1>:ud     r27.0<8;8,1>:ud
+	send (8)	udDNDI_UV_RESP(0)<1>	r36	0x4	0x2190001:ud
+	send (8)	udDNDI_UV_RESP(1)<1>	r38	0x4	0x2190002:ud
+
+
+
+// FileName:	DN_Save_Y_16x4.asm 
+// Author:		Vivek Kumar
+// Description:	Save one 16x4 blocks of Y channel of DN output for reference
+
+
+ // check top/bottom field first
+cmp.e.f0.0 (1)  null<1>:w               r1.28<0;1,0>:ub     1:w
+
+mov (8)		mudMSGHDR_DN_OUT(0)<1>		r0<8;8,1>:ud            						// message header   
+mov (2)     mdMSGHDR_DN_OUT(0,0)<1>		r7.0<2;2,1>:w  				{ NoDDClr }        	// X origin * 2 (422 output)
+mov (1)     mudMSGHDR_DN_OUT(0,2)<1>	0x3000F:ud		{ NoDDChk }        	// block width and height (32x8)
+
+(f0.0) jmpi (1) TOP_FIELD_FIRST
+
+BOTTOM_FIELD_FIRST:
+
+	mov (4)     mudMSGHDR_DN_OUT(1,0)<1>    udDNDI_RESP(10,0)<4;4,1> 	{ NoDDClr }		// 2nd field luma from current frame (line 0,2)
+	mov (4)     mudMSGHDR_DN_OUT(1,4)<1>   	udDNDI_RESP(4,4)<4;4,1> 		{ NoDDChk }		// 1st field luma from current frame (line 1,3)
+	mov (4)     mudMSGHDR_DN_OUT(2,0)<1>    udDNDI_RESP(10,4)<4;4,1> 	{ NoDDClr }		// 2nd field luma from current frame (line 0,2)
+	mov (4)     mudMSGHDR_DN_OUT(2,4)<1>   udDNDI_RESP(5,4)<4;4,1> 		{ NoDDChk }		// 1st field luma from current frame (line 1,3)    
+
+	jmpi (1) SAVE_DN_CURR
+
+TOP_FIELD_FIRST:
+	mov (4)     mudMSGHDR_DN_OUT(1,0)<1>    udDNDI_RESP(4,0)<4;4,1> 		{ NoDDClr }		// 2nd field luma from current frame (line 0,2)
+	mov (4)     mudMSGHDR_DN_OUT(1,4)<1>   	udDNDI_RESP(10,0)<4;4,1> 	{ NoDDChk }		// 1st field luma from current frame (line 1,3)
+	mov (4)     mudMSGHDR_DN_OUT(2,0)<1>    udDNDI_RESP(5,0)<4;4,1> 		{ NoDDClr }		// 2nd field luma from current frame (line 0,2)
+	mov (4)     mudMSGHDR_DN_OUT(2,4)<1>   udDNDI_RESP(10,4)<4;4,1> 	{ NoDDChk }		// 1st field luma from current frame (line 1,3)
+
+SAVE_DN_CURR:
+//send out data through data port
+send (8)    null<1>:d    r31.0		0x5    0x60A8018:ud     
+
+
+
+// FileName:    DI_Save_422CP_16x4.asm
+// Author:      Vivek Kumar
+// Description: Save two 16x4 blocks of DI output in 422 format to Color Pipe (IECP)
+
+
+.declare mubMSGHDR_DI_OUT1_1  Base=r18.0      ElementSize=1  Type=ub
+
+
+.declare mubMSGHDR_DI_OUT1_2  Base=r21.0      ElementSize=1  Type=ub
+
+
+.declare mubMSGHDR_DI_OUT2_1  Base=r24.0      ElementSize=1  Type=ub
+
+
+.declare mubMSGHDR_DI_OUT2_2  Base=r27.0      ElementSize=1  Type=ub
+
+
+mov (8) r27.0<1>:ud     r0.0<8;8,1>:ud
+shl (1) r27.0<1>:ud     r7.0<0;1,0>:w            1:w  { NoDDClr }          // H. block origin need to be doubled
+mov (1) r27.1<1>:ud     r7.1<0;1,0>:w                 { NoDDClr, NoDDChk }       // Block origin
+mov (1) r27.2<1>:ud     0x3000F:ud        { NoDDClr, NoDDChk }       // Block width and height (16x8)
+
+//M0.3  - 0 - CP Enable, 1 - Area of Interest, 3:2 Message Format(TBD), 4:3 - Ignored, 31:5 CP state pointer
+//Compose area-of-interest bit + color pipe state pointer
+or (1)  r27.3<1>:ud     r2.4<0;1,0>:ud     r7.26<0;1,0>:b     { NoDDChk }
+
+//prepare the message headers
+mov (8) r18.0<1>:ud       r27<8;8,1>:ud
+mov (8) r24.0<1>:ud       r27<8;8,1>:ud
+
+
+// Pack 2nd field Y; First 8x4 block
+	mov (8)		mubMSGHDR_DI_OUT1_1(1)<2>			ubDNDI_RESP(0,0)<8;8,1>			{ NoDDClr } 	// copy line of Y directly to memory as optimization
+	mov (8)		mubMSGHDR_DI_OUT1_1(1,16)<2>		ubDNDI_RESP(0,16)<8;8,1>			{ NoDDClr, NoDDChk } 	// copy line of Y directly to memory as optimization
+	mov (8)		mubMSGHDR_DI_OUT1_1(2)<2>			ubDNDI_RESP(0,32)<8;8,1>			{ NoDDClr } 	// copy line of Y directly to memory as optimization
+	mov (8)		mubMSGHDR_DI_OUT1_1(2,16)<2>		ubDNDI_RESP(0,48)<8;8,1>			{ NoDDClr, NoDDChk } 	// copy line of Y directly to memory as optimization
+
+// Pack 2nd field U, V; First 8x4 block
+	mov (4)     mubMSGHDR_DI_OUT1_1(1,1)<4>   		ubDNDI_RESP(2,1)<8;4,2>		{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
+	mov (4)     mubMSGHDR_DI_OUT1_1(1,17)<4>   	ubDNDI_RESP(2,17)<8;4,2>		{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
+
+	mov (4)     mubMSGHDR_DI_OUT1_1(1,3)<4>	  	ubDNDI_RESP(2,0)<8;4,2>			{ NoDDChk }    	// copy line of V directly to memory as optimization
+	mov (4)     mubMSGHDR_DI_OUT1_1(1,19)<4>   	ubDNDI_RESP(2,16)<8;4,2>		{ NoDDChk }    	// copy line of V directly to memory as optimization
+	mov (4)     mubMSGHDR_DI_OUT1_1(2,1)<4>   		ubDNDI_RESP(2,33)<8;4,2>		{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
+	mov (4)     mubMSGHDR_DI_OUT1_1(2,17)<4>   	ubDNDI_RESP(2,49)<8;4,2>		{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
+
+	mov (4)     mubMSGHDR_DI_OUT1_1(2,3)<4>	  	ubDNDI_RESP(2,32)<8;4,2>			{ NoDDChk }    	// copy line of V directly to memory as optimization
+	mov (4)     mubMSGHDR_DI_OUT1_1(2,19)<4>   	ubDNDI_RESP(2,48)<8;4,2>		{ NoDDChk }    	// copy line of V directly to memory as optimization
+
+
+// Pack 2nd field Y; Second 8x4 block
+mov	(8)	r21.0<1>:ud		r18.0<8;8,1>:ud
+add	(1)	r21.0<1>:ud		r21.0<0;1,0>:w		0x10:w
+
+	mov (8)		mubMSGHDR_DI_OUT1_2(1)<2>			ubDNDI_RESP(0,8)<8;8,1>			{ NoDDClr } 	// copy line of Y directly to memory as optimization
+	mov (8)		mubMSGHDR_DI_OUT1_2(1,16)<2>		ubDNDI_RESP(0,24)<8;8,1>		{ NoDDClr, NoDDChk } 	// copy line of Y directly to memory as optimization
+	mov (8)		mubMSGHDR_DI_OUT1_2(2)<2>			ubDNDI_RESP(0,40)<8;8,1>			{ NoDDClr } 	// copy line of Y directly to memory as optimization
+	mov (8)		mubMSGHDR_DI_OUT1_2(2,16)<2>		ubDNDI_RESP(0,56)<8;8,1>		{ NoDDClr, NoDDChk } 	// copy line of Y directly to memory as optimization
+
+// Pack 2nd field U, V; Second 8x4 block
+	mov (4)     mubMSGHDR_DI_OUT1_2(1,1)<4>   		ubDNDI_RESP(2,9)<8;4,2>		{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
+	mov (4)     mubMSGHDR_DI_OUT1_2(1,17)<4>		ubDNDI_RESP(2,25)<8;4,2>	{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
+
+	mov (4)     mubMSGHDR_DI_OUT1_2(1,3)<4>   		ubDNDI_RESP(2,8)<8;4,2>		{ NoDDChk }    	// copy line of V directly to memory as optimization
+	mov (4)     mubMSGHDR_DI_OUT1_2(1,19)<4>		ubDNDI_RESP(2,24)<8;4,2>		{ NoDDChk }    	// copy line of V directly to memory as optimization
+	mov (4)     mubMSGHDR_DI_OUT1_2(2,1)<4>   		ubDNDI_RESP(2,41)<8;4,2>		{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
+	mov (4)     mubMSGHDR_DI_OUT1_2(2,17)<4>		ubDNDI_RESP(2,57)<8;4,2>	{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
+
+	mov (4)     mubMSGHDR_DI_OUT1_2(2,3)<4>   		ubDNDI_RESP(2,40)<8;4,2>		{ NoDDChk }    	// copy line of V directly to memory as optimization
+	mov (4)     mubMSGHDR_DI_OUT1_2(2,19)<4>		ubDNDI_RESP(2,56)<8;4,2>		{ NoDDChk }    	// copy line of V directly to memory as optimization
+
+send (8)    null<1>:d    r18.0   0x5     0x60A801B:ud
+send (8)    null<1>:d    r21.0   0x5     0x60A801B:ud
+
+// Pack 1st field Y; 1st 8x4 block
+	mov (8)		mubMSGHDR_DI_OUT2_1(1)<2>			ubDNDI_RESP(4,0)<8;8,1>			{ NoDDClr } 	// copy line of Y directly to memory as optimization
+	mov (8)		mubMSGHDR_DI_OUT2_1(1,16)<2>		ubDNDI_RESP(4,16)<8;8,1>			{ NoDDClr, NoDDChk } 	// copy line of Y directly to memory as optimization
+	mov (8)		mubMSGHDR_DI_OUT2_1(2)<2>			ubDNDI_RESP(4,32)<8;8,1>			{ NoDDClr } 	// copy line of Y directly to memory as optimization
+	mov (8)		mubMSGHDR_DI_OUT2_1(2,16)<2>		ubDNDI_RESP(4,48)<8;8,1>			{ NoDDClr, NoDDChk } 	// copy line of Y directly to memory as optimization
+
+// Pack 1st field U,V; 1st 8x4 block
+	mov (4)     mubMSGHDR_DI_OUT2_1(1,1)<4>   		ubDNDI_RESP(6,1)<8;4,2>		{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
+	mov (4)     mubMSGHDR_DI_OUT2_1(1,17)<4>   	ubDNDI_RESP(6,17)<8;4,2>		{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
+
+	mov (4)     mubMSGHDR_DI_OUT2_1(1,3)<4>	  	ubDNDI_RESP(6,0)<8;4,2>		    { NoDDChk }    	// copy line of V directly to memory as optimization
+	mov (4)     mubMSGHDR_DI_OUT2_1(1,19)<4>   	ubDNDI_RESP(6,16)<8;4,2>	    { NoDDChk }    	// copy line of V directly to memory as optimization
+	mov (4)     mubMSGHDR_DI_OUT2_1(2,1)<4>   		ubDNDI_RESP(6,33)<8;4,2>		{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
+	mov (4)     mubMSGHDR_DI_OUT2_1(2,17)<4>   	ubDNDI_RESP(6,49)<8;4,2>		{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
+
+	mov (4)     mubMSGHDR_DI_OUT2_1(2,3)<4>	  	ubDNDI_RESP(6,32)<8;4,2>		    { NoDDChk }    	// copy line of V directly to memory as optimization
+	mov (4)     mubMSGHDR_DI_OUT2_1(2,19)<4>   	ubDNDI_RESP(6,48)<8;4,2>	    { NoDDChk }    	// copy line of V directly to memory as optimization
+
+// Pack 1st field Y; 2nd 8x4 block
+mov	(8)	r27.0<1>:ud		r24.0<8;8,1>:ud
+add	(1)	r27.0<1>:ud		r27.0<0;1,0>:w		0x10:w
+
+	mov (8)		mubMSGHDR_DI_OUT2_2(1)<2>			ubDNDI_RESP(4,8)<8;8,1>			{ NoDDClr } 	// copy line of Y directly to memory as optimization
+	mov (8)		mubMSGHDR_DI_OUT2_2(1,16)<2>		ubDNDI_RESP(4,24)<8;8,1>		{ NoDDClr, NoDDChk } 	// copy line of Y directly to memory as optimization
+	mov (8)		mubMSGHDR_DI_OUT2_2(2)<2>			ubDNDI_RESP(4,40)<8;8,1>			{ NoDDClr } 	// copy line of Y directly to memory as optimization
+	mov (8)		mubMSGHDR_DI_OUT2_2(2,16)<2>		ubDNDI_RESP(4,56)<8;8,1>		{ NoDDClr, NoDDChk } 	// copy line of Y directly to memory as optimization
+
+// Pack 1st field U, V; 2nd 8x4 block
+	mov (4)     mubMSGHDR_DI_OUT2_2(1,1)<4>   		ubDNDI_RESP(6,9)<8;4,2>		{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
+	mov (4)     mubMSGHDR_DI_OUT2_2(1,17)<4>		ubDNDI_RESP(6,25)<8;4,2>	{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
+
+	mov (4)     mubMSGHDR_DI_OUT2_2(1,3)<4>   		ubDNDI_RESP(6,8)<8;4,2>		{ NoDDChk }    	// copy line of V directly to memory as optimization
+	mov (4)     mubMSGHDR_DI_OUT2_2(1,19)<4>		ubDNDI_RESP(6,24)<8;4,2>		{ NoDDChk }    	// copy line of V directly to memory as optimization
+	mov (4)     mubMSGHDR_DI_OUT2_2(2,1)<4>   		ubDNDI_RESP(6,41)<8;4,2>		{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
+	mov (4)     mubMSGHDR_DI_OUT2_2(2,17)<4>		ubDNDI_RESP(6,57)<8;4,2>	{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
+
+	mov (4)     mubMSGHDR_DI_OUT2_2(2,3)<4>   		ubDNDI_RESP(6,40)<8;4,2>		{ NoDDChk }    	// copy line of V directly to memory as optimization
+	mov (4)     mubMSGHDR_DI_OUT2_2(2,19)<4>		ubDNDI_RESP(6,56)<8;4,2>		{ NoDDChk }    	// copy line of V directly to memory as optimization
+
+send (8)    null<1>:d    r24.0     0x5     0x60A801E:ud
+send (8)    null<1>:d    r27.0     0x5     0x60A801E:ud
+
+
+
+// FileName:	DN_Save_UV_IMC3_16x4.asm 
+// Author:		Vivek Kumar
+// Description:	Save UV for 16x4 block through DATAPORT 
+
+
+
+// FileName:	UVCopy_Save_16x4.asm 
+// Author:		Vivek Kumar
+// Description:	Save UV for 16x4 block through DATAPORT 
+
+
+//Reuse the header from Load component
+
+
+	mov (4)		mudMSGHDR_UCOPY(1)<1>		udDNDI_UV_RESP(0)<4;4,1>
+	mov (4)		mudMSGHDR_VCOPY(1)<1>		udDNDI_UV_RESP(1)<4;4,1>
+    send (4)    null<1>:d    r36	0x5    0x40A8019:ud
+    send (4)    null<1>:d    r38	0x5    0x40A801A:ud
+
+
+
+//End of Thread message
+
+mov (8) r127<1>:ud r0.0<8;8,1>:ud 
+ send (1) null<1>:d r127 0x27 0x02000010
+
+
+.end_code 
+.end_kernel
diff --git a/src/shaders/post_processing/gen7/PL3_DNDI_PA.g4a b/src/shaders/post_processing/gen7/PL3_DNDI_PA.g4a
new file mode 100644
index 0000000..65bceeb
--- /dev/null
+++ b/src/shaders/post_processing/gen7/PL3_DNDI_PA.g4a
@@ -0,0 +1,500 @@
+/*
+ *  Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+//   90    // Total instruction count
+//    1    // Total kernel count
+
+.kernel PL3_DNDI_PA
+.code
+
+
+
+// FileName:	DNDI_PL_Core.asm
+// Author:		Tatiya, Rupesh
+
+
+
+// FileName:	DNDI_Core.asm 
+// Author:		Vivek Kumar
+// Description:	Tasks for DN+DI case (16x4 block)
+
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16)
+.default_register_type  :ub
+
+.reg_count_total        128
+.reg_count_payload      7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+                                    //  e.g.            byte0   byte1  byte2
+                                    // YUYV               0       1      3
+                                    // YVYU               0       3      1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+                                    //  e.g.              byte0           byte1           byte2
+                                    // YUYV                 0               1               3
+                                    // YVYU                 0               3               1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+//                = 000 0001 (min message len 1 ) 0,0000 (resp len 0   -add later)
+//                  0000,0000,0000
+//                  0001(Spawn a root thread),0001 (Root thread spawn thread)
+//                = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+//                = 000 0110 (min message len 6 ) 0,0000 (resp len 0   -add later)
+//                  1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+//                  0000,0000 (Binding table index, added later)
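+//                = 0x0C098700 (value implied by the bit fields listed above; the original text repeated 0x02000011 from the thread-spawner block)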
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+        // Message Descriptors
+                //                = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+                //                  1 (header present 1) 0 1010 (media block write) 000000
+                //                  00000000 (binding table index - set later)
+                //                = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+                             // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT  r43    // Dummy Message Register for EOT
+
+
+.declare    mubMSGPAYLOAD  Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare    muwMSGPAYLOAD  Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare    mudMSGPAYLOAD  Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare    mfMSGPAYLOAD   Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+
+.declare fROBUF		  Base=r9.0		ElementSize=4		SrcRegion=<8;8,1>		  DstRegion=<1>		Type=f
+
+.declare udROBUF		Base=r9.0		ElementSize=4		SrcRegion=<8;8,1>		  DstRegion=<1>		Type=ud
+
+.declare uwROBUF		Base=r9.0		ElementSize=2		SrcRegion=<16;16,1>		DstRegion=<1>		Type=uw
+
+.declare ubROBUF		Base=r9.0		ElementSize=1		SrcRegion=<16;16,1>		DstRegion=<1>		Type=ub
+
+.declare ub4ROBUF 	Base=r9.0		ElementSize=1		SrcRegion=<32;8,4>		DstRegion=<4>		Type=ub
+
+
+// End of common.inc
+
+
+// FileName:    DNDI.inc
+// Author:      Vivek Kumar
+// Description: Include file for DN, DI and DNDI
+// Inputs:      DI_ENABLE, DN_ENABLE, DN_PLANAR, DN_PACKED
+
+
+
+
+// End of common.inc
+
+
+//Interface:
+//Static Parameters:
+//r1
+
+
+//====================== Binding table (Explicit To DNDI)=========================================
+
+
+.declare mudMSGHDR_DNDI     Base=r18      ElementSize=4    Type=ud
+.declare mdMSGHDR_DNDI      Base=r18      ElementSize=4    Type=d
+.declare mwMSGHDR_DNDI      Base=r18      ElementSize=2    Type=w
+
+
+.declare mudMSGHDR_STMM     Base=r20      ElementSize=4    Type=ud
+
+
+.declare mudMSGHDR_HIST     Base=r22      ElementSize=4    Type=ud
+
+
+.declare mudMSGHDR_ENC_STATS Base=r24 ElementSize=4   Type=ud
+.declare muwMSGHDR_ENC_STATS Base=r24 ElementSize=2   Type=uw
+.declare mubMSGHDR_ENC_STATS Base=r24 ElementSize=1   Type=ub
+
+
+.declare mudMSGHDR_DN_OUT   Base=r31.0      ElementSize=4  Type=ud
+.declare mdMSGHDR_DN_OUT    Base=r31.0      ElementSize=4  Type=d
+.declare mubMSGHDR_DN_OUT   Base=r31.0      ElementSize=1  Type=ub
+
+
+.declare mudMSGHDR_UVCOPY   Base=r36      ElementSize=4  Type=ud
+.declare mdMSGHDR_UVCOPY    Base=r36      ElementSize=4  Type=d
+.declare mudMSGHDR_UCOPY    Base=r36       ElementSize=4  Type=ud
+.declare mudMSGHDR_VCOPY    Base=r38       ElementSize=4  Type=ud
+
+
+.declare mudMSGHDR_DI_OUT1  Base=r18.0      ElementSize=4     Type=ud
+.declare mubMSGHDR_DI_OUT1  Base=r18.0      ElementSize=1     Type=ub
+
+
+.declare mudMSGHDR_DI_OUT2  Base=r23.0      ElementSize=4     Type=ud
+.declare mubMSGHDR_DI_OUT2  Base=r23.0      ElementSize=1     Type=ub
+
+//r45
+//Use r45 as message header, so no need to "mov" the data.
+
+.declare mudDN_Y_OUT        Base=r45.0 ElementSize=4 SrcRegion=<8;8,1>   DstRegion=<1> Type=ud
+
+// Message response (Denoised & DI-ed pixels & statistics); Use buffer 5
+.declare udDNDI_RESP        Base=r46.0 ElementSize=4 SrcRegion=<8;8,1>   DstRegion=<1> Type=ud
+.declare uwDNDI_RESP        Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare ubDNDI_RESP        Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+// Message response (UV Copy); Use buffer 5
+.declare udDNDI_UV_RESP     Base=r58.0 ElementSize=4 SrcRegion=<8;8,1>  DstRegion=<1> Type=ud
+.declare ubDNDI_UV_RESP     Base=r58.0 ElementSize=1 SrcRegion=<16;16,1>    DstRegion=<1> Type=ub
+
+//Temp GRFs: For 42X to 422 Conversion
+.declare uwDNDI_UVCOPY_TEMP Base=r10.0 ElementSize=2 SrcRegion=<16;16,1>    DstRegion=<1> Type=uw       //8 GRFs
+.declare ubDNDI_UVCOPY_TEMP Base=r10.0 ElementSize=1 SrcRegion=<16;16,1>    DstRegion=<1> Type=ub       //8 GRFs
+//---------------------------------------------------------------------------
+// Message descriptors
+//---------------------------------------------------------------------------
+// Extended message descriptor
+    // Message descriptor   for sampler read
+    //                    = 000 0010 (message len 2) 00000 (resp len - set later, 12 or 5 or 11)
+    //                      1 (header present 1) 0 11 (SIMD32/64 mode)
+    //                      1000 (message type) 0000 (DI state index)
+    //                      00000000 (binding table index - set later)
+    //                    = 0x040b8000
+
+
+// Attention: The Message Length is The Number of GRFs with Data Only, without the Header
+
+
+//---------------------------------------------------------------------------
+// VDI Return Data format
+//---------------------------------------------------------------------------
+// Defines for DI enabled
+
+
+// Defines for DI disabled
+
+
+
+// FileName:	DNDI_Command.asm 
+// Author:		Vivek Kumar
+// Description:	Sends a message to the VDI to process one DN (16x8) or DNDI (16x4) block
+
+// Prepare the DNDI send command: header goes in r18 (mudMSGHDR_DNDI), block origin in the register after it
+mov (8)		mudMSGHDR_DNDI(0)<1>			r0.0<8;8,1>:ud					// message header
+mov (1)		mwMSGHDR_DNDI(1,4)<1>			r7.0<0;1,0>:w		{ NoDDClr }		// horizontal origin	// Do we need to add offset here? -vK
+mov (1)		mwMSGHDR_DNDI(1,12)<1>			r7.1<0;1,0>:w		{ NoDDChk }		// vertical origin		// Can these 2 be combined? - vK
+// Issue the request; the response is written starting at udDNDI_RESP(0)
+send (8)	udDNDI_RESP(0)<1>	r18	0x2	0x4BE8003:ud
+
+// On Gen6, with the VDI walker, use the XY pair returned rather than the one programmed above.
+// VDI_RETURNED_XY is ordered X,Y when the walker is enabled, and equals the programmed values when the walker is disabled.
+mov (2) 	r7.0<1>:w     uwDNDI_RESP(9,14)<2;2,1>	// horizontal/vertical origin in W.14 and W.15    
+
+
+
+// FileName:	DI_STMM_Save.asm 
+// Author:		Vivek Kumar
+// Description:	Saves DI STMM Data to statistics surface in case of DI enabled (for 16x4 block)
+
+// Write STMM to memory: header in mudMSGHDR_STMM(0) (r20), one register of STMM data behind it
+mov (8)     mudMSGHDR_STMM(0)<1>	r0.0<8;8,1>:ud               							// message header (copy of r0)
+mov (8)     mudMSGHDR_STMM(1)<1>   	udDNDI_RESP(8,0)         					// Move STMM (response register 8) to the message payload
+// Overwrite header dwords 0..2 with the block origin and size
+shr (1)     mudMSGHDR_STMM(0,0)<1>	r7.0<0;1,0>:w            1:w     { NoDDClr } 		// X origin / 2
+mov (1)     mudMSGHDR_STMM(0,1)<1>	r7.1<0;1,0>:w                    { NoDDClr, NoDDChk }  	// Y origin
+mov (1)     mudMSGHDR_STMM(0,2)<1>	0x30007:ud           { NoDDChk } 		// block width and height (8x4)
+
+send (8)	null<1>:d	r20	0x5		0x40A8021:ud      
+
+
+
+// FileName:	DN_Hist_Save.asm 
+// Author:		Vivek Kumar
+// Description:	Saves DN history data to statistics surface
+
+// Write denoise history to memory: the header is built in r27, then copied to mudMSGHDR_HIST(0) (r22)
+mov (8)    r27<1>:ud				r0.0<8;8,1>:ud                   			// message header (copy of r0)
+
+	mov (1)    mudMSGHDR_HIST(1)<1>		udDNDI_RESP(9,0)<0;1,0>		// Move denoise history (first dword of response register 9) to the message payload (4x1)
+
+// Overwrite header dwords 0..2 in r27 with the block origin and size
+shr (2)    r27.0<1>:ud		r7.0<2;2,1>:w				2:w                                 	// X,Y origin / 4
+add (1)    r27.0<1>:ud		r27.0<0;1,0>:ud			r1.12<0;1,0>:uw		{ NoDDClr }  	// Add pitch to X origin
+mov (1)    r27.2<1>:ud		0x3:ud									{ NoDDChk }  	// block width and height (4x1, same "size minus one" encoding as the other headers)
+
+mov (8)		mudMSGHDR_HIST(0)<1>		r27.0<8;8,1>:ud		// copy the finished header from r27 into the message
+send (8)	null<1>:d	r22	0x5		0x40A8021:ud
+
+
+
+// FileName:	DNDI_Enc_Stats_Save.asm
+// Author:		Vivek Kumar
+// Description:	Saves Encoder Statistics data to statistics surface in case of DI enabled (for 16x4 block)
+
+// Write encoder statistics to memory: header in mudMSGHDR_ENC_STATS(0) (r24), payload in the register after it
+//Currently enable this only on Gen6 validation
+mov (8)		mudMSGHDR_ENC_STATS(1)<1>	0x0:ud						// Zero the payload register
+mov (8)		mudMSGHDR_ENC_STATS(0)<1>	r0.0<8;8,1>:ud				// message header
+// Overwrite header dwords 0..2; original notes on the flagged lines read "enable the flag after testing on si"
+shr (1)		mudMSGHDR_ENC_STATS(0,0)<1>		r7.0<0;1,0>:w            	1:w  	{ NoDDClr }			// X origin / 2
+mul (1)		acc0.1<1>:ud					r7.1<0;1,0>:w				3:w																							// Y origin * 3
+shr (1)		mudMSGHDR_ENC_STATS(0,1)<1>		acc0.1<0;1,0>:ud			2:w		{ NoDDClr, NoDDChk }		// Y origin * 3/4
+mov (1)		mudMSGHDR_ENC_STATS(0,2)<1>		0x20007:ud				{ NoDDChk }			// block width and height (8x3)
+add (2)		mudMSGHDR_ENC_STATS(0,0)<1>		mudMSGHDR_ENC_STATS(0,0)<2;2,1>       	r1.12<2;2,1>:uw					// Add pitch to X,Y origin
+
+
+	//Data block for Encoder Statistics
+	//----------------------------------------------------
+	//|  0  |   1  |   2   |  3  |  4  |  5  |  6  |  7  | Bytes
+	//----------------------------------------------------
+	//| BNE | MCNT | FCNT | TCNT |  X  |  X  |  X  |  X  |
+	//----------------------------------------------------
+	//|   DcTpT    |     SVCM    |   DcBpT   |   DcTpB   |
+	//----------------------------------------------------
+	//|   SHCM     |     STAD    |   DcTcB   |   DcBpB   |
+	//----------------------------------------------------
+	mov (1)		mudMSGHDR_ENC_STATS(1,0)<1>		udDNDI_RESP(9,1)<0;1,0>    		{ NoDDClr }			// response reg 9 dword 1 -> payload dword 0
+	mov (2)		mudMSGHDR_ENC_STATS(1,3)<2>		udDNDI_RESP(9,3)<2;2,1>    		{ NoDDClr, NoDDChk }		// response reg 9 dwords 3,4 -> payload dwords 3,5 (destination stride 2)
+	mov (2)		mudMSGHDR_ENC_STATS(1,2)<2>		udDNDI_RESP(9,5)<2;2,1>    		{ NoDDChk }			// response reg 9 dwords 5,6 -> payload dwords 2,4 (destination stride 2)
+
+
+send (8)   null<1>:d    r24    0x5    0x40A8021:ud
+
+
+
+// FileName:	DN_Load_UV_IMC3_16x4.asm 
+// Author:		Vivek Kumar
+// Description:	Read UV for 16x4 block through DATAPORT 
+
+
+
+// FileName:	UVCopy_Load_16x4.asm 
+// Author:		Vivek Kumar
+// Description:	Read UV for 16x4 block through DATAPORT 
+// One header is built in r27 and copied to r36 (mudMSGHDR_UCOPY) and r38 (mudMSGHDR_VCOPY); only r27.0-r27.2 are written here,
+// so r27.3-r27.7 still hold whatever the earlier history-save section left there (a copy of r0).
+	add (2)		r27.0<1>:d			r7.0<2;2,1>:w				r4.4<2;2,1>:w				// Source Y Block origin
+	asr (2)  	r27.0<1>:d     		r27.0<2;2,1>:d       	1:w   						{ NoDDClr }		// U/V block origin should be half of Y's
+	mov (1)		r27.2<1>:ud			0x10007:ud  					 	{ NoDDChk }		// U/V block width and height (8x2)
+    mov (8)     r36<1>:ud    	r27.0<8;8,1>:ud		// header for the first chroma read
+    mov (8)     r38<1>:ud     r27.0<8;8,1>:ud		// identical header for the second chroma read
+	send (8)	udDNDI_UV_RESP(0)<1>	r36	0x4	0x2190001:ud		// response lands in udDNDI_UV_RESP(0); presumably the U plane (descriptor low byte 0x01) - confirm
+	send (8)	udDNDI_UV_RESP(1)<1>	r38	0x4	0x2190002:ud		// response lands in udDNDI_UV_RESP(1); presumably the V plane (descriptor low byte 0x02) - confirm
+
+
+
+// FileName:	DN_Save_Y_16x4.asm 
+// Author:		Vivek Kumar
+// Description:	Save one 16x4 blocks of Y channel of DN output for reference
+
+
+ // check top/bottom field first: f0.0 is set when byte r1.28 equals 1
+cmp.e.f0.0 (1)  null<1>:w               r1.28<0;1,0>:ub     1:w
+
+mov (8)		mudMSGHDR_DN_OUT(0)<1>		r0<8;8,1>:ud            						// message header   
+mov (2)     mdMSGHDR_DN_OUT(0,0)<1>		r7.0<2;2,1>:w  				{ NoDDClr }        	// X,Y origin copied unchanged from r7.0/r7.1 (no doubling is done here)
+mov (1)     mudMSGHDR_DN_OUT(0,2)<1>	0x3000F:ud		{ NoDDChk }        	// block width and height (16x4, same "size minus one" encoding as 0x30007 = 8x4)
+// Branch when r1.28 == 1; otherwise fall through to BOTTOM_FIELD_FIRST
+(f0.0) jmpi (1) TOP_FIELD_FIRST
+
+BOTTOM_FIELD_FIRST:
+// Each mov (4) fills one 16-byte half of a payload register, i.e. one output line
+	mov (4)     mudMSGHDR_DN_OUT(1,0)<1>    udDNDI_RESP(10,0)<4;4,1> 	{ NoDDClr }		// 2nd field luma from current frame (line 0,2)
+	mov (4)     mudMSGHDR_DN_OUT(1,4)<1>   	udDNDI_RESP(4,4)<4;4,1> 		{ NoDDChk }		// 1st field luma from current frame (line 1,3)
+	mov (4)     mudMSGHDR_DN_OUT(2,0)<1>    udDNDI_RESP(10,4)<4;4,1> 	{ NoDDClr }		// 2nd field luma from current frame (line 0,2)
+	mov (4)     mudMSGHDR_DN_OUT(2,4)<1>   udDNDI_RESP(5,4)<4;4,1> 		{ NoDDChk }		// 1st field luma from current frame (line 1,3)    
+
+	jmpi (1) SAVE_DN_CURR
+
+TOP_FIELD_FIRST:
+	mov (4)     mudMSGHDR_DN_OUT(1,0)<1>    udDNDI_RESP(4,0)<4;4,1> 		{ NoDDClr }		// NOTE(review): source is response reg 4, labelled "1st field" elsewhere in this kernel; original comment said "2nd field" (line 0,2)
+	mov (4)     mudMSGHDR_DN_OUT(1,4)<1>   	udDNDI_RESP(10,0)<4;4,1> 	{ NoDDChk }		// NOTE(review): source is response reg 10, labelled "2nd field" in the other branch; original comment said "1st field" (line 1,3)
+	mov (4)     mudMSGHDR_DN_OUT(2,0)<1>    udDNDI_RESP(5,0)<4;4,1> 		{ NoDDClr }		// NOTE(review): source is response reg 5 (1st-field side); original comment said "2nd field" (line 0,2)
+	mov (4)     mudMSGHDR_DN_OUT(2,4)<1>   udDNDI_RESP(10,4)<4;4,1> 	{ NoDDChk }		// NOTE(review): source is response reg 10 (2nd-field side); original comment said "1st field" (line 1,3)
+
+SAVE_DN_CURR:
+//send out data through data port (header + payload start at r31, mudMSGHDR_DN_OUT)
+send (8)    null<1>:d    r31.0		0x5    0x60A8018:ud     
+
+
+
+// FileName:    DI_Save_PA_16x4.asm
+// Author:      Vivek Kumar
+// Description: Save two 16x4 blocks of DI output in Packed format
+
+
+add (4) a0.4<1>:uw   r2.28<4;4,1>:ub   608:w               // a0.4..a0.7 = bytes r2.28..r2.31 + 608: initial Y,U,V offsets in the YUV422 block; 608 = 19*32, presumably the byte address of r19 (original note: "starts at m20") - confirm
+
+mov (8) r27.0<1>:ud     r0.0<8;8,1>:ud
+shl (1) r27.0<1>:d      r7.0<0;1,0>:w            1:w  { NoDDClr }          // H. block origin need to be doubled
+mov (1) r27.1<1>:d      r7.1<0;1,0>:w                 { NoDDClr, NoDDChk }       // Block origin
+mov (1) r27.2<1>:ud     0x3001F:ud          { NoDDChk }          // Block width and height (32x4, same "size minus one" encoding as 0x30007 = 8x4)
+
+//prepare the message headers: the same header is used for both output messages (r18 and r23)
+mov (8) r18.0<1>:ud       r27<8;8,1>:ud
+mov (8) r23.0<1>:ud       r27<8;8,1>:ud
+
+// Pack 2nd field Y: 16 bytes per row, written with destination stride 2 (every other byte of the packed row)
+    mov (16)    r[a0.4, 0]<2>      ubDNDI_RESP(0,0)               { NoDDClr }
+    mov (16)    r[a0.4, 32]<2>      ubDNDI_RESP(0,16)               { NoDDClr }
+    mov (16)    r[a0.4, 64]<2>      ubDNDI_RESP(0,32)               { NoDDClr }
+    mov (16)    r[a0.4, 96]<2>      ubDNDI_RESP(0,48)               { NoDDClr }
+// Pack 2nd field U: odd source bytes of each row, written with destination stride 4
+    mov (8)     r[a0.5, 0]<4>      ubDNDI_RESP(2,1)<16;8,2>   { NoDDClr, NoDDChk }  //U pixels
+    mov (8)     r[a0.5, 32]<4>      ubDNDI_RESP(2,17)<16;8,2>   { NoDDClr, NoDDChk }  //U pixels
+    mov (8)     r[a0.5, 64]<4>      ubDNDI_RESP(2,33)<16;8,2>   { NoDDClr, NoDDChk }  //U pixels
+    mov (8)     r[a0.5, 96]<4>      ubDNDI_RESP(2,49)<16;8,2>   { NoDDClr, NoDDChk }  //U pixels
+// Pack 2nd field V: even source bytes of each row, written with destination stride 4
+    mov (8)     r[a0.6, 0]<4>      ubDNDI_RESP(2,0)<16;8,2>     { NoDDChk }     //Vpixels
+    mov (8)     r[a0.6, 32]<4>      ubDNDI_RESP(2,16)<16;8,2>     { NoDDChk }     //Vpixels
+    mov (8)     r[a0.6, 64]<4>      ubDNDI_RESP(2,32)<16;8,2>     { NoDDChk }     //Vpixels
+    mov (8)     r[a0.6, 96]<4>      ubDNDI_RESP(2,48)<16;8,2>     { NoDDChk }     //Vpixels
+
+// Pack 1st field Y: same layout, 160 bytes further on (5 registers if GRFs are 32 bytes, i.e. behind the r23 header - confirm)
+    mov (16)    r[a0.4, 160]<2>    ubDNDI_RESP(4,0)               { NoDDClr }
+    mov (16)    r[a0.4, 192]<2>    ubDNDI_RESP(4,16)               { NoDDClr }
+    mov (16)    r[a0.4, 224]<2>    ubDNDI_RESP(4,32)               { NoDDClr }
+    mov (16)    r[a0.4, 256]<2>    ubDNDI_RESP(4,48)               { NoDDClr }
+// Pack 1st field U: odd source bytes of each row
+    mov (8)     r[a0.5, 160]<4>    ubDNDI_RESP(6,1)<16;8,2>   { NoDDClr, NoDDChk }  //U pixels
+    mov (8)     r[a0.5, 192]<4>    ubDNDI_RESP(6,17)<16;8,2>   { NoDDClr, NoDDChk }  //U pixels
+    mov (8)     r[a0.5, 224]<4>    ubDNDI_RESP(6,33)<16;8,2>   { NoDDClr, NoDDChk }  //U pixels
+    mov (8)     r[a0.5, 256]<4>    ubDNDI_RESP(6,49)<16;8,2>   { NoDDClr, NoDDChk }  //U pixels
+// Pack 1st field V: even source bytes of each row
+    mov (8)     r[a0.6, 160]<4>    ubDNDI_RESP(6,0)<16;8,2>     { NoDDChk }     //Vpixels
+    mov (8)     r[a0.6, 192]<4>    ubDNDI_RESP(6,16)<16;8,2>     { NoDDChk }     //Vpixels
+    mov (8)     r[a0.6, 224]<4>    ubDNDI_RESP(6,32)<16;8,2>     { NoDDChk }     //Vpixels
+    mov (8)     r[a0.6, 256]<4>    ubDNDI_RESP(6,48)<16;8,2>     { NoDDChk }     //Vpixels
+
+//save the previous frame (message starting at r18; carries the rows packed above as "2nd field")
+send (8)    null<1>:d    r18.0     0x5     0xA0A801B:ud
+
+//save the current frame (message starting at r23; carries the rows packed above as "1st field")
+send (8)    null<1>:d    r23.0     0x5     0xA0A801E:ud
+
+
+
+// FileName:	DN_Save_UV_IMC3_16x4.asm 
+// Author:		Vivek Kumar
+// Description:	Save UV for 16x4 block through DATAPORT 
+
+
+
+// FileName:	UVCopy_Save_16x4.asm 
+// Author:		Vivek Kumar
+// Description:	Save UV for 16x4 block through DATAPORT 
+
+
+//Reuse the header from Load component: r36 and r38 still hold the headers built for the chroma reads
+// Each mov (4) copies 4 dwords (16 bytes) of read-back chroma into the register after the header
+
+	mov (4)		mudMSGHDR_UCOPY(1)<1>		udDNDI_UV_RESP(0)<4;4,1>		// udDNDI_UV_RESP(0) -> payload of the r36 message
+	mov (4)		mudMSGHDR_VCOPY(1)<1>		udDNDI_UV_RESP(1)<4;4,1>		// udDNDI_UV_RESP(1) -> payload of the r38 message
+    send (4)    null<1>:d    r36	0x5    0x40A8019:ud
+    send (4)    null<1>:d    r38	0x5    0x40A801A:ud
+
+
+
+//End of Thread message
+// Copy r0 into r127 and send it as the kernel's final message (descriptor 0x02000010)
+mov (8) r127<1>:ud r0.0<8;8,1>:ud 
+ send (1) null<1>:d r127 0x27 0x02000010
+
+
+.end_code 
+.end_kernel
diff --git a/src/shaders/post_processing/gen7/PL3_DNUV_PL3.g4a b/src/shaders/post_processing/gen7/PL3_DNUV_PL3.g4a
new file mode 100644
index 0000000..2e1ad5f
--- /dev/null
+++ b/src/shaders/post_processing/gen7/PL3_DNUV_PL3.g4a
@@ -0,0 +1,2684 @@
+/*
+ *  Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+// 1295    // Total instruction count
+//    1    // Total kernel count
+
+
+.kernel PL3_DNUV_PL3
+.code
+
+
+
+//Module		: DN_UV_Setup
+//Author		: Tatiya, Rupesh
+//Description	: Initial Set-up for DN_UV
+
+
+
+
+// Module name	: ChromaDenoise.inc
+// Author		: Tatiya, Rupesh
+
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16)
+.default_register_type  :ub
+
+.reg_count_total        128
+.reg_count_payload      7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+                                    //  e.g.            byte0   byte1  byte2
+                                    // YUYV               0       1      3
+                                    // YVYU               0       3      1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+                                    //  e.g.              byte0           byte1           byte2
+                                    // YUYV                 0               1               3
+                                    // YVYU                 0               3               1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+//                = 000 0001 (min message len 1 ) 0,0000 (resp len 0   -add later)
+//                  0000,0000,0000
+//                  0001(Spawn a root thread),0001 (Root thread spawn thread)
+//                = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+//                = 000 0110 (min message len 6 ) 0,0000 (resp len 0   -add later)
+//                  1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+//                  0000,0000 (Binding table index, added later)
+//                = 0x02000011
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+        // Message Descriptors
+                //                = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+                //                  1 (header present 1) 0 1010 (media block write) 000000
+                //                  00000000 (binding table index - set later)
+                //                = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+                             // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT  r43    // Dummy Message Register for EOT
+
+
+.declare    mubMSGPAYLOAD  Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare    muwMSGPAYLOAD  Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare    mudMSGPAYLOAD  Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare    mfMSGPAYLOAD   Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+
+.declare fROBUF		  Base=r9.0		ElementSize=4		SrcRegion=<8;8,1>		  DstRegion=<1>		Type=f
+
+.declare udROBUF		Base=r9.0		ElementSize=4		SrcRegion=<8;8,1>		  DstRegion=<1>		Type=ud
+
+.declare uwROBUF		Base=r9.0		ElementSize=2		SrcRegion=<16;16,1>		DstRegion=<1>		Type=uw
+
+.declare ubROBUF		Base=r9.0		ElementSize=1		SrcRegion=<16;16,1>		DstRegion=<1>		Type=ub
+
+.declare ub4ROBUF 	Base=r9.0		ElementSize=1		SrcRegion=<32;8,4>		DstRegion=<4>		Type=ub
+
+
+// End of common.inc
+
+
+//Interface:
+//Static Parameters:
+//r1
+
+
+//======================================================
+//Interface for serpent mode Chroma Denoise, added by Le
+//======================================================
+//r1
+
+
+//noise history thresholds (low and high)
+
+
+//temporal difference thresholds (high and low)
+
+
+//noise history thresholds (low and high)
+//#define ubNoiseHistMaxHigh  r1.22
+//#define ubNoiseHistMaxLow  r1.23
+//#define ubNoiseHistDeltaHigh  r1.24
+//#define ubNoiseHistDeltaLow  r1.25
+
+//Gaussian thresholds
+
+
+//temporal difference thresholds (default)
+
+
+//r2
+//history thresholds (default)
+
+
+//denoise factor  (0-63)
+
+
+//====================== Binding table (Explicit To DNUV)=========================================
+//Used by DN_UV kernels
+
+
+	//Pointer to Current Frame UV
+
+
+//r1-r6
+	//CURBE GRFs used as TEMP : Used for max computation and storing max temporarily. : r1-r6
+
+
+	.declare	ubCURBE_TEMP	Base=r1.0	ElementSize=1	Type=ub
+	.declare	uwCURBE_TEMP	Base=r1.0	ElementSize=2	Type=uw
+	.declare	wCURBE_TEMP		Base=r1.0	ElementSize=2	Type=w
+	.declare	fCURBE_TEMP		Base=r1.0	ElementSize=4	Type=f
+	.declare	udCURBE_TEMP		Base=r1.0	ElementSize=4	Type=ud
+	.declare	uwMAX_ABS_DIFF	Base=r5.0	ElementSize=2	Type=uw
+
+	//r1
+
+
+	//r3
+
+
+    //r4
+
+//r7
+	//All of the following have to be defined in the same GRF for optimal performance.
+
+
+//r8-24
+    //Previous Frame UV
+
+	.declare	udPREV_UV		Base=r8.0	ElementSize=4	Type=ud
+	.declare	ubPREV_UV		Base=r8.0	ElementSize=1	Type=ub
+
+
+//r25-48
+	//TEMP Space for any Usage.
+
+
+//=========================================================================
+//Definitions and declarations for serpent mode Chroma Denoise, added by Le
+//========================================================================= 	  
+
+
+	.declare	udGNE_UV		Base=r24.0	ElementSize=4	Type=ud
+  .declare	fGNE_UV		Base=r24.0	ElementSize=4	Type=f
+  .declare	ubGNE_UV		Base=r24.0	ElementSize=1	Type=ub
+
+  .declare	udMSGHDR_BNE_SERP	Base=r25.0	ElementSize=4	Type=ud
+  .declare	udMSGSRC_BNE_SERP	Base=r26.0	ElementSize=4	Type=ud
+
+
+  .declare	ubDN_UV_Thresholds Base=r26.0	ElementSize=1	Type=ub
+  .declare	ubDN_UV_Thresholds_Temp  Base=r27.0	ElementSize=1	Type=ub
+  .declare	udDN_UV_Thresholds Base=r26.0	ElementSize=4	Type=ud
+  .declare	udDN_UV_Thresholds_Temp Base=r27.0	ElementSize=4	Type=ud
+  .declare	fDN_UV_Thresholds Base=r26.0	ElementSize=4	Type=f
+  .declare	fDN_UV_Thresholds_Temp Base=r27.0	ElementSize=4	Type=f 	
+
+
+//====================================================================================
+
+
+	//TEMP23: To hold V data for PL3 surfaces
+	.declare	udCURR_V_TEMP	Base=r25.0	ElementSize=4	Type=ud
+	.declare	ubCURR_V_TEMP	Base=r25.0	ElementSize=1	Type=ub
+
+	//GRFs to calculate Median: r25-r42
+	.declare	ubMEDIAN_TEMP	Base=r25.0	ElementSize=1	Type=ub
+
+	//18 GRFs to hold difference : r25-r42
+	.declare	wDIFF			Base=r25.0	ElementSize=2	Type=w
+	.declare	uwDIFF			Base=r25.0	ElementSize=2	Type=uw
+
+	//Temporal Diff
+	.declare	wDIFF_TEMPORAL			Base=r25.0	ElementSize=2	Type=w
+	.declare	ubDIFF_TEMPORAL			Base=r25.0	ElementSize=1	Type=ub
+
+	//4 GRFs to hold Sobel Value : r43-46
+	.declare	wSOBEL_X	Base=r43.0	ElementSize=2	Type=w
+	.declare	uwSOBEL		Base=r43.0	ElementSize=2	Type=uw
+
+
+	//2 GRFs to hold SOAD temporarily: r47-48
+	.declare	uwSOAD			Base=r47.0	ElementSize=2	Type=uw
+
+	//Temp GRFs to hold extra YUYV pixels: r43-r48
+	.declare	ubTEMP5			Base=r43.0	ElementSize=1	Type=ub
+
+	//Temp GRFs in Median Calculation: r47-r48
+	.declare	ubTEMP1			Base=r47.0	ElementSize=1	Type=ub
+
+	.declare	uwTEMP0			Base=r48.0	ElementSize=2	Type=uw
+	.declare	ubTEMP0			Base=r48.0	ElementSize=1	Type=ub
+
+	//Temp Space to store Median : r49-50
+
+	.declare	ubMEDIAN	Base=r49.0	ElementSize=1	Type=ub
+
+//r49
+
+
+//r50
+    //Message Source
+
+
+//r51
+	//DN_UV History Surface
+
+	.declare	udHIST_UV		Base=r51.0	ElementSize=4	Type=ud
+	.declare	ubHIST_UV		Base=r51.0	ElementSize=1	Type=ub
+
+//r52 - r91
+	//r52
+	//Current Frame UV
+
+
+	.declare	udCURR_UV		Base=r52.0	ElementSize=4	Type=ud
+	.declare	ubCURR_UV		Base=r52.0	ElementSize=1	Type=ub
+
+	//r54
+	//CURBE COPY
+
+
+	//r55
+
+
+	.declare 	uwSOAD_MIN_8x4		Base=r56.0	ElementSize=2	Type=uw
+
+	//r61
+
+
+	//r62
+
+
+	//History Surface Temp Origin
+
+
+    //r63
+    //Current Frame Y Temp Origin
+
+
+	//BNE Surface Origin
+
+
+    //r70
+
+	.declare	uwDIFF_TEMPORAL_SUM4x4	Base=r70.0	ElementSize=2	Type=uw  //4 GRFs
+
+	//r74-91 : For Saving Dest UV (PL2/PL3)
+
+
+	.declare	ubMSGPAYLOAD_UV0	Base=r75.0	ElementSize=1	Type=ub
+
+
+	.declare	ubMSGPAYLOAD_U		Base=r75.0	ElementSize=1	Type=ub
+
+
+	.declare	ubMSGPAYLOAD_UV1	Base=r84.0	ElementSize=1	Type=ub
+
+
+	.declare	ubMSGPAYLOAD_V		Base=r84.0	ElementSize=1	Type=ub
+
+	//r90
+
+	.declare	uwDIFF_TEMPORAL_SUM4x4_FINAL	Base=r90.0	ElementSize=2	Type=uw  //2 GRFs
+
+//r92-127
+	//Current Frame Y
+
+
+	//r92
+    .declare	uwDIFF_TEMPORAL_SUM4x4_TEMP_0		Base=r92	ElementSize=2	Type=uw
+	//r101
+    .declare	uwDIFF_TEMPORAL_SUM4x4_TEMP_1		Base=r101	ElementSize=2	Type=uw
+	//r110
+    .declare	uwDIFF_TEMPORAL_SUM4x4_TEMP_2		Base=r110	ElementSize=2	Type=uw
+	//r119
+    .declare	uwDIFF_TEMPORAL_SUM4x4_TEMP_3		Base=r119	ElementSize=2	Type=uw
+
+	.declare	udCURR_Y0		Base=r93.0	ElementSize=4	Type=ud
+    .declare	ubCURR_Y0		Base=r93.0	ElementSize=1	Type=ub
+    .declare	udCURR_Y1		Base=r102.0	ElementSize=4	Type=ud
+	.declare	ubCURR_Y1		Base=r102.0	ElementSize=1	Type=ub
+	.declare	udCURR_Y2		Base=r111.0	ElementSize=4	Type=ud
+	.declare	ubCURR_Y2		Base=r111.0	ElementSize=1	Type=ub
+	.declare	udCURR_Y3		Base=r120.0	ElementSize=4	Type=ud
+	.declare	ubCURR_Y3		Base=r120.0	ElementSize=1	Type=ub
+
+	//r92: To hold U data for PL3 surfaces
+	.declare	udCURR_U_TEMP		Base=r92.0	ElementSize=4	Type=ud
+    .declare	ubCURR_U_TEMP		Base=r92.0	ElementSize=1	Type=ub
+
+    //r112: To hold U data for PL3 surfaces
+	.declare	udPREV_U_TEMP		Base=r112.0	ElementSize=4	Type=ud
+	.declare	ubPREV_U_TEMP		Base=r112.0	ElementSize=1	Type=ub
+
+	//r120: To hold V data for PL3 surfaces
+	.declare	udPREV_V_TEMP		Base=r120.0	ElementSize=4	Type=ud
+	.declare	ubPREV_V_TEMP		Base=r120.0	ElementSize=1	Type=ub
+
+
+	// Initialize the message source registers: copy all 8 dwords of r0 into each one.
+	mov (8)   r50.0<1>:ud		r0.0<8;8,1>:ud		// r50: source register of the U/V and history read sends that follow
+	mov (8)   r92.0<1>:ud		r0.0<8;8,1>:ud		// r92, r101, r110, r119: source registers of the four
+	mov (8)   r101.0<1>:ud		r0.0<8;8,1>:ud		//   current-frame Y read sends (udCURR_Y0..udCURR_Y3)
+	mov (8)   r110.0<1>:ud		r0.0<8;8,1>:ud
+	mov (8)   r119.0<1>:ud		r0.0<8;8,1>:ud
+
+
+
+//Module Name 	: 	DN_UV_PL3_Load_Curr_Frame_UV
+//Author		:	Tatiya, Rupesh
+//Description	:	Loads Current Frame U/V data for PL3 input.
+
+
+
+//Module name 	:  DN_UV_Load_Curr_Frame_UV
+//Author		:  Tatiya, Rupesh
+//Description	:  Loads Current Frame (UV only).
+//				   We need 4 extra rows (2 per field) and 2 extra pixels (1 each side) for both U and V each.
+//				   The processing size is 16x16 U and V each. So we need : U size - 18x20, V size - 18x20, UV size - 36x20, YUYV size - 72x20.
+
+
+
+
+//18x20 U/V block is partitioned as follows:
+//				<------ 18 ------>
+//				------------------
+//				|   18x8 A1      |
+//				|                |
+//				|----------------|
+//				|    18x8 A2     |
+//				|                |
+//				|----------------|
+//				|    18x4 A3     |
+//				|----------------|
+//
+// Coordinates: (x-1, y-2), (x-1, y+6), (x-1, y+14)
+
+//1. Load U data into starting at CURR_Y0 (r93-r122)
+//2. Load V data into TEMP space (r25-r44)
+
+	//U/V surface origin: (ORIX/2, ORIY/2)
+	add  (2)	r7.4<1>:w		r7.0<2;2,1>:w	 	r4.4<2;2,1>:w	 { AccWrEn }  	// Source Block origin: r7.0/r7.1 + r4.4/r4.5
+	shr  (2)	r7.4<1>:w		acc0.4<2;2,1>:w		1:w							   			// halve both coordinates; U Data reads follow
+	mov  (2)	acc0.0<1>:d							r7.4<2;2,1>:w	// keep the halved origin as dwords in acc0.0/acc0.1 for the adds below
+
+	//A1: 18x8 block at (x-1, y-2)
+	add  (1)   	r50.0<1>:d	acc0.0<0;1,0>:d		-1:d						 
+	add  (1)   	r50.1<1>:d	acc0.1<0;1,0>:d		-2:d						 
+ 	mov  (1)   	r50.2<1>:ud	0x70011:ud				 // block size: width-1 = 0x11 (18), height-1 = 7 (8 rows)
+ 	send (8)	udCURR_U_TEMP(0)<1>			r50		0x4	0x2890004:ud
+
+	//A2: 18x8 block at (x-1, y+6); r50.0 and r50.2 are reused from A1
+ 	add  (1)   	r50.1<1>:d	acc0.1<0;1,0>:d		6:d
+ 	send (8)	udCURR_U_TEMP(8)<1>			r50		0x4	0x2890004:ud
+
+ 	//A3: 18x4 block at (x-1, y+14)
+ 	add  (1)   	r50.1<1>:d	acc0.1<0;1,0>:d		14:d						 
+ 	mov  (1)   	r50.2<1>:ud	0x30011:ud				 // block size: width-1 = 0x11 (18), height-1 = 3 (4 rows)
+ 	send (8)	udCURR_U_TEMP(16)<1>		r50		0x4	0x2490004:ud
+
+ 	//V Data: the same three reads as for U, with descriptors ending in 5 instead of 4, written to udCURR_V_TEMP
+ 	//A1: 18x8 block at (x-1, y-2)
+ 	add  (1)   	r50.0<1>:d	acc0.0<0;1,0>:d		-1:d						 
+	add  (1)   	r50.1<1>:d	acc0.1<0;1,0>:d		-2:d						 
+ 	mov  (1)   	r50.2<1>:ud	0x70011:ud				 // block size: width-1 = 0x11 (18), height-1 = 7 (8 rows)
+ 	send (8)	udCURR_V_TEMP(0)<1>	r50		0x4	0x2890005:ud
+
+ 	//A2: 18x8 block at (x-1, y+6); r50.0 and r50.2 are reused from A1
+ 	add  (1)   	r50.1<1>:d	acc0.1<0;1,0>:d		6:d
+ 	send (8)	udCURR_V_TEMP(8)<1>	r50		0x4	0x2890005:ud
+
+ 	//A3: 18x4 block at (x-1, y+14)
+ 	add  (1)   	r50.1<1>:d	acc0.1<0;1,0>:d		14:d						 
+ 	mov  (1)   	r50.2<1>:ud	0x30011:ud				 // block size: width-1 = 0x11 (18), height-1 = 3 (4 rows)
+ 	send (8)	udCURR_V_TEMP(16)<1>	r50		0x4	0x2490005:ud
+
+ 	//History Origin, Current Y origin and BNE surface origin - all are in inline GRF. Use , . -rT.
+
+ 	//Calculate Origin For History Surface: (ORIX/4, ORIY/8)
+	mov  (16)	acc0.0<1>:w						r7.0<0;2,1>:w				{ AccWrEn }
+	shr  (1)	r7.2<1>:w		acc0.2<0;1,0>:w	2:w			
+	shr  (1)	r7.3<1>:w		acc0.3<0;1,0>:w	3:w			
+
+	//Calculate Origin For BNE Surface: (ORIX/8, ORIY/16)
+	shr  (1)	r7.6<1>:w		acc0.6<0;1,0>:w		3:w		
+	shr  (1)	r7.7<1>:w		acc0.7<0;1,0>:w		4:w		
+
+
+
+//Module Name 	: 	DN_UV_PL3_Load_Prev_Frame_UV.asm
+//Author		:	Tatiya, Rupesh
+//Description	:	Loads Previous Frame UV data for PL3 input.
+
+
+
+//Module Name 	: 	DN_UV_Load_Prev_Frame_UV
+//Author		:	Tatiya, Rupesh
+//Description	:   Loads Prev Frame (UV only). U size - 16x16, V size - 16x16, UV size - 32x16, YUYV size - 64x16.
+
+
+
+
+//1. Load U in bottom half of UV space for prev frame (r17-r24)
+//2. Load V in bottom quarter of Y space for curr frame (r120-r127)
+
+	mov  (2)	r50.0<1>:d		r7.4<2;2,1>:w			{ AccWrEn } 	// Source block origin (r7.4, r7.5)
+	mov  (1)	r50.2<1>:ud	0xF000F:ud  						// U/V block width and height (16x16)
+
+	mov  (8)	r49.0<1>:ud	r50<8;8,1>:ud	// r49 = copy of the r50 header; used as the source of the V read
+
+	send (8)	udPREV_U_TEMP(0)<1>		r50	0x4	0x2890001:ud	//U data -> udPREV_U_TEMP
+	send (8)	udPREV_V_TEMP(0)<1>		r49	0x4	0x2890002:ud	//V data -> udPREV_V_TEMP
+
+
+	//TODO - See if History loading can be combined with Prev Frame Load. - rT
+
+
+//Module name 	:  DN_UV_Load_Hist_UV
+//Author		:  Tatiya, Rupesh
+//Description	:  Load DN History for UV denoise. 4x4 for each U & V.
+
+
+
+
+	mov  (2)	r50.0<1>:d	r7.2<2;2,1>:w			// origin = (r7.2, r7.3), the history-surface origin computed earlier as (ORIX/4, ORIY/8)
+	mov  (1)	r50.2<1>:ud	0x30007:ud  		// block size: width-1 = 7 (8 bytes), height-1 = 3 (4 rows), same encoding as 0xF000F = 16x16
+	send (8)	udHIST_UV(0)<1>			r50		0x4	0x2190022:ud	// read the history block into udHIST_UV
+
+
+
+//File Name		: DN_UV_PL3_Interleave_Curr_Frame_UV.asm
+//Author		: Tatiya, Rupesh
+//Description	: Interleave separately loaded U and V for PL3 format.
+//				  This is needed because Noise Detection and Noise Reduction work on interleaved UV data.
+
+//1. U data: Starting at CURR_Y0 (r93-r122)
+//2. V data: TEMP space (r25-r44)
+
+//In one GRF, we need 10 U (1+8+1) bytes, but there's no SIMD10. So use SIMD16 and discard last 6 bytes.
+
+
+//Move U data
+	mov (16) ubCURR_UV(0,0)<2>	ubCURR_U_TEMP(0,0)<16;16,1>		
+	mov (16) ubCURR_UV(20,0)<2>	ubCURR_U_TEMP(0,8)<16;16,1>		
+
+	mov (16) ubCURR_UV(1,0)<2>	ubCURR_U_TEMP(1,0)<16;16,1>		
+	mov (16) ubCURR_UV(21,0)<2>	ubCURR_U_TEMP(1,8)<16;16,1>		
+
+	mov (16) ubCURR_UV(2,0)<2>	ubCURR_U_TEMP(2,0)<16;16,1>		
+	mov (16) ubCURR_UV(22,0)<2>	ubCURR_U_TEMP(2,8)<16;16,1>		
+
+	mov (16) ubCURR_UV(3,0)<2>	ubCURR_U_TEMP(3,0)<16;16,1>		
+	mov (16) ubCURR_UV(23,0)<2>	ubCURR_U_TEMP(3,8)<16;16,1>		
+
+	mov (16) ubCURR_UV(4,0)<2>	ubCURR_U_TEMP(4,0)<16;16,1>		
+	mov (16) ubCURR_UV(24,0)<2>	ubCURR_U_TEMP(4,8)<16;16,1>		
+
+	mov (16) ubCURR_UV(5,0)<2>	ubCURR_U_TEMP(5,0)<16;16,1>		
+	mov (16) ubCURR_UV(25,0)<2>	ubCURR_U_TEMP(5,8)<16;16,1>		
+
+	mov (16) ubCURR_UV(6,0)<2>	ubCURR_U_TEMP(6,0)<16;16,1>		
+	mov (16) ubCURR_UV(26,0)<2>	ubCURR_U_TEMP(6,8)<16;16,1>		
+
+	mov (16) ubCURR_UV(7,0)<2>	ubCURR_U_TEMP(7,0)<16;16,1>		
+	mov (16) ubCURR_UV(27,0)<2>	ubCURR_U_TEMP(7,8)<16;16,1>		
+
+	mov (16) ubCURR_UV(8,0)<2>	ubCURR_U_TEMP(8,0)<16;16,1>		
+	mov (16) ubCURR_UV(28,0)<2>	ubCURR_U_TEMP(8,8)<16;16,1>		
+
+	mov (16) ubCURR_UV(9,0)<2>	ubCURR_U_TEMP(9,0)<16;16,1>		
+	mov (16) ubCURR_UV(29,0)<2>	ubCURR_U_TEMP(9,8)<16;16,1>		
+
+	mov (16) ubCURR_UV(10,0)<2>	ubCURR_U_TEMP(10,0)<16;16,1>		
+	mov (16) ubCURR_UV(30,0)<2>	ubCURR_U_TEMP(10,8)<16;16,1>		
+
+	mov (16) ubCURR_UV(11,0)<2>	ubCURR_U_TEMP(11,0)<16;16,1>		
+	mov (16) ubCURR_UV(31,0)<2>	ubCURR_U_TEMP(11,8)<16;16,1>		
+
+	mov (16) ubCURR_UV(12,0)<2>	ubCURR_U_TEMP(12,0)<16;16,1>		
+	mov (16) ubCURR_UV(32,0)<2>	ubCURR_U_TEMP(12,8)<16;16,1>		
+
+	mov (16) ubCURR_UV(13,0)<2>	ubCURR_U_TEMP(13,0)<16;16,1>		
+	mov (16) ubCURR_UV(33,0)<2>	ubCURR_U_TEMP(13,8)<16;16,1>		
+
+	mov (16) ubCURR_UV(14,0)<2>	ubCURR_U_TEMP(14,0)<16;16,1>		
+	mov (16) ubCURR_UV(34,0)<2>	ubCURR_U_TEMP(14,8)<16;16,1>		
+
+	mov (16) ubCURR_UV(15,0)<2>	ubCURR_U_TEMP(15,0)<16;16,1>		
+	mov (16) ubCURR_UV(35,0)<2>	ubCURR_U_TEMP(15,8)<16;16,1>		
+
+	mov (16) ubCURR_UV(16,0)<2>	ubCURR_U_TEMP(16,0)<16;16,1>		
+	mov (16) ubCURR_UV(36,0)<2>	ubCURR_U_TEMP(16,8)<16;16,1>		
+
+	mov (16) ubCURR_UV(17,0)<2>	ubCURR_U_TEMP(17,0)<16;16,1>		
+	mov (16) ubCURR_UV(37,0)<2>	ubCURR_U_TEMP(17,8)<16;16,1>		
+
+	mov (16) ubCURR_UV(18,0)<2>	ubCURR_U_TEMP(18,0)<16;16,1>		
+	mov (16) ubCURR_UV(38,0)<2>	ubCURR_U_TEMP(18,8)<16;16,1>		
+
+	mov (16) ubCURR_UV(19,0)<2>	ubCURR_U_TEMP(19,0)<16;16,1>		
+	mov (16) ubCURR_UV(39,0)<2>	ubCURR_U_TEMP(19,8)<16;16,1>		
+
+
+//Move V data
+	mov (16) ubCURR_UV(0,1)<2>	ubCURR_V_TEMP(0,0)<16;16,1>	
+	mov (16) ubCURR_UV(20,1)<2>	ubCURR_V_TEMP(0,8)<16;16,1>	
+	mov (16) ubCURR_UV(1,1)<2>	ubCURR_V_TEMP(1,0)<16;16,1>	
+	mov (16) ubCURR_UV(21,1)<2>	ubCURR_V_TEMP(1,8)<16;16,1>	
+	mov (16) ubCURR_UV(2,1)<2>	ubCURR_V_TEMP(2,0)<16;16,1>	
+	mov (16) ubCURR_UV(22,1)<2>	ubCURR_V_TEMP(2,8)<16;16,1>	
+	mov (16) ubCURR_UV(3,1)<2>	ubCURR_V_TEMP(3,0)<16;16,1>	
+	mov (16) ubCURR_UV(23,1)<2>	ubCURR_V_TEMP(3,8)<16;16,1>	
+	mov (16) ubCURR_UV(4,1)<2>	ubCURR_V_TEMP(4,0)<16;16,1>	
+	mov (16) ubCURR_UV(24,1)<2>	ubCURR_V_TEMP(4,8)<16;16,1>	
+	mov (16) ubCURR_UV(5,1)<2>	ubCURR_V_TEMP(5,0)<16;16,1>	
+	mov (16) ubCURR_UV(25,1)<2>	ubCURR_V_TEMP(5,8)<16;16,1>	
+	mov (16) ubCURR_UV(6,1)<2>	ubCURR_V_TEMP(6,0)<16;16,1>	
+	mov (16) ubCURR_UV(26,1)<2>	ubCURR_V_TEMP(6,8)<16;16,1>	
+	mov (16) ubCURR_UV(7,1)<2>	ubCURR_V_TEMP(7,0)<16;16,1>	
+	mov (16) ubCURR_UV(27,1)<2>	ubCURR_V_TEMP(7,8)<16;16,1>	
+	mov (16) ubCURR_UV(8,1)<2>	ubCURR_V_TEMP(8,0)<16;16,1>	
+	mov (16) ubCURR_UV(28,1)<2>	ubCURR_V_TEMP(8,8)<16;16,1>	
+	mov (16) ubCURR_UV(9,1)<2>	ubCURR_V_TEMP(9,0)<16;16,1>	
+	mov (16) ubCURR_UV(29,1)<2>	ubCURR_V_TEMP(9,8)<16;16,1>	
+	mov (16) ubCURR_UV(10,1)<2>	ubCURR_V_TEMP(10,0)<16;16,1>	
+	mov (16) ubCURR_UV(30,1)<2>	ubCURR_V_TEMP(10,8)<16;16,1>	
+	mov (16) ubCURR_UV(11,1)<2>	ubCURR_V_TEMP(11,0)<16;16,1>	
+	mov (16) ubCURR_UV(31,1)<2>	ubCURR_V_TEMP(11,8)<16;16,1>	
+	mov (16) ubCURR_UV(12,1)<2>	ubCURR_V_TEMP(12,0)<16;16,1>	
+	mov (16) ubCURR_UV(32,1)<2>	ubCURR_V_TEMP(12,8)<16;16,1>	
+	mov (16) ubCURR_UV(13,1)<2>	ubCURR_V_TEMP(13,0)<16;16,1>	
+	mov (16) ubCURR_UV(33,1)<2>	ubCURR_V_TEMP(13,8)<16;16,1>	
+	mov (16) ubCURR_UV(14,1)<2>	ubCURR_V_TEMP(14,0)<16;16,1>	
+	mov (16) ubCURR_UV(34,1)<2>	ubCURR_V_TEMP(14,8)<16;16,1>	
+	mov (16) ubCURR_UV(15,1)<2>	ubCURR_V_TEMP(15,0)<16;16,1>	
+	mov (16) ubCURR_UV(35,1)<2>	ubCURR_V_TEMP(15,8)<16;16,1>	
+	mov (16) ubCURR_UV(16,1)<2>	ubCURR_V_TEMP(16,0)<16;16,1>	
+	mov (16) ubCURR_UV(36,1)<2>	ubCURR_V_TEMP(16,8)<16;16,1>	
+	mov (16) ubCURR_UV(17,1)<2>	ubCURR_V_TEMP(17,0)<16;16,1>	
+	mov (16) ubCURR_UV(37,1)<2>	ubCURR_V_TEMP(17,8)<16;16,1>	
+	mov (16) ubCURR_UV(18,1)<2>	ubCURR_V_TEMP(18,0)<16;16,1>	
+	mov (16) ubCURR_UV(38,1)<2>	ubCURR_V_TEMP(18,8)<16;16,1>	
+	mov (16) ubCURR_UV(19,1)<2>	ubCURR_V_TEMP(19,0)<16;16,1>	
+	mov (16) ubCURR_UV(39,1)<2>	ubCURR_V_TEMP(19,8)<16;16,1>	
+
+
+
+//File Name		: DN_UV_PL3_Interleave_Prev_Frame_UV.asm
+//Author		: Tatiya, Rupesh
+//Description	: Interleave separately loaded U and V for PL3 format.
+//                This is needed because Noise Detection and Noise Reduction work on interleaved UV data.
+
+//1.U Data: bottom half of UV space for prev frame (r17-r24)
+//2.V Data: bottom quarter of Y space for curr frame (r120-r127)
+
+	mov (16)	ubPREV_UV(0,0)<2>		ubPREV_U_TEMP(0,0)<16;8,1>		{ NoDDClr }
+	mov (16)	ubPREV_UV(8,0)<2>		ubPREV_U_TEMP(0,8)<16;8,1>		{ NoDDClr }
+	mov (16)	ubPREV_UV(1,0)<2>		ubPREV_U_TEMP(1,0)<16;8,1>		{ NoDDClr }
+	mov (16)	ubPREV_UV(9,0)<2>		ubPREV_U_TEMP(1,8)<16;8,1>		{ NoDDClr }
+	mov (16)	ubPREV_UV(2,0)<2>		ubPREV_U_TEMP(2,0)<16;8,1>		{ NoDDClr }
+	mov (16)	ubPREV_UV(10,0)<2>		ubPREV_U_TEMP(2,8)<16;8,1>		{ NoDDClr }
+	mov (16)	ubPREV_UV(3,0)<2>		ubPREV_U_TEMP(3,0)<16;8,1>		{ NoDDClr }
+	mov (16)	ubPREV_UV(11,0)<2>		ubPREV_U_TEMP(3,8)<16;8,1>		{ NoDDClr }
+	mov (16)	ubPREV_UV(4,0)<2>		ubPREV_U_TEMP(4,0)<16;8,1>		{ NoDDClr }
+	mov (16)	ubPREV_UV(12,0)<2>		ubPREV_U_TEMP(4,8)<16;8,1>		{ NoDDClr }
+	mov (16)	ubPREV_UV(5,0)<2>		ubPREV_U_TEMP(5,0)<16;8,1>		{ NoDDClr }
+	mov (16)	ubPREV_UV(13,0)<2>		ubPREV_U_TEMP(5,8)<16;8,1>		{ NoDDClr }
+	mov (16)	ubPREV_UV(6,0)<2>		ubPREV_U_TEMP(6,0)<16;8,1>		{ NoDDClr }
+	mov (16)	ubPREV_UV(14,0)<2>		ubPREV_U_TEMP(6,8)<16;8,1>		{ NoDDClr }
+	mov (16)	ubPREV_UV(7,0)<2>		ubPREV_U_TEMP(7,0)<16;8,1>		{ NoDDClr }
+	mov (16)	ubPREV_UV(15,0)<2>		ubPREV_U_TEMP(7,8)<16;8,1>		{ NoDDClr }
+
+	mov (16)	ubPREV_UV(0,1)<2>		ubPREV_V_TEMP(0,0)<16;8,1>		{ NoDDChk }
+	mov (16)	ubPREV_UV(8,1)<2>		ubPREV_V_TEMP(0,8)<16;8,1>		{ NoDDChk }
+	mov (16)	ubPREV_UV(1,1)<2>		ubPREV_V_TEMP(1,0)<16;8,1>		{ NoDDChk }
+	mov (16)	ubPREV_UV(9,1)<2>		ubPREV_V_TEMP(1,8)<16;8,1>		{ NoDDChk }
+	mov (16)	ubPREV_UV(2,1)<2>		ubPREV_V_TEMP(2,0)<16;8,1>		{ NoDDChk }
+	mov (16)	ubPREV_UV(10,1)<2>		ubPREV_V_TEMP(2,8)<16;8,1>		{ NoDDChk }
+	mov (16)	ubPREV_UV(3,1)<2>		ubPREV_V_TEMP(3,0)<16;8,1>		{ NoDDChk }
+	mov (16)	ubPREV_UV(11,1)<2>		ubPREV_V_TEMP(3,8)<16;8,1>		{ NoDDChk }
+	mov (16)	ubPREV_UV(4,1)<2>		ubPREV_V_TEMP(4,0)<16;8,1>		{ NoDDChk }
+	mov (16)	ubPREV_UV(12,1)<2>		ubPREV_V_TEMP(4,8)<16;8,1>		{ NoDDChk }
+	mov (16)	ubPREV_UV(5,1)<2>		ubPREV_V_TEMP(5,0)<16;8,1>		{ NoDDChk }
+	mov (16)	ubPREV_UV(13,1)<2>		ubPREV_V_TEMP(5,8)<16;8,1>		{ NoDDChk }
+	mov (16)	ubPREV_UV(6,1)<2>		ubPREV_V_TEMP(6,0)<16;8,1>		{ NoDDChk }
+	mov (16)	ubPREV_UV(14,1)<2>		ubPREV_V_TEMP(6,8)<16;8,1>		{ NoDDChk }
+	mov (16)	ubPREV_UV(7,1)<2>		ubPREV_V_TEMP(7,0)<16;8,1>		{ NoDDChk }
+	mov (16)	ubPREV_UV(15,1)<2>		ubPREV_V_TEMP(7,8)<16;8,1>		{ NoDDChk }
+
+
+
+//Module Name 	: DN_UV_420_Load_Curr_Frame_Y
+//Author		: Tatiya, Rupesh
+//Description	: Load Curr Frame Y data for 420 Input
+
+
+
+//Module Name 	: DN_UV_Load_Curr_Frame_Y
+//Author		: Tatiya, Rupesh
+//Description	: Loads Y of Current frame.
+
+
+
+
+	//For 16x16 U and 16x16 V for 420, we need to read 32x32 Y.
+
+	mov (8)		acc0.0<1>:ud		r0.0<8;8,1>:ud	// build the message header in acc0, starting from a copy of r0
+	mov (1)		acc0.2<1>:ud		0xF000F:ud	// block size 16x16 (same constant as the 16x16 prev-frame U/V read)
+	add (2)		acc0.0<1>:ud		r7.0<2;2,1>:w		r4.4<2;2,1>:w	// origin (x, y) = r7.0/r7.1 + r4.4/r4.5
+
+	mov (8)     r92.0<1>:ud	acc0.0<8;8,1>:ud	// header 0: (x, y)
+
+	mov (8)     r101.0<1>:ud	acc0.0<8;8,1>:ud	// headers 1..3 start as copies of header 0
+	mov (8)     r110.0<1>:ud	acc0.0<8;8,1>:ud
+	mov (8)     r119.0<1>:ud	acc0.0<8;8,1>:ud
+
+	add (1)		r101.1<1>:d 	acc0.1<0;1,0>:d   		16:d	// header 1: (x, y+16)
+
+	add (1)		r110.0<1>:d 	acc0.0<0;1,0>:d   		16:d	// header 2: (x+16, y)
+
+	add (2)		r119.0<1>:d 	acc0.0<2;2,1>:d   		16:d	// header 3: (x+16, y+16)
+
+	send (8)	udCURR_Y0(0)<1>		r92		0x4	0x2890003:ud	// four 16x16 reads make up the 32x32 Y block
+	send (8)	udCURR_Y1(0)<1>		r101		0x4	0x2890003:ud
+	send (8)	udCURR_Y2(0)<1>		r110		0x4	0x2890003:ud
+	send (8)	udCURR_Y3(0)<1>		r119		0x4	0x2890003:ud
+
+
+
+//Module Name : DN_UV_Noise_Detection_UV
+//Author	  : Tatiya, Rupesh
+//Description : Performs noise detection on 16x16 U and 16x16 V each.
+
+
+
+//Module Name 	: DN_UV_Move_CURBE_Inline_UV.asm
+//Author		: Tatiya, Rupesh
+
+
+
+
+	//Mov CURBE data to another space - so that it can be used as Temp Space --> r1 - r6
+	mov (4)	r54.28<1>:ub		r2.28<4;4,1>:ub		//Dest. YUY2 offset
+	mov (2) r54.5<1>:ud		r4.0<4;2,2>:ud		//Src YUY2 offset and Origin offset
+	mov (4)	r55.28<1>:ub		r1.0<4;4,1>:ub
+
+	mov (8) r61.20<1>:ub		r1.4<8;8,1>:ub		
+	mov (4) r61.28<1>:ub		r1.12<4;4,1>:ub		
+
+	//Move Inline Data to another space - so that it can be used as Temp Space --> r7
+	mov (4) r62.10<1>:w				r7.0<4;4,1>:w
+	mov (4) r63.10<1>:w		r7.4<4;4,1>:w
+
+
+
+
+
+//Module Name	: DN_UV_Noise_Detection_Set_Top_Region_N
+//Author		: Tatiya, Rupesh
+//Description	: Sets sub-region N from the Top region.
+
+
+	mov (1) a0.0:uw				1664:uw				// a0.0 = 1664 = 52*32: byte offset of r52.0 (base of udCURR_UV/ubCURR_UV), assuming 32-byte GRFs
+	mov (1)	a0.1:uw	1816:uw 	// a0.1 = 1816 = 56*32 + 24: byte offset of r56.24 (same assumption)
+
+
+
+
+add (1) r7.7<1>:d ip:ud 32:d { NoCompact }	// r7.7 = ip + 32; presumably the return address for the routine jumped to next - confirm
+ jmpi (1) DN_UV_NOISE_DETECTION_UV { NoCompact }	// jump to the shared DN_UV_NOISE_DETECTION_UV code
+
+
+
+
+
+//Module Name	: DN_UV_Noise_Detection_Set_Top_Region_N
+//Author		: Tatiya, Rupesh
+//Description	: Sets sub-region N from the Top region.
+
+
+	//TODO - remove one instruction here using arithmetic. -rT
+	mov (1) a0.0:uw				1792:uw			// a0.0 = 1792 = 56*32: byte offset of r56.0, assuming 32-byte GRFs
+	mov (1)	a0.1:uw	1820:uw 	// a0.1 = 1820 = 56*32 + 28: byte offset of r56.28 (same assumption)
+
+
+
+
+add (1) r7.7<1>:d ip:ud 32:d { NoCompact }	// r7.7 = ip + 32; presumably the return address for the routine jumped to next - confirm
+ jmpi (1) DN_UV_NOISE_DETECTION_UV { NoCompact }	// jump to the shared DN_UV_NOISE_DETECTION_UV code
+
+
+
+
+
+//Module Name	: DN_UV_Noise_Detection_Set_Top_Region_N
+//Author		: Tatiya, Rupesh
+//Description	: Sets sub-region N from the Top region.
+
+
+	//TODO - remove one instruction here using arithmetic. -rT
+	mov (1) a0.0:uw				1920:uw			// a0.0 = 1920 = 60*32: byte offset of r60.0, assuming 32-byte GRFs
+	mov (1)	a0.1:uw	1848:uw 	// a0.1 = 1848 = 57*32 + 24: byte offset of r57.24 (same assumption)
+
+
+
+
+add (1) r7.7<1>:d ip:ud 32:d { NoCompact }	// r7.7 = ip + 32; presumably the return address for the routine jumped to next - confirm
+ jmpi (1) DN_UV_NOISE_DETECTION_UV { NoCompact }	// jump to the shared DN_UV_NOISE_DETECTION_UV code
+
+
+
+
+
+//Module Name	: DN_UV_Noise_Detection_Set_Top_Region_N
+//Author		: Tatiya, Rupesh
+//Description	: Sets sub-region N from the Top region.
+
+
+	//TODO - remove one instruction here using arithmetic. -rT
+	mov (1) a0.0:uw				2048:uw			// a0.0 = 2048 = 64*32: byte offset of r64.0, assuming 32-byte GRFs
+	mov (1)	a0.1:uw	1852:uw 	// a0.1 = 1852 = 57*32 + 28: byte offset of r57.28 (same assumption)
+
+
+
+
+add (1) r7.7<1>:d ip:ud 32:d { NoCompact }	// r7.7 = ip + 32; presumably the return address for the routine jumped to next - confirm
+ jmpi (1) DN_UV_NOISE_DETECTION_UV { NoCompact }	// jump to the shared DN_UV_NOISE_DETECTION_UV code
+
+
+
+
+
+//Module Name	: DN_UV_Noise_Detection_Set_Top_Region_N
+//Author		: Tatiya, Rupesh
+//Description	: Sets sub-region N from the Top region.
+
+
+	mov (1) a0.0:uw				2304:uw			// a0.0 = 2304 = 72*32: byte offset of r72.0, assuming 32-byte GRFs
+	mov (1)	a0.1:uw	1880:uw 	// a0.1 = 1880 = 58*32 + 24: byte offset of r58.24 (same assumption)
+
+
+
+
+add (1) r7.7<1>:d ip:ud 32:d { NoCompact }	// r7.7 = ip + 32; presumably the return address for the routine jumped to next - confirm
+ jmpi (1) DN_UV_NOISE_DETECTION_UV { NoCompact }	// jump to the shared DN_UV_NOISE_DETECTION_UV code
+
+
+
+
+
+//Module Name	: DN_UV_Noise_Detection_Set_Top_Region_N
+//Author		: Tatiya, Rupesh
+//Description	: Sets sub-region N from the Top region.
+
+
+	//TODO - remove one instruction here using arithmetic. -rT
+	mov (1) a0.0:uw				2432:uw			// a0.0 = 2432 = 76*32: byte offset of r76.0, assuming 32-byte GRFs
+	mov (1)	a0.1:uw	1884:uw 	// a0.1 = 1884 = 58*32 + 28: byte offset of r58.28 (same assumption)
+
+
+
+
+add (1) r7.7<1>:d ip:ud 32:d { NoCompact }	// r7.7 = ip + 32; presumably the return address for the routine jumped to next - confirm
+ jmpi (1) DN_UV_NOISE_DETECTION_UV { NoCompact }	// jump to the shared DN_UV_NOISE_DETECTION_UV code
+
+
+
+
+
+//Module Name	: DN_UV_Noise_Detection_Set_Top_Region_N
+//Author		: Tatiya, Rupesh
+//Description	: Sets sub-region N from the Top region.
+
+
+	//TODO - remove one instruction here using arithmetic. -rT
+	mov (1) a0.0:uw				2560:uw			// a0.0 = 2560 = 80*32: byte offset of r80.0, assuming 32-byte GRFs
+	mov (1)	a0.1:uw	1912:uw 	// a0.1 = 1912 = 59*32 + 24: byte offset of r59.24 (same assumption)
+
+
+
+
+add (1) r7.7<1>:d ip:ud 32:d { NoCompact }	// r7.7 = ip + 32; presumably the return address for the routine jumped to next - confirm
+ jmpi (1) DN_UV_NOISE_DETECTION_UV { NoCompact }	// jump to the shared DN_UV_NOISE_DETECTION_UV code
+
+
+
+
+
+//Module Name	: DN_UV_Noise_Detection_Set_Top_Region_N
+//Author		: Tatiya, Rupesh
+//Description	: Sets sub-region N from the Top region.
+
+
+	//TODO - remove one instruction here using arithmetic. -rT
+	mov (1) a0.0:uw				2688:uw			// a0.0 = 2688 = 84*32: byte offset of r84.0, assuming 32-byte GRFs
+	mov (1)	a0.1:uw	1916:uw 	// a0.1 = 1916 = 59*32 + 28: byte offset of r59.28 (same assumption)
+
+
+
+
+add (1) r7.7<1>:d ip:ud 32:d { NoCompact }	// r7.7 = ip + 32; presumably the return address for the routine jumped to next - confirm
+ jmpi (1) DN_UV_NOISE_DETECTION_UV { NoCompact }	// jump to the shared DN_UV_NOISE_DETECTION_UV code
+
+
+
+//Module 		: DN_UV_Noise_Reduction_UV
+//Author		: Tatiya, Rupesh
+//Description	: Performs Noise Reduction on 16x16 U and 16x16 V.
+//Tasks			: 1. Update weight history
+//				  2. Find if the block is a motion block
+//				  3. Compute Denoised Pixel.
+
+
+
+
+//History is 1+1 byte every 4x4 U and 4x4 V.
+
+	cmp.l.f0.0 (16) null<1>:w		ubHIST_UV(0,0)<16;16,1>		r61.20<0;2,1>:ub
+	cmp.l.f1.0 (16) null<1>:w		ubHIST_UV(0,0)<16;16,1>		r61.22<0;2,1>:ub
+
+	mov (16)	uwCURBE_TEMP(0)<1>	0:w
+	mov (16)	uwCURBE_TEMP(1)<1>	0:w
+
+	//Compute diff between curr and prev. - First 16 lines
+	// 8 lines here
+    add (16)	wDIFF_TEMPORAL(0)<1>			ubCURR_UV(2,2)<16;16,1>		-ubPREV_UV(0,0)<16;16,1>		//Diff UV interleaved
+    add (16)	wDIFF_TEMPORAL(1)<1>			ubCURR_UV(3,2)<16;16,1>		-ubPREV_UV(0,16)<16;16,1>		//Diff UV interleaved
+    add (16)	wDIFF_TEMPORAL(2)<1>			ubCURR_UV(4,2)<16;16,1>		-ubPREV_UV(0,32)<16;16,1>		//Diff UV interleaved
+    add (16)	wDIFF_TEMPORAL(3)<1>			ubCURR_UV(5,2)<16;16,1>		-ubPREV_UV(0,48)<16;16,1>		//Diff UV interleaved
+    add (16)	wDIFF_TEMPORAL(4)<1>			ubCURR_UV(6,2)<16;16,1>		-ubPREV_UV(0,64)<16;16,1>		//Diff UV interleaved
+    add (16)	wDIFF_TEMPORAL(5)<1>			ubCURR_UV(7,2)<16;16,1>		-ubPREV_UV(0,80)<16;16,1>		//Diff UV interleaved
+    add (16)	wDIFF_TEMPORAL(6)<1>			ubCURR_UV(8,2)<16;16,1>		-ubPREV_UV(0,96)<16;16,1>		//Diff UV interleaved
+    add (16)	wDIFF_TEMPORAL(7)<1>			ubCURR_UV(9,2)<16;16,1>		-ubPREV_UV(0,112)<16;16,1>		//Diff UV interleaved
+
+	//Update WT HIST
+	(-f0.0) shr 	(16) uwCURBE_TEMP(0)<1>		ubHIST_UV(0,0)<16;16,1>		1:w
+	(f1.0)  add 	(16) uwCURBE_TEMP(2)<1>		ubHIST_UV(0,0)<16;16,1>		r61.24<0;2,1>:ub
+	(f0.0)  mov 	(16) uwCURBE_TEMP(2)<1>		r61.20<0;2,1>:ub
+	(-f0.0.anyv) mov 	(16) uwCURBE_TEMP(2)<1>		ubHIST_UV(0,0)<16;16,1>
+
+	cmp.l.f0.0 (16) null<1>:w		ubHIST_UV(0,16)<16;16,1>	r61.20<0;2,1>:ub
+	cmp.l.f1.0 (16) null<1>:w		ubHIST_UV(0,16)<16;16,1>	r61.22<0;2,1>:ub
+
+	//Compute diff between curr and prev. - First 16 lines
+	// 8 more lines here
+    add (16)	wDIFF_TEMPORAL(8)<1>			ubCURR_UV(10,2)<16;16,1>		-ubPREV_UV(0,128)<16;16,1>		//Diff UV interleaved
+    add (16)	wDIFF_TEMPORAL(9)<1>			ubCURR_UV(11,2)<16;16,1>		-ubPREV_UV(0,144)<16;16,1>		//Diff UV interleaved
+    add (16)	wDIFF_TEMPORAL(10)<1>			ubCURR_UV(12,2)<16;16,1>		-ubPREV_UV(0,160)<16;16,1>		//Diff UV interleaved
+    add (16)	wDIFF_TEMPORAL(11)<1>			ubCURR_UV(13,2)<16;16,1>		-ubPREV_UV(0,176)<16;16,1>		//Diff UV interleaved
+    add (16)	wDIFF_TEMPORAL(12)<1>			ubCURR_UV(14,2)<16;16,1>		-ubPREV_UV(0,192)<16;16,1>		//Diff UV interleaved
+    add (16)	wDIFF_TEMPORAL(13)<1>			ubCURR_UV(15,2)<16;16,1>		-ubPREV_UV(0,208)<16;16,1>		//Diff UV interleaved
+    add (16)	wDIFF_TEMPORAL(14)<1>			ubCURR_UV(16,2)<16;16,1>		-ubPREV_UV(0,224)<16;16,1>		//Diff UV interleaved
+    add (16)	wDIFF_TEMPORAL(15)<1>			ubCURR_UV(17,2)<16;16,1>		-ubPREV_UV(0,240)<16;16,1>		//Diff UV interleaved
+
+	(-f0.0) shr 	(16) uwCURBE_TEMP(1)<1>		ubHIST_UV(0,16)<16;16,1>	1:w
+	(f1.0)  add 	(16) uwCURBE_TEMP(3)<1>		ubHIST_UV(0,16)<16;16,1>	r61.24<0;2,1>:ub
+	(f0.0)  mov 	(16) uwCURBE_TEMP(3)<1>		r61.20<0;2,1>:ub
+	(-f0.0.anyv) mov(16) uwCURBE_TEMP(3)<1>		ubHIST_UV(0,16)<16;16,1>
+
+	//16x16 to 16x4 - First 16 lines: sum |diff| of rows 0-3 into uwDIFF_TEMPORAL_SUM4x4(0)
+	add (16)	acc0.0<1>:uw					(abs)wDIFF_TEMPORAL(0)<16;16,1>	(abs)wDIFF_TEMPORAL(1)<16;16,1>
+	add (16)	acc0.0<1>:uw					acc0.0<16;16,1>:uw					(abs)wDIFF_TEMPORAL(2)<16;16,1>
+	add (16)	uwDIFF_TEMPORAL_SUM4x4(0)<1>	acc0.0<16;16,1>:uw					(abs)wDIFF_TEMPORAL(3)<16;16,1>
+	//16x16 to 16x4 - First 16 lines: sum |diff| of rows 4-7 into uwDIFF_TEMPORAL_SUM4x4(1)
+	add (16)	acc0.0<1>:uw					(abs)wDIFF_TEMPORAL(4)<16;16,1>	(abs)wDIFF_TEMPORAL(5)<16;16,1>
+	add (16)	acc0.0<1>:uw					acc0.0<16;16,1>:uw					(abs)wDIFF_TEMPORAL(6)<16;16,1>
+	add (16)	uwDIFF_TEMPORAL_SUM4x4(1)<1>	acc0.0<16;16,1>:uw					(abs)wDIFF_TEMPORAL(7)<16;16,1>
+	//16x16 to 16x4 - First 16 lines: sum |diff| of rows 8-11 into uwDIFF_TEMPORAL_SUM4x4(2)
+	add (16)	acc0.0<1>:uw					(abs)wDIFF_TEMPORAL(8)<16;16,1>	(abs)wDIFF_TEMPORAL(9)<16;16,1>
+	add (16)	acc0.0<1>:uw					acc0.0<16;16,1>:uw					(abs)wDIFF_TEMPORAL(10)<16;16,1>
+	add (16)	uwDIFF_TEMPORAL_SUM4x4(2)<1>	acc0.0<16;16,1>:uw					(abs)wDIFF_TEMPORAL(11)<16;16,1>
+	//16x16 to 16x4 - First 16 lines: sum |diff| of rows 12-15 into uwDIFF_TEMPORAL_SUM4x4(3)
+	add (16)	acc0.0<1>:uw					(abs)wDIFF_TEMPORAL(12)<16;16,1>	(abs)wDIFF_TEMPORAL(13)<16;16,1>
+	add (16)	acc0.0<1>:uw					acc0.0<16;16,1>:uw					(abs)wDIFF_TEMPORAL(14)<16;16,1>
+	add (16)	uwDIFF_TEMPORAL_SUM4x4(3)<1>	acc0.0<16;16,1>:uw					(abs)wDIFF_TEMPORAL(15)<16;16,1>
+
+//Compute signed diff (curr - prev) between current and previous frame. - Second 16 lines
+//13 lines (rows 16-28) are stored in wDIFF_TEMPORAL(16..28).
+    add (16)	wDIFF_TEMPORAL(16)<1>		ubCURR_UV(22,2)<16;16,1>		-ubPREV_UV(8,0)<16;16,1>		//Diff UV interleaved
+    add (16)	wDIFF_TEMPORAL(17)<1>		ubCURR_UV(23,2)<16;16,1>		-ubPREV_UV(8,16)<16;16,1>		//Diff UV interleaved
+    add (16)	wDIFF_TEMPORAL(18)<1>		ubCURR_UV(24,2)<16;16,1>		-ubPREV_UV(8,32)<16;16,1>		//Diff UV interleaved
+    add (16)	wDIFF_TEMPORAL(19)<1>		ubCURR_UV(25,2)<16;16,1>		-ubPREV_UV(8,48)<16;16,1>		//Diff UV interleaved
+    add (16)	wDIFF_TEMPORAL(20)<1>		ubCURR_UV(26,2)<16;16,1>		-ubPREV_UV(8,64)<16;16,1>		//Diff UV interleaved
+    add (16)	wDIFF_TEMPORAL(21)<1>		ubCURR_UV(27,2)<16;16,1>		-ubPREV_UV(8,80)<16;16,1>		//Diff UV interleaved
+    add (16)	wDIFF_TEMPORAL(22)<1>		ubCURR_UV(28,2)<16;16,1>		-ubPREV_UV(8,96)<16;16,1>		//Diff UV interleaved
+    add (16)	wDIFF_TEMPORAL(23)<1>		ubCURR_UV(29,2)<16;16,1>		-ubPREV_UV(8,112)<16;16,1>		//Diff UV interleaved
+    add (16)	wDIFF_TEMPORAL(24)<1>		ubCURR_UV(30,2)<16;16,1>		-ubPREV_UV(8,128)<16;16,1>		//Diff UV interleaved
+    add (16)	wDIFF_TEMPORAL(25)<1>		ubCURR_UV(31,2)<16;16,1>		-ubPREV_UV(8,144)<16;16,1>		//Diff UV interleaved
+    add (16)	wDIFF_TEMPORAL(26)<1>		ubCURR_UV(32,2)<16;16,1>		-ubPREV_UV(8,160)<16;16,1>		//Diff UV interleaved
+    add (16)	wDIFF_TEMPORAL(27)<1>		ubCURR_UV(33,2)<16;16,1>		-ubPREV_UV(8,176)<16;16,1>		//Diff UV interleaved
+    add (16)	wDIFF_TEMPORAL(28)<1>		ubCURR_UV(34,2)<16;16,1>		-ubPREV_UV(8,192)<16;16,1>		//Diff UV interleaved
+
+//3 more lines (rows 29-31) are stored in wCURBE_TEMP(4..6) instead of wDIFF_TEMPORAL - presumably for lack of register space; TODO confirm
+    add (16)	wCURBE_TEMP(4)<1>		ubCURR_UV(35,2)<16;16,1>		-ubPREV_UV(8,208)<16;16,1>		//Diff UV interleaved
+    add (16)	wCURBE_TEMP(5)<1>		ubCURR_UV(36,2)<16;16,1>		-ubPREV_UV(8,224)<16;16,1>		//Diff UV interleaved
+    add (16)	wCURBE_TEMP(6)<1>		ubCURR_UV(37,2)<16;16,1>		-ubPREV_UV(8,240)<16;16,1>		//Diff UV interleaved
+
+	//16x4 to 8x4 - First 16 lines: <4;2,1> regions add each word pair to the next word pair (elements 0,1 + 2,3; 4,5 + 6,7; ...)
+	add (16)	uwDIFF_TEMPORAL_SUM4x4(0)<1>		uwDIFF_TEMPORAL_SUM4x4(0,0)<4;2,1>		uwDIFF_TEMPORAL_SUM4x4(0,2)<4;2,1>	// reads SUM4x4(0..1), result overwrites SUM4x4(0)
+	add (16)	uwDIFF_TEMPORAL_SUM4x4(1)<1>		uwDIFF_TEMPORAL_SUM4x4(2,0)<4;2,1>		uwDIFF_TEMPORAL_SUM4x4(2,2)<4;2,1>	// reads SUM4x4(2..3), result overwrites SUM4x4(1)
+
+	//8x4 to 4x4 - First 16 lines: same pairwise add once more, result kept in uwDIFF_TEMPORAL_SUM4x4_FINAL(0)
+	add (16)	uwDIFF_TEMPORAL_SUM4x4_FINAL(0)<1>	uwDIFF_TEMPORAL_SUM4x4(0,0)<4;2,1>		uwDIFF_TEMPORAL_SUM4x4(0,2)<4;2,1>		{ AccWrEn }	// AccWrEn: result is also written to the accumulator
+
+	//16x16 to 16x4 - Second 16 lines: sum |diff| of rows 16-19 into uwDIFF_TEMPORAL_SUM4x4(0) (reuses the first-half scratch registers)
+	add (16)	acc0.0<1>:uw					(abs)wDIFF_TEMPORAL(16)<16;16,1>	(abs)wDIFF_TEMPORAL(17)<16;16,1>
+	add (16)	acc0.0<1>:uw					acc0.0<16;16,1>:uw					(abs)wDIFF_TEMPORAL(18)<16;16,1>
+	add (16)	uwDIFF_TEMPORAL_SUM4x4(0)<1>	acc0.0<16;16,1>:uw					(abs)wDIFF_TEMPORAL(19)<16;16,1>
+	//16x16 to 16x4 - Second 16 lines: sum |diff| of rows 20-23 into uwDIFF_TEMPORAL_SUM4x4(1)
+	add (16)	acc0.0<1>:uw					(abs)wDIFF_TEMPORAL(20)<16;16,1>	(abs)wDIFF_TEMPORAL(21)<16;16,1>
+	add (16)	acc0.0<1>:uw					acc0.0<16;16,1>:uw					(abs)wDIFF_TEMPORAL(22)<16;16,1>
+	add (16)	uwDIFF_TEMPORAL_SUM4x4(1)<1>	acc0.0<16;16,1>:uw					(abs)wDIFF_TEMPORAL(23)<16;16,1>
+	//16x16 to 16x4 - Second 16 lines: sum |diff| of rows 24-27 into uwDIFF_TEMPORAL_SUM4x4(2)
+	add (16)	acc0.0<1>:uw					(abs)wDIFF_TEMPORAL(24)<16;16,1>	(abs)wDIFF_TEMPORAL(25)<16;16,1>
+	add (16)	acc0.0<1>:uw					acc0.0<16;16,1>:uw					(abs)wDIFF_TEMPORAL(26)<16;16,1>
+	add (16)	uwDIFF_TEMPORAL_SUM4x4(2)<1>	acc0.0<16;16,1>:uw					(abs)wDIFF_TEMPORAL(27)<16;16,1>
+
+	//16x16 to 16x4 - Second 16 lines: sum |diff| of rows 28-31 into uwDIFF_TEMPORAL_SUM4x4(3); rows 29-31 live in wCURBE_TEMP(4..6)
+	add (16)	acc0.0<1>:uw					(abs)wDIFF_TEMPORAL(28)<16;16,1>	(abs)wCURBE_TEMP(4)<16;16,1>
+	add (16)	acc0.0<1>:uw					acc0.0<16;16,1>:uw					(abs)wCURBE_TEMP(5)<16;16,1>
+	add (16)	uwDIFF_TEMPORAL_SUM4x4(3)<1>	acc0.0<16;16,1>:uw					(abs)wCURBE_TEMP(6)<16;16,1>
+
+	//Find if block is motion block - First 16 lines: f0.0 = (4x4 block sum > byte pair at r61.26)
+	cmp.g.f0.0  (16) null<1>:w				uwDIFF_TEMPORAL_SUM4x4_FINAL(0)<16;16,1> 		r61.26<0;2,1>:ub
+
+	//Move TEMPORAL_SUM4x4 for SIMD16 use later: each <0;2,1> source repeats one word pair across 8 channels.
+	mov (8)     uwDIFF_TEMPORAL_SUM4x4_TEMP_0(0,0)<1>                 uwDIFF_TEMPORAL_SUM4x4_FINAL(0,0)<0;2,1>		// TEMP_0 is later compared in the denoise of rows 0-3
+	mov (8)     uwDIFF_TEMPORAL_SUM4x4_TEMP_0(0,8)<1>                 uwDIFF_TEMPORAL_SUM4x4_FINAL(0,2)<0;2,1>		
+	mov (8)     uwDIFF_TEMPORAL_SUM4x4_TEMP_1(0,0)<1>                 uwDIFF_TEMPORAL_SUM4x4_FINAL(0,4)<0;2,1>		// TEMP_1: rows 4-7
+	mov (8)     uwDIFF_TEMPORAL_SUM4x4_TEMP_1(0,8)<1>                 uwDIFF_TEMPORAL_SUM4x4_FINAL(0,6)<0;2,1>      
+	mov (8)     uwDIFF_TEMPORAL_SUM4x4_TEMP_2(0,0)<1>                 uwDIFF_TEMPORAL_SUM4x4_FINAL(0,8)<0;2,1>		// TEMP_2: rows 8-11
+	mov (8)     uwDIFF_TEMPORAL_SUM4x4_TEMP_2(0,8)<1>                 uwDIFF_TEMPORAL_SUM4x4_FINAL(0,10)<0;2,1>     
+	mov (8)     uwDIFF_TEMPORAL_SUM4x4_TEMP_3(0,0)<1>                 uwDIFF_TEMPORAL_SUM4x4_FINAL(0,12)<0;2,1>		// TEMP_3: rows 12-15
+	mov (8)     uwDIFF_TEMPORAL_SUM4x4_TEMP_3(0,8)<1>                 uwDIFF_TEMPORAL_SUM4x4_FINAL(0,14)<0;2,1>     
+
+	//Pick Appropriate Weight History Based on motion. - First 16 lines: where the block sum is NOT above the threshold, CURBE_TEMP(0) is replaced by CURBE_TEMP(2)
+	(-f0.0) mov (16) uwCURBE_TEMP(0)<1>		uwCURBE_TEMP(2)<16;16,1>
+
+	//Actual DN - First 16 lines. Per row: out = (curr*(256-w) + prev*w + 128) >> 8 where |diff| < r61.28, otherwise out = curr; out overwrites the row's wDIFF_TEMPORAL slot.
+	cmp.l.f0.0 (16)	null<1>:w						(abs)wDIFF_TEMPORAL(0)<16;16,1>			r61.28<0;2,1>:ub	// row 0: f0.0 = |diff| < byte pair at r61.28
+	cmp.l.f1.0 (16) null<1>:w						(abs)wDIFF_TEMPORAL(0)<16;16,1>			r61.30<0;2,1>:ub	// f1.0 = |diff| < byte pair at r61.30
+	mul (8)	acc0.0<1>:w								ubCURR_UV(2,2)<8;8,1>					-uwCURBE_TEMP(0,0)<0;2,1>	// acc = -curr*w, w = weight pair CURBE_TEMP(0,0..1) (left 8 samples)
+	mul (8)	acc0.8<1>:w								ubCURR_UV(2,10)<8;8,1>					-uwCURBE_TEMP(0,2)<0;2,1>	// right 8 samples use weight pair CURBE_TEMP(0,2..3)
+	mac (16)	acc0<1>:w							ubCURR_UV(2,2)<16;16,1>					256:w	// acc += curr*256  -> curr*(256-w)
+	mac (8)	acc0.0<1>:w								ubPREV_UV(0,0)<8;8,1>					uwCURBE_TEMP(0,0)<0;2,1>	// acc += prev*w
+	mac (8)	acc0.8<1>:w								ubPREV_UV(0,8)<8;8,1>					uwCURBE_TEMP(0,2)<0;2,1>
+	add (16)	acc0<1>:w							acc0<16;16,1>:w								128:w	// +128 rounds the following >>8
+ 	(f0.0) shr (16) wDIFF_TEMPORAL(0)<1> 			acc0<16;16,1>:w								8:w	// below threshold: store the blended value
+	(-f0.0) mov (16) wDIFF_TEMPORAL(0)<1> 			ubCURR_UV(2,2)<16;16,1>	// otherwise: pass the current sample through
+	cmp.le.f0.0 (16) null<1>:w						uwDIFF_TEMPORAL_SUM4x4_TEMP_0(0)<16;16,1> 	r61.26<0;2,1>:ub	// f0.0 = block sum <= byte pair at r61.26
+	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(0)<1>	wDIFF_TEMPORAL(0)<16;16,1>					ubCURR_UV(2,2)<16;16,1>	// average result with curr under inverted allv predicate (f0.0/f1.0 combined per channel) - NOTE(review): confirm allv semantics
+
+	cmp.l.f0.0 (16)	null<1>:w						(abs)wDIFF_TEMPORAL(1)<16;16,1>			r61.28<0;2,1>:ub	// row 1: same sequence as row 0
+	cmp.l.f1.0 (16) null<1>:w						(abs)wDIFF_TEMPORAL(1)<16;16,1>			r61.30<0;2,1>:ub
+	mul (8)	acc0.0<1>:w								ubCURR_UV(3,2)<8;8,1>					-uwCURBE_TEMP(0,0)<0;2,1>
+	mul (8)	acc0.8<1>:w								ubCURR_UV(3,10)<8;8,1>					-uwCURBE_TEMP(0,2)<0;2,1>
+	mac (16)	acc0<1>:w							ubCURR_UV(3,2)<16;16,1>					256:w
+	mac (8)	acc0.0<1>:w								ubPREV_UV(0,16)<8;8,1>				uwCURBE_TEMP(0,0)<0;2,1>
+	mac (8)	acc0.8<1>:w								ubPREV_UV(0,24)<8;8,1>				uwCURBE_TEMP(0,2)<0;2,1>
+	add (16)	acc0<1>:w							acc0<16;16,1>:w								128:w
+ 	(f0.0) shr  (16) wDIFF_TEMPORAL(1)<1>		acc0<16;16,1>:w								8:w
+	(-f0.0) mov (16) wDIFF_TEMPORAL(1)<1>		ubCURR_UV(3,2)<16;16,1>
+	cmp.le.f0.0 (16) null<1>:w						uwDIFF_TEMPORAL_SUM4x4_TEMP_0(0)<16;16,1> 	r61.26<0;2,1>:ub
+	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(1)<1>	wDIFF_TEMPORAL(1)<16;16,1>				ubCURR_UV(3,2)<16;16,1>
+
+	cmp.l.f0.0 (16)	null<1>:w						(abs)wDIFF_TEMPORAL(2)<16;16,1>			r61.28<0;2,1>:ub	// row 2
+	cmp.l.f1.0 (16) null<1>:w						(abs)wDIFF_TEMPORAL(2)<16;16,1>			r61.30<0;2,1>:ub
+	mul (8)	acc0.0<1>:w								ubCURR_UV(4,2)<8;8,1>					-uwCURBE_TEMP(0,0)<0;2,1>
+	mul (8)	acc0.8<1>:w								ubCURR_UV(4,10)<8;8,1>					-uwCURBE_TEMP(0,2)<0;2,1>
+	mac (16)	acc0<1>:w							ubCURR_UV(4,2)<16;16,1>					256:w
+	mac (8)	acc0.0<1>:w								ubPREV_UV(0,32)<8;8,1>				uwCURBE_TEMP(0,0)<0;2,1>
+	mac (8)	acc0.8<1>:w								ubPREV_UV(0,40)<8;8,1>				uwCURBE_TEMP(0,2)<0;2,1>
+	add (16)	acc0<1>:w							acc0<16;16,1>:w								128:w
+ 	(f0.0) shr  (16) wDIFF_TEMPORAL(2)<1> 		acc0<16;16,1>:w								8:w
+	(-f0.0) mov (16) wDIFF_TEMPORAL(2)<1>		ubCURR_UV(4,2)<16;16,1>
+	cmp.le.f0.0 (16) null<1>:w						uwDIFF_TEMPORAL_SUM4x4_TEMP_0(0)<16;16,1> 	r61.26<0;2,1>:ub
+	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(2)<1>	wDIFF_TEMPORAL(2)<16;16,1>				ubCURR_UV(4,2)<16;16,1>
+
+	cmp.l.f0.0 (16)	null<1>:w						(abs)wDIFF_TEMPORAL(3)<16;16,1>			r61.28<0;2,1>:ub	// row 3
+	cmp.l.f1.0 (16) null<1>:w						(abs)wDIFF_TEMPORAL(3)<16;16,1>			r61.30<0;2,1>:ub
+	mul (8)	acc0.0<1>:w								ubCURR_UV(5,2)<8;8,1>					-uwCURBE_TEMP(0,0)<0;2,1>
+	mul (8)	acc0.8<1>:w								ubCURR_UV(5,10)<8;8,1>					-uwCURBE_TEMP(0,2)<0;2,1>
+	mac (16)	acc0<1>:w							ubCURR_UV(5,2)<16;16,1>					256:w
+	mac (8)	acc0.0<1>:w								ubPREV_UV(0,48)<8;8,1>				uwCURBE_TEMP(0,0)<0;2,1>
+	mac (8)	acc0.8<1>:w								ubPREV_UV(0,56)<8;8,1>				uwCURBE_TEMP(0,2)<0;2,1>
+	add (16)	acc0<1>:w							acc0<16;16,1>:w								128:w
+ 	(f0.0) shr  (16) wDIFF_TEMPORAL(3)<1>		acc0<16;16,1>:w								8:w
+	(-f0.0) mov (16) wDIFF_TEMPORAL(3)<1>		ubCURR_UV(5,2)<16;16,1>
+	cmp.le.f0.0 (16) null<1>:w						uwDIFF_TEMPORAL_SUM4x4_TEMP_0(0)<16;16,1> 	r61.26<0;2,1>:ub
+	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(3)<1>	wDIFF_TEMPORAL(3)<16;16,1>				ubCURR_UV(5,2)<16;16,1>
+
+	cmp.l.f0.0 (16)	null<1>:w						(abs)wDIFF_TEMPORAL(4)<16;16,1>			r61.28<0;2,1>:ub	// row 4 (rows 4-7 use weights CURBE_TEMP(0,4)/(0,6) and block sums TEMP_1): f0.0 = |diff| < r61.28 bytes
+	cmp.l.f1.0 (16) null<1>:w						(abs)wDIFF_TEMPORAL(4)<16;16,1>			r61.30<0;2,1>:ub	// f1.0 = |diff| < r61.30 bytes
+	mul (8)	acc0.0<1>:w								ubCURR_UV(6,2)<8;8,1>					-uwCURBE_TEMP(0,4)<0;2,1>	// acc = -curr*w
+	mul (8)	acc0.8<1>:w								ubCURR_UV(6,10)<8;8,1>					-uwCURBE_TEMP(0,6)<0;2,1>
+	mac (16)	acc0<1>:w							ubCURR_UV(6,2)<16;16,1>					256:w	// acc += curr*256
+	mac (8)	acc0.0<1>:w								ubPREV_UV(0,64)<8;8,1>					uwCURBE_TEMP(0,4)<0;2,1>	// acc += prev*w
+	mac (8)	acc0.8<1>:w								ubPREV_UV(0,72)<8;8,1>					uwCURBE_TEMP(0,6)<0;2,1>
+	add (16)	acc0<1>:w							acc0<16;16,1>:w								128:w	// rounding for >>8
+ 	(f0.0) shr (16) wDIFF_TEMPORAL(4)<1> 			acc0<16;16,1>:w								8:w	// below threshold: blended value
+	(-f0.0) mov (16) wDIFF_TEMPORAL(4)<1> 			ubCURR_UV(6,2)<16;16,1>	// otherwise: current sample
+	cmp.le.f0.0 (16) null<1>:w						uwDIFF_TEMPORAL_SUM4x4_TEMP_1(0)<16;16,1> 	r61.26<0;2,1>:ub	// f0.0 = block sum <= r61.26 bytes
+	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(4)<1>	wDIFF_TEMPORAL(4)<16;16,1>					ubCURR_UV(6,2)<16;16,1>	// average with curr under inverted allv predicate - NOTE(review): confirm allv semantics
+
+	cmp.l.f0.0 (16)	null<1>:w						(abs)wDIFF_TEMPORAL(5)<16;16,1>			r61.28<0;2,1>:ub	// row 5
+	cmp.l.f1.0 (16) null<1>:w						(abs)wDIFF_TEMPORAL(5)<16;16,1>			r61.30<0;2,1>:ub
+	mul (8)	acc0.0<1>:w								ubCURR_UV(7,2)<8;8,1>					-uwCURBE_TEMP(0,4)<0;2,1>
+	mul (8)	acc0.8<1>:w								ubCURR_UV(7,10)<8;8,1>					-uwCURBE_TEMP(0,6)<0;2,1>
+	mac (16)	acc0<1>:w							ubCURR_UV(7,2)<16;16,1>					256:w
+	mac (8)	acc0.0<1>:w								ubPREV_UV(0,80)<8;8,1>				uwCURBE_TEMP(0,4)<0;2,1>
+	mac (8)	acc0.8<1>:w								ubPREV_UV(0,88)<8;8,1>				uwCURBE_TEMP(0,6)<0;2,1>
+	add (16)	acc0<1>:w							acc0<16;16,1>:w								128:w
+ 	(f0.0) shr  (16) wDIFF_TEMPORAL(5)<1>		acc0<16;16,1>:w								8:w
+	(-f0.0) mov (16) wDIFF_TEMPORAL(5)<1>		ubCURR_UV(7,2)<16;16,1>
+	cmp.le.f0.0 (16) null<1>:w						uwDIFF_TEMPORAL_SUM4x4_TEMP_1(0)<16;16,1> 	r61.26<0;2,1>:ub
+	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(5)<1>	wDIFF_TEMPORAL(5)<16;16,1>				ubCURR_UV(7,2)<16;16,1>
+
+	cmp.l.f0.0 (16)	null<1>:w						(abs)wDIFF_TEMPORAL(6)<16;16,1>			r61.28<0;2,1>:ub	// row 6
+	cmp.l.f1.0 (16) null<1>:w						(abs)wDIFF_TEMPORAL(6)<16;16,1>			r61.30<0;2,1>:ub
+	mul (8)	acc0.0<1>:w								ubCURR_UV(8,2)<8;8,1>					-uwCURBE_TEMP(0,4)<0;2,1>
+	mul (8)	acc0.8<1>:w								ubCURR_UV(8,10)<8;8,1>					-uwCURBE_TEMP(0,6)<0;2,1>
+	mac (16)	acc0<1>:w							ubCURR_UV(8,2)<16;16,1>					256:w
+	mac (8)	acc0.0<1>:w								ubPREV_UV(0,96)<8;8,1>				uwCURBE_TEMP(0,4)<0;2,1>
+	mac (8)	acc0.8<1>:w								ubPREV_UV(0,104)<8;8,1>				uwCURBE_TEMP(0,6)<0;2,1>
+	add (16)	acc0<1>:w							acc0<16;16,1>:w								128:w
+ 	(f0.0) shr  (16) wDIFF_TEMPORAL(6)<1> 		acc0<16;16,1>:w								8:w
+	(-f0.0) mov (16) wDIFF_TEMPORAL(6)<1>		ubCURR_UV(8,2)<16;16,1>
+	cmp.le.f0.0 (16) null<1>:w						uwDIFF_TEMPORAL_SUM4x4_TEMP_1(0)<16;16,1> 	r61.26<0;2,1>:ub
+	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(6)<1>	wDIFF_TEMPORAL(6)<16;16,1>				ubCURR_UV(8,2)<16;16,1>
+
+	cmp.l.f0.0 (16)	null<1>:w						(abs)wDIFF_TEMPORAL(7)<16;16,1>			r61.28<0;2,1>:ub	// row 7
+	cmp.l.f1.0 (16) null<1>:w						(abs)wDIFF_TEMPORAL(7)<16;16,1>			r61.30<0;2,1>:ub
+	mul (8)	acc0.0<1>:w								ubCURR_UV(9,2)<8;8,1>					-uwCURBE_TEMP(0,4)<0;2,1>
+	mul (8)	acc0.8<1>:w								ubCURR_UV(9,10)<8;8,1>					-uwCURBE_TEMP(0,6)<0;2,1>
+	mac (16)	acc0<1>:w							ubCURR_UV(9,2)<16;16,1>					256:w
+	mac (8)	acc0.0<1>:w								ubPREV_UV(0,112)<8;8,1>				uwCURBE_TEMP(0,4)<0;2,1>
+	mac (8)	acc0.8<1>:w								ubPREV_UV(0,120)<8;8,1>				uwCURBE_TEMP(0,6)<0;2,1>
+	add (16)	acc0<1>:w							acc0<16;16,1>:w								128:w
+ 	(f0.0) shr  (16) wDIFF_TEMPORAL(7)<1>		acc0<16;16,1>:w								8:w
+	(-f0.0) mov (16) wDIFF_TEMPORAL(7)<1>		ubCURR_UV(9,2)<16;16,1>
+	cmp.le.f0.0 (16) null<1>:w						uwDIFF_TEMPORAL_SUM4x4_TEMP_1(0)<16;16,1> 	r61.26<0;2,1>:ub
+	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(7)<1>	wDIFF_TEMPORAL(7)<16;16,1>				ubCURR_UV(9,2)<16;16,1>
+
+	cmp.l.f0.0 (16)	null<1>:w						(abs)wDIFF_TEMPORAL(8)<16;16,1>			r61.28<0;2,1>:ub	// row 8 (rows 8-11 use weights CURBE_TEMP(0,8)/(0,10) and block sums TEMP_2): f0.0 = |diff| < r61.28 bytes
+	cmp.l.f1.0 (16) null<1>:w						(abs)wDIFF_TEMPORAL(8)<16;16,1>			r61.30<0;2,1>:ub	// f1.0 = |diff| < r61.30 bytes
+	mul (8)	acc0.0<1>:w								ubCURR_UV(10,2)<8;8,1>					-uwCURBE_TEMP(0,8)<0;2,1>	// acc = -curr*w
+	mul (8)	acc0.8<1>:w								ubCURR_UV(10,10)<8;8,1>					-uwCURBE_TEMP(0,10)<0;2,1>
+	mac (16)	acc0<1>:w							ubCURR_UV(10,2)<16;16,1>					256:w	// acc += curr*256
+	mac (8)	acc0.0<1>:w								ubPREV_UV(0,128)<8;8,1>					uwCURBE_TEMP(0,8)<0;2,1>	// acc += prev*w
+	mac (8)	acc0.8<1>:w								ubPREV_UV(0,136)<8;8,1>					uwCURBE_TEMP(0,10)<0;2,1>
+	add (16)	acc0<1>:w							acc0<16;16,1>:w								128:w	// rounding for >>8
+ 	(f0.0) shr (16) wDIFF_TEMPORAL(8)<1> 			acc0<16;16,1>:w								8:w	// below threshold: blended value
+	(-f0.0) mov (16) wDIFF_TEMPORAL(8)<1> 			ubCURR_UV(10,2)<16;16,1>	// otherwise: current sample
+	cmp.le.f0.0 (16) null<1>:w						uwDIFF_TEMPORAL_SUM4x4_TEMP_2(0)<16;16,1> 	r61.26<0;2,1>:ub	// f0.0 = block sum <= r61.26 bytes
+	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(8)<1>	wDIFF_TEMPORAL(8)<16;16,1>					ubCURR_UV(10,2)<16;16,1>	// average with curr under inverted allv predicate - NOTE(review): confirm allv semantics
+
+	cmp.l.f0.0 (16)	null<1>:w						(abs)wDIFF_TEMPORAL(9)<16;16,1>			r61.28<0;2,1>:ub	// row 9
+	cmp.l.f1.0 (16) null<1>:w						(abs)wDIFF_TEMPORAL(9)<16;16,1>			r61.30<0;2,1>:ub
+	mul (8)	acc0.0<1>:w								ubCURR_UV(11,2)<8;8,1>					-uwCURBE_TEMP(0,8)<0;2,1>
+	mul (8)	acc0.8<1>:w								ubCURR_UV(11,10)<8;8,1>					-uwCURBE_TEMP(0,10)<0;2,1>
+	mac (16)	acc0<1>:w							ubCURR_UV(11,2)<16;16,1>					256:w
+	mac (8)	acc0.0<1>:w								ubPREV_UV(0,144)<8;8,1>				uwCURBE_TEMP(0,8)<0;2,1>
+	mac (8)	acc0.8<1>:w								ubPREV_UV(0,152)<8;8,1>				uwCURBE_TEMP(0,10)<0;2,1>
+	add (16)	acc0<1>:w							acc0<16;16,1>:w								128:w
+ 	(f0.0) shr  (16) wDIFF_TEMPORAL(9)<1>		acc0<16;16,1>:w								8:w
+	(-f0.0) mov (16) wDIFF_TEMPORAL(9)<1>		ubCURR_UV(11,2)<16;16,1>
+	cmp.le.f0.0 (16) null<1>:w						uwDIFF_TEMPORAL_SUM4x4_TEMP_2(0)<16;16,1> 	r61.26<0;2,1>:ub
+	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(9)<1>	wDIFF_TEMPORAL(9)<16;16,1>				ubCURR_UV(11,2)<16;16,1>
+
+	cmp.l.f0.0 (16)	null<1>:w						(abs)wDIFF_TEMPORAL(10)<16;16,1>			r61.28<0;2,1>:ub	// row 10
+	cmp.l.f1.0 (16) null<1>:w						(abs)wDIFF_TEMPORAL(10)<16;16,1>			r61.30<0;2,1>:ub
+	mul (8)	acc0.0<1>:w								ubCURR_UV(12,2)<8;8,1>					-uwCURBE_TEMP(0,8)<0;2,1>
+	mul (8)	acc0.8<1>:w								ubCURR_UV(12,10)<8;8,1>					-uwCURBE_TEMP(0,10)<0;2,1>
+	mac (16)	acc0<1>:w							ubCURR_UV(12,2)<16;16,1>					256:w
+	mac (8)	acc0.0<1>:w								ubPREV_UV(0,160)<8;8,1>				uwCURBE_TEMP(0,8)<0;2,1>
+	mac (8)	acc0.8<1>:w								ubPREV_UV(0,168)<8;8,1>				uwCURBE_TEMP(0,10)<0;2,1>
+	add (16)	acc0<1>:w							acc0<16;16,1>:w								128:w
+ 	(f0.0) shr  (16) wDIFF_TEMPORAL(10)<1> 		acc0<16;16,1>:w								8:w
+	(-f0.0) mov (16) wDIFF_TEMPORAL(10)<1>		ubCURR_UV(12,2)<16;16,1>
+	cmp.le.f0.0 (16) null<1>:w						uwDIFF_TEMPORAL_SUM4x4_TEMP_2(0)<16;16,1> 	r61.26<0;2,1>:ub
+	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(10)<1>	wDIFF_TEMPORAL(10)<16;16,1>				ubCURR_UV(12,2)<16;16,1>
+
+	cmp.l.f0.0 (16)	null<1>:w						(abs)wDIFF_TEMPORAL(11)<16;16,1>			r61.28<0;2,1>:ub	// row 11
+	cmp.l.f1.0 (16) null<1>:w						(abs)wDIFF_TEMPORAL(11)<16;16,1>			r61.30<0;2,1>:ub
+	mul (8)	acc0.0<1>:w								ubCURR_UV(13,2)<8;8,1>					-uwCURBE_TEMP(0,8)<0;2,1>
+	mul (8)	acc0.8<1>:w								ubCURR_UV(13,10)<8;8,1>					-uwCURBE_TEMP(0,10)<0;2,1>
+	mac (16)	acc0<1>:w							ubCURR_UV(13,2)<16;16,1>					256:w
+	mac (8)	acc0.0<1>:w								ubPREV_UV(0,176)<8;8,1>				uwCURBE_TEMP(0,8)<0;2,1>
+	mac (8)	acc0.8<1>:w								ubPREV_UV(0,184)<8;8,1>				uwCURBE_TEMP(0,10)<0;2,1>
+	add (16)	acc0<1>:w							acc0<16;16,1>:w								128:w
+ 	(f0.0) shr  (16) wDIFF_TEMPORAL(11)<1>		acc0<16;16,1>:w								8:w
+	(-f0.0) mov (16) wDIFF_TEMPORAL(11)<1>		ubCURR_UV(13,2)<16;16,1>
+	cmp.le.f0.0 (16) null<1>:w						uwDIFF_TEMPORAL_SUM4x4_TEMP_2(0)<16;16,1> 	r61.26<0;2,1>:ub
+	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(11)<1>	wDIFF_TEMPORAL(11)<16;16,1>				ubCURR_UV(13,2)<16;16,1>
+
+	cmp.l.f0.0 (16)	null<1>:w						(abs)wDIFF_TEMPORAL(12)<16;16,1>			r61.28<0;2,1>:ub	// row 12 (rows 12-15 use weights CURBE_TEMP(0,12)/(0,14) and block sums TEMP_3): f0.0 = |diff| < r61.28 bytes
+	cmp.l.f1.0 (16) null<1>:w						(abs)wDIFF_TEMPORAL(12)<16;16,1>			r61.30<0;2,1>:ub	// f1.0 = |diff| < r61.30 bytes
+	mul (8)	acc0.0<1>:w								ubCURR_UV(14,2)<8;8,1>					-uwCURBE_TEMP(0,12)<0;2,1>	// acc = -curr*w
+	mul (8)	acc0.8<1>:w								ubCURR_UV(14,10)<8;8,1>					-uwCURBE_TEMP(0,14)<0;2,1>
+	mac (16)	acc0<1>:w							ubCURR_UV(14,2)<16;16,1>					256:w	// acc += curr*256
+	mac (8)	acc0.0<1>:w								ubPREV_UV(0,192)<8;8,1>					uwCURBE_TEMP(0,12)<0;2,1>	// acc += prev*w
+	mac (8)	acc0.8<1>:w								ubPREV_UV(0,200)<8;8,1>					uwCURBE_TEMP(0,14)<0;2,1>
+	add (16)	acc0<1>:w							acc0<16;16,1>:w								128:w	// rounding for >>8
+ 	(f0.0) shr (16) wDIFF_TEMPORAL(12)<1> 			acc0<16;16,1>:w								8:w	// below threshold: blended value
+	(-f0.0) mov (16) wDIFF_TEMPORAL(12)<1> 			ubCURR_UV(14,2)<16;16,1>	// otherwise: current sample
+	cmp.le.f0.0 (16) null<1>:w						uwDIFF_TEMPORAL_SUM4x4_TEMP_3(0)<16;16,1> 	r61.26<0;2,1>:ub	// f0.0 = block sum <= r61.26 bytes
+	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(12)<1>	wDIFF_TEMPORAL(12)<16;16,1>					ubCURR_UV(14,2)<16;16,1>	// average with curr under inverted allv predicate - NOTE(review): confirm allv semantics
+
+	cmp.l.f0.0 (16)	null<1>:w						(abs)wDIFF_TEMPORAL(13)<16;16,1>			r61.28<0;2,1>:ub	// row 13
+	cmp.l.f1.0 (16) null<1>:w						(abs)wDIFF_TEMPORAL(13)<16;16,1>			r61.30<0;2,1>:ub
+	mul (8)	acc0.0<1>:w								ubCURR_UV(15,2)<8;8,1>					-uwCURBE_TEMP(0,12)<0;2,1>
+	mul (8)	acc0.8<1>:w								ubCURR_UV(15,10)<8;8,1>					-uwCURBE_TEMP(0,14)<0;2,1>
+	mac (16)	acc0<1>:w							ubCURR_UV(15,2)<16;16,1>					256:w
+	mac (8)	acc0.0<1>:w								ubPREV_UV(0,208)<8;8,1>				uwCURBE_TEMP(0,12)<0;2,1>
+	mac (8)	acc0.8<1>:w								ubPREV_UV(0,216)<8;8,1>				uwCURBE_TEMP(0,14)<0;2,1>
+	add (16)	acc0<1>:w							acc0<16;16,1>:w								128:w
+ 	(f0.0) shr  (16) wDIFF_TEMPORAL(13)<1>		acc0<16;16,1>:w								8:w
+	(-f0.0) mov (16) wDIFF_TEMPORAL(13)<1>		ubCURR_UV(15,2)<16;16,1>
+	cmp.le.f0.0 (16) null<1>:w						uwDIFF_TEMPORAL_SUM4x4_TEMP_3(0)<16;16,1> 	r61.26<0;2,1>:ub
+	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(13)<1>	wDIFF_TEMPORAL(13)<16;16,1>				ubCURR_UV(15,2)<16;16,1>
+
+	cmp.l.f0.0 (16)	null<1>:w						(abs)wDIFF_TEMPORAL(14)<16;16,1>			r61.28<0;2,1>:ub	// row 14
+	cmp.l.f1.0 (16) null<1>:w						(abs)wDIFF_TEMPORAL(14)<16;16,1>			r61.30<0;2,1>:ub
+	mul (8)	acc0.0<1>:w								ubCURR_UV(16,2)<8;8,1>					-uwCURBE_TEMP(0,12)<0;2,1>
+	mul (8)	acc0.8<1>:w								ubCURR_UV(16,10)<8;8,1>					-uwCURBE_TEMP(0,14)<0;2,1>
+	mac (16)	acc0<1>:w							ubCURR_UV(16,2)<16;16,1>					256:w
+	mac (8)	acc0.0<1>:w								ubPREV_UV(0,224)<8;8,1>				uwCURBE_TEMP(0,12)<0;2,1>
+	mac (8)	acc0.8<1>:w								ubPREV_UV(0,232)<8;8,1>				uwCURBE_TEMP(0,14)<0;2,1>
+	add (16)	acc0<1>:w							acc0<16;16,1>:w								128:w
+ 	(f0.0) shr  (16) wDIFF_TEMPORAL(14)<1> 		acc0<16;16,1>:w								8:w
+	(-f0.0) mov (16) wDIFF_TEMPORAL(14)<1>		ubCURR_UV(16,2)<16;16,1>
+	cmp.le.f0.0 (16) null<1>:w						uwDIFF_TEMPORAL_SUM4x4_TEMP_3(0)<16;16,1> 	r61.26<0;2,1>:ub
+	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(14)<1>	wDIFF_TEMPORAL(14)<16;16,1>				ubCURR_UV(16,2)<16;16,1>
+
+	cmp.l.f0.0 (16)	null<1>:w						(abs)wDIFF_TEMPORAL(15)<16;16,1>			r61.28<0;2,1>:ub	// row 15
+	cmp.l.f1.0 (16) null<1>:w						(abs)wDIFF_TEMPORAL(15)<16;16,1>			r61.30<0;2,1>:ub
+	mul (8)	acc0.0<1>:w								ubCURR_UV(17,2)<8;8,1>					-uwCURBE_TEMP(0,12)<0;2,1>
+	mul (8)	acc0.8<1>:w								ubCURR_UV(17,10)<8;8,1>					-uwCURBE_TEMP(0,14)<0;2,1>
+	mac (16)	acc0<1>:w							ubCURR_UV(17,2)<16;16,1>					256:w
+	mac (8)	acc0.0<1>:w								ubPREV_UV(0,240)<8;8,1>				uwCURBE_TEMP(0,12)<0;2,1>
+	mac (8)	acc0.8<1>:w								ubPREV_UV(0,248)<8;8,1>				uwCURBE_TEMP(0,14)<0;2,1>
+	add (16)	acc0<1>:w							acc0<16;16,1>:w								128:w
+ 	(f0.0) shr  (16) wDIFF_TEMPORAL(15)<1>		acc0<16;16,1>:w								8:w
+	(-f0.0) mov (16) wDIFF_TEMPORAL(15)<1>		ubCURR_UV(17,2)<16;16,1>
+	cmp.le.f0.0 (16) null<1>:w						uwDIFF_TEMPORAL_SUM4x4_TEMP_3(0)<16;16,1> 	r61.26<0;2,1>:ub
+	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(15)<1>	wDIFF_TEMPORAL(15)<16;16,1>				ubCURR_UV(17,2)<16;16,1>
+
+
+	//16x4 to 8x4 - Second 16 lines: <4;2,1> regions add each word pair to the next word pair (elements 0,1 + 2,3; 4,5 + 6,7; ...)
+	add (16)	uwDIFF_TEMPORAL_SUM4x4(0)<1>	uwDIFF_TEMPORAL_SUM4x4(0,0)<4;2,1>		uwDIFF_TEMPORAL_SUM4x4(0,2)<4;2,1>	// reads SUM4x4(0..1), result overwrites SUM4x4(0)
+	add (16)	uwDIFF_TEMPORAL_SUM4x4(1)<1>	uwDIFF_TEMPORAL_SUM4x4(2,0)<4;2,1>		uwDIFF_TEMPORAL_SUM4x4(2,2)<4;2,1>	// reads SUM4x4(2..3), result overwrites SUM4x4(1)
+
+	//8x4 to 4x4 - Second 16 lines: same pairwise add once more, result kept in uwDIFF_TEMPORAL_SUM4x4_FINAL(1)
+	add (16)	uwDIFF_TEMPORAL_SUM4x4_FINAL(1)<1>	uwDIFF_TEMPORAL_SUM4x4(0,0)<4;2,1>		uwDIFF_TEMPORAL_SUM4x4(0,2)<4;2,1>     { AccWrEn }	// AccWrEn: result is also written to the accumulator
+
+	//Find if block is motion block - Second 16 lines: f1.0 = (4x4 block sum > byte pair at r61.26)
+	cmp.g.f1.0  (16) null<1>:w				uwDIFF_TEMPORAL_SUM4x4_FINAL(1)<16;16,1> 		r61.26<0;2,1>:ub
+
+	//Move TEMPORAL_SUM4x4 for SIMD16 use later: overwrites TEMP_0..TEMP_3 with the second-half sums; each <0;2,1> source repeats one word pair across 8 channels.
+	mov (8)     uwDIFF_TEMPORAL_SUM4x4_TEMP_0(0,0)<1>                 uwDIFF_TEMPORAL_SUM4x4_FINAL(1,0)<0;2,1>		// TEMP_0 is later compared in the denoise of rows 16-19
+	mov (8)     uwDIFF_TEMPORAL_SUM4x4_TEMP_0(0,8)<1>                 uwDIFF_TEMPORAL_SUM4x4_FINAL(1,2)<0;2,1>		
+	mov (8)     uwDIFF_TEMPORAL_SUM4x4_TEMP_1(0,0)<1>                 uwDIFF_TEMPORAL_SUM4x4_FINAL(1,4)<0;2,1>		// TEMP_1: rows 20-23
+	mov (8)     uwDIFF_TEMPORAL_SUM4x4_TEMP_1(0,8)<1>                 uwDIFF_TEMPORAL_SUM4x4_FINAL(1,6)<0;2,1>      
+	mov (8)     uwDIFF_TEMPORAL_SUM4x4_TEMP_2(0,0)<1>                 uwDIFF_TEMPORAL_SUM4x4_FINAL(1,8)<0;2,1>		// TEMP_2: rows 24-27
+	mov (8)     uwDIFF_TEMPORAL_SUM4x4_TEMP_2(0,8)<1>                 uwDIFF_TEMPORAL_SUM4x4_FINAL(1,10)<0;2,1>     
+	mov (8)     uwDIFF_TEMPORAL_SUM4x4_TEMP_3(0,0)<1>                 uwDIFF_TEMPORAL_SUM4x4_FINAL(1,12)<0;2,1>		
+	mov (8)     uwDIFF_TEMPORAL_SUM4x4_TEMP_3(0,8)<1>                 uwDIFF_TEMPORAL_SUM4x4_FINAL(1,14)<0;2,1>     
+
+	//Pick Appropriate Weight History Based on motion. - Second 16 lines: where the block sum is NOT above the threshold, CURBE_TEMP(1) is replaced by CURBE_TEMP(3)
+	(-f1.0) mov (16) uwCURBE_TEMP(1)<1>		uwCURBE_TEMP(3)<16;16,1>
+
+	//Actual DN - Second 16 lines. Per row: out = (curr*(256-w) + prev*w + 128) >> 8 where |diff| < r61.28, otherwise out = curr; weights now come from uwCURBE_TEMP(1,*).
+	cmp.l.f0.0 (16)	null<1>:w							(abs)wDIFF_TEMPORAL(16)<16;16,1>			r61.28<0;2,1>:ub	// row 16: f0.0 = |diff| < byte pair at r61.28
+	cmp.l.f1.0 (16) null<1>:w							(abs)wDIFF_TEMPORAL(16)<16;16,1>			r61.30<0;2,1>:ub	// f1.0 = |diff| < byte pair at r61.30
+	mul (8)	acc0.0<1>:w									ubCURR_UV(22,2)<8;8,1>					-uwCURBE_TEMP(1,0)<0;2,1>	// acc = -curr*w, w = weight pair CURBE_TEMP(1,0..1) (left 8 samples)
+	mul (8)	acc0.8<1>:w									ubCURR_UV(22,10)<8;8,1>					-uwCURBE_TEMP(1,2)<0;2,1>	// right 8 samples use weight pair CURBE_TEMP(1,2..3)
+	mac (16)	acc0<1>:w								ubCURR_UV(22,2)<16;16,1>					256:w	// acc += curr*256  -> curr*(256-w)
+	mac (8)	acc0.0<1>:w									ubPREV_UV(8,0)<8;8,1>					uwCURBE_TEMP(1,0)<0;2,1>	// acc += prev*w
+	mac (8)	acc0.8<1>:w									ubPREV_UV(8,8)<8;8,1>					uwCURBE_TEMP(1,2)<0;2,1>
+	add (16)	acc0<1>:w								acc0<16;16,1>:w								128:w	// +128 rounds the following >>8
+ 	(f0.0) shr  (16) wDIFF_TEMPORAL(16)<1> 			acc0<16;16,1>:w								8:w	// below threshold: store the blended value
+	(-f0.0) mov (16) wDIFF_TEMPORAL(16)<1>			ubCURR_UV(22,2)<16;16,1>	// otherwise: pass the current sample through
+	cmp.le.f0.0 (16) null<1>:w							uwDIFF_TEMPORAL_SUM4x4_TEMP_0(0)<16;16,1> 	r61.26<0;2,1>:ub	// f0.0 = block sum <= byte pair at r61.26
+	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(16)<1>	wDIFF_TEMPORAL(16)<16;16,1>			ubCURR_UV(22,2)<16;16,1>	// average result with curr under inverted allv predicate (f0.0/f1.0 combined per channel) - NOTE(review): confirm allv semantics
+
+	cmp.l.f0.0 (16)	null<1>:w							(abs)wDIFF_TEMPORAL(17)<16;16,1>			r61.28<0;2,1>:ub	// row 17: same sequence as row 16
+	cmp.l.f1.0 (16) null<1>:w							(abs)wDIFF_TEMPORAL(17)<16;16,1>			r61.30<0;2,1>:ub
+	mul (8)	acc0.0<1>:w									ubCURR_UV(23,2)<8;8,1>					-uwCURBE_TEMP(1,0)<0;2,1>
+	mul (8)	acc0.8<1>:w									ubCURR_UV(23,10)<8;8,1>					-uwCURBE_TEMP(1,2)<0;2,1>
+	mac (16)	acc0<1>:w								ubCURR_UV(23,2)<16;16,1>					256:w
+	mac (8)	acc0.0<1>:w									ubPREV_UV(8,16)<8;8,1>				uwCURBE_TEMP(1,0)<0;2,1>
+	mac (8)	acc0.8<1>:w									ubPREV_UV(8,24)<8;8,1>				uwCURBE_TEMP(1,2)<0;2,1>
+	add (16)	acc0<1>:w								acc0<16;16,1>:w								128:w
+ 	(f0.0) shr  (16) wDIFF_TEMPORAL(17)<1>			acc0<16;16,1>:w								8:w
+	(-f0.0) mov (16) wDIFF_TEMPORAL(17)<1>			ubCURR_UV(23,2)<16;16,1>
+	cmp.le.f0.0 (16) null<1>:w							uwDIFF_TEMPORAL_SUM4x4_TEMP_0(0)<16;16,1> 	r61.26<0;2,1>:ub
+	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(17)<1>	wDIFF_TEMPORAL(17)<16;16,1>			ubCURR_UV(23,2)<16;16,1>
+
+	cmp.l.f0.0 (16)	null<1>:w							(abs)wDIFF_TEMPORAL(18)<16;16,1>			r61.28<0;2,1>:ub	// row 18
+	cmp.l.f1.0 (16) null<1>:w							(abs)wDIFF_TEMPORAL(18)<16;16,1>			r61.30<0;2,1>:ub
+	mul (8)	acc0.0<1>:w									ubCURR_UV(24,2)<8;8,1>					-uwCURBE_TEMP(1,0)<0;2,1>
+	mul (8)	acc0.8<1>:w									ubCURR_UV(24,10)<8;8,1>					-uwCURBE_TEMP(1,2)<0;2,1>
+	mac (16)	acc0<1>:w								ubCURR_UV(24,2)<16;16,1>					256:w
+	mac (8)	acc0.0<1>:w									ubPREV_UV(8,32)<8;8,1>				uwCURBE_TEMP(1,0)<0;2,1>
+	mac (8)	acc0.8<1>:w									ubPREV_UV(8,40)<8;8,1>				uwCURBE_TEMP(1,2)<0;2,1>
+	add (16)	acc0<1>:w								acc0<16;16,1>:w								128:w
+ 	(f0.0) shr  (16) wDIFF_TEMPORAL(18)<1> 			acc0<16;16,1>:w								8:w
+	(-f0.0) mov (16) wDIFF_TEMPORAL(18)<1>			ubCURR_UV(24,2)<16;16,1>
+	cmp.le.f0.0 (16) null<1>:w							uwDIFF_TEMPORAL_SUM4x4_TEMP_0(0)<16;16,1> 	r61.26<0;2,1>:ub
+	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(18)<1>	wDIFF_TEMPORAL(18)<16;16,1>			ubCURR_UV(24,2)<16;16,1>
+
+	cmp.l.f0.0 (16)	null<1>:w							(abs)wDIFF_TEMPORAL(19)<16;16,1>			r61.28<0;2,1>:ub	// row 19
+	cmp.l.f1.0 (16) null<1>:w							(abs)wDIFF_TEMPORAL(19)<16;16,1>			r61.30<0;2,1>:ub
+	mul (8)	acc0.0<1>:w									ubCURR_UV(25,2)<8;8,1>					-uwCURBE_TEMP(1,0)<0;2,1>
+	mul (8)	acc0.8<1>:w									ubCURR_UV(25,10)<8;8,1>					-uwCURBE_TEMP(1,2)<0;2,1>
+	mac (16)	acc0<1>:w								ubCURR_UV(25,2)<16;16,1>					256:w
+	mac (8)	acc0.0<1>:w									ubPREV_UV(8,48)<8;8,1>				uwCURBE_TEMP(1,0)<0;2,1>
+	mac (8)	acc0.8<1>:w									ubPREV_UV(8,56)<8;8,1>				uwCURBE_TEMP(1,2)<0;2,1>
+	add (16)	acc0<1>:w								acc0<16;16,1>:w								128:w
+ 	(f0.0) shr  (16) wDIFF_TEMPORAL(19)<1>			acc0<16;16,1>:w								8:w
+	(-f0.0) mov (16) wDIFF_TEMPORAL(19)<1>			ubCURR_UV(25,2)<16;16,1>
+	cmp.le.f0.0 (16) null<1>:w							uwDIFF_TEMPORAL_SUM4x4_TEMP_0(0)<16;16,1> 	r61.26<0;2,1>:ub
+	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(19)<1>	wDIFF_TEMPORAL(19)<16;16,1>			ubCURR_UV(25,2)<16;16,1>
+	cmp.l.f0.0 (16)	null<1>:w							(abs)wDIFF_TEMPORAL(20)<16;16,1>			r61.28<0;2,1>:ub	// row 20 (rows 20-23 use weights CURBE_TEMP(1,4)/(1,6) and block sums TEMP_1): f0.0 = |diff| < r61.28 bytes
+	cmp.l.f1.0 (16) null<1>:w							(abs)wDIFF_TEMPORAL(20)<16;16,1>			r61.30<0;2,1>:ub	// f1.0 = |diff| < r61.30 bytes
+	mul (8)	acc0.0<1>:w									ubCURR_UV(26,2)<8;8,1>					-uwCURBE_TEMP(1,4)<0;2,1>	// acc = -curr*w
+	mul (8)	acc0.8<1>:w									ubCURR_UV(26,10)<8;8,1>					-uwCURBE_TEMP(1,6)<0;2,1>
+	mac (16)	acc0<1>:w								ubCURR_UV(26,2)<16;16,1>					256:w	// acc += curr*256
+	mac (8)	acc0.0<1>:w									ubPREV_UV(8,64)<8;8,1>					uwCURBE_TEMP(1,4)<0;2,1>	// acc += prev*w
+	mac (8)	acc0.8<1>:w									ubPREV_UV(8,72)<8;8,1>					uwCURBE_TEMP(1,6)<0;2,1>
+	add (16)	acc0<1>:w								acc0<16;16,1>:w								128:w	// rounding for >>8
+ 	(f0.0) shr  (16) wDIFF_TEMPORAL(20)<1> 			acc0<16;16,1>:w								8:w	// below threshold: blended value
+	(-f0.0) mov (16) wDIFF_TEMPORAL(20)<1>			ubCURR_UV(26,2)<16;16,1>	// otherwise: current sample
+	cmp.le.f0.0 (16) null<1>:w							uwDIFF_TEMPORAL_SUM4x4_TEMP_1(0)<16;16,1> 	r61.26<0;2,1>:ub	// f0.0 = block sum <= r61.26 bytes
+	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(20)<1>	wDIFF_TEMPORAL(20)<16;16,1>			ubCURR_UV(26,2)<16;16,1>	// average with curr under inverted allv predicate - NOTE(review): confirm allv semantics
+
+	cmp.l.f0.0 (16)	null<1>:w							(abs)wDIFF_TEMPORAL(21)<16;16,1>			r61.28<0;2,1>:ub	// row 21
+	cmp.l.f1.0 (16) null<1>:w							(abs)wDIFF_TEMPORAL(21)<16;16,1>			r61.30<0;2,1>:ub
+	mul (8)	acc0.0<1>:w									ubCURR_UV(27,2)<8;8,1>					-uwCURBE_TEMP(1,4)<0;2,1>
+	mul (8)	acc0.8<1>:w									ubCURR_UV(27,10)<8;8,1>					-uwCURBE_TEMP(1,6)<0;2,1>
+	mac (16)	acc0<1>:w								ubCURR_UV(27,2)<16;16,1>					256:w
+	mac (8)	acc0.0<1>:w									ubPREV_UV(8,80)<8;8,1>				uwCURBE_TEMP(1,4)<0;2,1>
+	mac (8)	acc0.8<1>:w									ubPREV_UV(8,88)<8;8,1>				uwCURBE_TEMP(1,6)<0;2,1>
+	add (16)	acc0<1>:w								acc0<16;16,1>:w								128:w
+ 	(f0.0) shr  (16) wDIFF_TEMPORAL(21)<1>			acc0<16;16,1>:w								8:w
+	(-f0.0) mov (16) wDIFF_TEMPORAL(21)<1>			ubCURR_UV(27,2)<16;16,1>
+	cmp.le.f0.0 (16) null<1>:w							uwDIFF_TEMPORAL_SUM4x4_TEMP_1(0)<16;16,1> 	r61.26<0;2,1>:ub
+	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(21)<1>	wDIFF_TEMPORAL(21)<16;16,1>			ubCURR_UV(27,2)<16;16,1>
+
+	cmp.l.f0.0 (16)	null<1>:w							(abs)wDIFF_TEMPORAL(22)<16;16,1>			r61.28<0;2,1>:ub	// row 22
+	cmp.l.f1.0 (16) null<1>:w							(abs)wDIFF_TEMPORAL(22)<16;16,1>			r61.30<0;2,1>:ub
+	mul (8)	acc0.0<1>:w									ubCURR_UV(28,2)<8;8,1>					-uwCURBE_TEMP(1,4)<0;2,1>
+	mul (8)	acc0.8<1>:w									ubCURR_UV(28,10)<8;8,1>					-uwCURBE_TEMP(1,6)<0;2,1>
+	mac (16)	acc0<1>:w								ubCURR_UV(28,2)<16;16,1>					256:w
+	mac (8)	acc0.0<1>:w									ubPREV_UV(8,96)<8;8,1>				uwCURBE_TEMP(1,4)<0;2,1>
+	mac (8)	acc0.8<1>:w									ubPREV_UV(8,104)<8;8,1>				uwCURBE_TEMP(1,6)<0;2,1>
+	add (16)	acc0<1>:w								acc0<16;16,1>:w								128:w
+ 	(f0.0) shr  (16) wDIFF_TEMPORAL(22)<1> 			acc0<16;16,1>:w								8:w
+	(-f0.0) mov (16) wDIFF_TEMPORAL(22)<1>			ubCURR_UV(28,2)<16;16,1>
+	cmp.le.f0.0 (16) null<1>:w							uwDIFF_TEMPORAL_SUM4x4_TEMP_1(0)<16;16,1> 	r61.26<0;2,1>:ub
+	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(22)<1>	wDIFF_TEMPORAL(22)<16;16,1>			ubCURR_UV(28,2)<16;16,1>
+
+	cmp.l.f0.0 (16)	null<1>:w							(abs)wDIFF_TEMPORAL(23)<16;16,1>			r61.28<0;2,1>:ub	// row 23
+	cmp.l.f1.0 (16) null<1>:w							(abs)wDIFF_TEMPORAL(23)<16;16,1>			r61.30<0;2,1>:ub
+	mul (8)	acc0.0<1>:w									ubCURR_UV(29,2)<8;8,1>					-uwCURBE_TEMP(1,4)<0;2,1>
+	mul (8)	acc0.8<1>:w									ubCURR_UV(29,10)<8;8,1>					-uwCURBE_TEMP(1,6)<0;2,1>
+	mac (16)	acc0<1>:w								ubCURR_UV(29,2)<16;16,1>					256:w
+	mac (8)	acc0.0<1>:w									ubPREV_UV(8,112)<8;8,1>				uwCURBE_TEMP(1,4)<0;2,1>
+	mac (8)	acc0.8<1>:w									ubPREV_UV(8,120)<8;8,1>				uwCURBE_TEMP(1,6)<0;2,1>
+	add (16)	acc0<1>:w								acc0<16;16,1>:w								128:w
+ 	(f0.0) shr  (16) wDIFF_TEMPORAL(23)<1>			acc0<16;16,1>:w								8:w
+	(-f0.0) mov (16) wDIFF_TEMPORAL(23)<1>			ubCURR_UV(29,2)<16;16,1>
+	cmp.le.f0.0 (16) null<1>:w							uwDIFF_TEMPORAL_SUM4x4_TEMP_1(0)<16;16,1> 	r61.26<0;2,1>:ub
+	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(23)<1>	wDIFF_TEMPORAL(23)<16;16,1>			ubCURR_UV(29,2)<16;16,1>
+	cmp.l.f0.0 (16)	null<1>:w							(abs)wDIFF_TEMPORAL(24)<16;16,1>			r61.28<0;2,1>:ub	// row 24 (rows 24-27 use weights CURBE_TEMP(1,8)/(1,10) and block sums TEMP_2): f0.0 = |diff| < r61.28 bytes
+	cmp.l.f1.0 (16) null<1>:w							(abs)wDIFF_TEMPORAL(24)<16;16,1>			r61.30<0;2,1>:ub	// f1.0 = |diff| < r61.30 bytes
+	mul (8)	acc0.0<1>:w									ubCURR_UV(30,2)<8;8,1>					-uwCURBE_TEMP(1,8)<0;2,1>	// acc = -curr*w
+	mul (8)	acc0.8<1>:w									ubCURR_UV(30,10)<8;8,1>					-uwCURBE_TEMP(1,10)<0;2,1>
+	mac (16)	acc0<1>:w								ubCURR_UV(30,2)<16;16,1>					256:w	// acc += curr*256
+	mac (8)	acc0.0<1>:w									ubPREV_UV(8,128)<8;8,1>					uwCURBE_TEMP(1,8)<0;2,1>	// acc += prev*w
+	mac (8)	acc0.8<1>:w									ubPREV_UV(8,136)<8;8,1>					uwCURBE_TEMP(1,10)<0;2,1>
+	add (16)	acc0<1>:w								acc0<16;16,1>:w								128:w	// rounding for >>8
+ 	(f0.0) shr  (16) wDIFF_TEMPORAL(24)<1> 			acc0<16;16,1>:w								8:w	// below threshold: blended value
+	(-f0.0) mov (16) wDIFF_TEMPORAL(24)<1>			ubCURR_UV(30,2)<16;16,1>	// otherwise: current sample
+	cmp.le.f0.0 (16) null<1>:w							uwDIFF_TEMPORAL_SUM4x4_TEMP_2(0)<16;16,1> 	r61.26<0;2,1>:ub	// f0.0 = block sum <= r61.26 bytes
+	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(24)<1>	wDIFF_TEMPORAL(24)<16;16,1>			ubCURR_UV(30,2)<16;16,1>	// average with curr under inverted allv predicate - NOTE(review): confirm allv semantics
+
+	cmp.l.f0.0 (16)	null<1>:w							(abs)wDIFF_TEMPORAL(25)<16;16,1>			r61.28<0;2,1>:ub	// row 25
+	cmp.l.f1.0 (16) null<1>:w							(abs)wDIFF_TEMPORAL(25)<16;16,1>			r61.30<0;2,1>:ub
+	mul (8)	acc0.0<1>:w									ubCURR_UV(31,2)<8;8,1>					-uwCURBE_TEMP(1,8)<0;2,1>
+	mul (8)	acc0.8<1>:w									ubCURR_UV(31,10)<8;8,1>					-uwCURBE_TEMP(1,10)<0;2,1>
+	mac (16)	acc0<1>:w								ubCURR_UV(31,2)<16;16,1>					256:w
+	mac (8)	acc0.0<1>:w									ubPREV_UV(8,144)<8;8,1>				uwCURBE_TEMP(1,8)<0;2,1>
+	mac (8)	acc0.8<1>:w									ubPREV_UV(8,152)<8;8,1>				uwCURBE_TEMP(1,10)<0;2,1>
+	add (16)	acc0<1>:w								acc0<16;16,1>:w								128:w
+ 	(f0.0) shr  (16) wDIFF_TEMPORAL(25)<1>			acc0<16;16,1>:w								8:w
+	(-f0.0) mov (16) wDIFF_TEMPORAL(25)<1>			ubCURR_UV(31,2)<16;16,1>
+	cmp.le.f0.0 (16) null<1>:w							uwDIFF_TEMPORAL_SUM4x4_TEMP_2(0)<16;16,1> 	r61.26<0;2,1>:ub
+	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(25)<1>	wDIFF_TEMPORAL(25)<16;16,1>			ubCURR_UV(31,2)<16;16,1>
+
+	cmp.l.f0.0 (16)	null<1>:w							(abs)wDIFF_TEMPORAL(26)<16;16,1>			r61.28<0;2,1>:ub	// row 26
+	cmp.l.f1.0 (16) null<1>:w							(abs)wDIFF_TEMPORAL(26)<16;16,1>			r61.30<0;2,1>:ub
+	mul (8)	acc0.0<1>:w									ubCURR_UV(32,2)<8;8,1>					-uwCURBE_TEMP(1,8)<0;2,1>
+	mul (8)	acc0.8<1>:w									ubCURR_UV(32,10)<8;8,1>					-uwCURBE_TEMP(1,10)<0;2,1>
+	mac (16)	acc0<1>:w								ubCURR_UV(32,2)<16;16,1>					256:w
+	mac (8)	acc0.0<1>:w									ubPREV_UV(8,160)<8;8,1>				uwCURBE_TEMP(1,8)<0;2,1>
+	mac (8)	acc0.8<1>:w									ubPREV_UV(8,168)<8;8,1>				uwCURBE_TEMP(1,10)<0;2,1>
+	add (16)	acc0<1>:w								acc0<16;16,1>:w								128:w
+ 	(f0.0) shr  (16) wDIFF_TEMPORAL(26)<1> 			acc0<16;16,1>:w								8:w
+	(-f0.0) mov (16) wDIFF_TEMPORAL(26)<1>			ubCURR_UV(32,2)<16;16,1>
+	cmp.le.f0.0 (16) null<1>:w							uwDIFF_TEMPORAL_SUM4x4_TEMP_2(0)<16;16,1> 	r61.26<0;2,1>:ub
+	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(26)<1>	wDIFF_TEMPORAL(26)<16;16,1>			ubCURR_UV(32,2)<16;16,1>
+
+	cmp.l.f0.0 (16)	null<1>:w							(abs)wDIFF_TEMPORAL(27)<16;16,1>			r61.28<0;2,1>:ub	// row 27
+	cmp.l.f1.0 (16) null<1>:w							(abs)wDIFF_TEMPORAL(27)<16;16,1>			r61.30<0;2,1>:ub
+	mul (8)	acc0.0<1>:w									ubCURR_UV(33,2)<8;8,1>					-uwCURBE_TEMP(1,8)<0;2,1>
+	mul (8)	acc0.8<1>:w									ubCURR_UV(33,10)<8;8,1>					-uwCURBE_TEMP(1,10)<0;2,1>
+	mac (16)	acc0<1>:w								ubCURR_UV(33,2)<16;16,1>					256:w
+	mac (8)	acc0.0<1>:w									ubPREV_UV(8,176)<8;8,1>				uwCURBE_TEMP(1,8)<0;2,1>
+	mac (8)	acc0.8<1>:w									ubPREV_UV(8,184)<8;8,1>				uwCURBE_TEMP(1,10)<0;2,1>
+	add (16)	acc0<1>:w								acc0<16;16,1>:w								128:w
+ 	(f0.0) shr  (16) wDIFF_TEMPORAL(27)<1>			acc0<16;16,1>:w								8:w
+	(-f0.0) mov (16) wDIFF_TEMPORAL(27)<1>			ubCURR_UV(33,2)<16;16,1>
+	cmp.le.f0.0 (16) null<1>:w							uwDIFF_TEMPORAL_SUM4x4_TEMP_2(0)<16;16,1> 	r61.26<0;2,1>:ub
+	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(27)<1>	wDIFF_TEMPORAL(27)<16;16,1>			ubCURR_UV(33,2)<16;16,1>
+
+	cmp.l.f0.0 (16)	null<1>:w						(abs)wDIFF_TEMPORAL(28)<16;16,1>			r61.28<0;2,1>:ub
+	cmp.l.f1.0 (16) null<1>:w						(abs)wDIFF_TEMPORAL(28)<16;16,1>			r61.30<0;2,1>:ub
+	mul (8)	acc0.0<1>:w								ubCURR_UV(34,2)<8;8,1>					-uwCURBE_TEMP(1,12)<0;2,1>
+	mul (8)	acc0.8<1>:w								ubCURR_UV(34,10)<8;8,1>					-uwCURBE_TEMP(1,14)<0;2,1>
+	mac (16)	acc0<1>:w							ubCURR_UV(34,2)<16;16,1>					256:w
+	mac (8)	acc0.0<1>:w								ubPREV_UV(8,192)<8;8,1>					uwCURBE_TEMP(1,12)<0;2,1>
+	mac (8)	acc0.8<1>:w								ubPREV_UV(8,200)<8;8,1>					uwCURBE_TEMP(1,14)<0;2,1>
+	add (16)	acc0<1>:w							acc0<16;16,1>:w								128:w
+ 	(f0.0) shr  (16) wDIFF_TEMPORAL(28)<1> 			acc0<16;16,1>:w								8:w
+	(-f0.0) mov (16) wDIFF_TEMPORAL(28)<1>			ubCURR_UV(34,2)<16;16,1>
+	cmp.le.f0.0 (16) null<1>:w						uwDIFF_TEMPORAL_SUM4x4_TEMP_3(0)<16;16,1> 	r61.26<0;2,1>:ub
+	(-f0.0.allv) avg (16)	wDIFF_TEMPORAL(28)<1>	wDIFF_TEMPORAL(28)<16;16,1>				ubCURR_UV(34,2)<16;16,1>
+
+	cmp.l.f0.0 (16)	null<1>:w						(abs)wCURBE_TEMP(4)<16;16,1>				r61.28<0;2,1>:ub
+	cmp.l.f1.0 (16) null<1>:w						(abs)wCURBE_TEMP(4)<16;16,1>				r61.30<0;2,1>:ub
+	mul (8)	acc0.0<1>:w								ubCURR_UV(35,2)<8;8,1>					-uwCURBE_TEMP(1,12)<0;2,1>
+	mul (8)	acc0.8<1>:w								ubCURR_UV(35,10)<8;8,1>					-uwCURBE_TEMP(1,14)<0;2,1>
+	mac (16)	acc0<1>:w							ubCURR_UV(35,2)<16;16,1>					256:w
+	mac (8)	acc0.0<1>:w								ubPREV_UV(8,208)<8;8,1>				uwCURBE_TEMP(1,12)<0;2,1>
+	mac (8)	acc0.8<1>:w								ubPREV_UV(8,216)<8;8,1>				uwCURBE_TEMP(1,14)<0;2,1>
+	add (16)	acc0<1>:w							acc0<16;16,1>:w								128:w
+ 	(f0.0) shr  (16) wCURBE_TEMP(4)<1>				acc0<16;16,1>:w								8:w
+	(-f0.0) mov (16) wCURBE_TEMP(4)<1>				ubCURR_UV(35,2)<16;16,1>
+	cmp.le.f0.0 (16) null<1>:w						uwDIFF_TEMPORAL_SUM4x4_TEMP_3(0)<16;16,1> 	r61.26<0;2,1>:ub
+	(-f0.0.allv) avg (16)	wCURBE_TEMP(4)<1>		wCURBE_TEMP(4)<16;16,1>				ubCURR_UV(35,2)<16;16,1>
+
+	cmp.l.f0.0 (16)	null<1>:w						(abs)wCURBE_TEMP(5)<16;16,1>				r61.28<0;2,1>:ub
+	cmp.l.f1.0 (16) null<1>:w						(abs)wCURBE_TEMP(5)<16;16,1>				r61.30<0;2,1>:ub
+	mul (8)	acc0.0<1>:w								ubCURR_UV(36,2)<8;8,1>					-uwCURBE_TEMP(1,12)<0;2,1>
+	mul (8)	acc0.8<1>:w								ubCURR_UV(36,10)<8;8,1>					-uwCURBE_TEMP(1,14)<0;2,1>
+	mac (16)	acc0<1>:w							ubCURR_UV(36,2)<16;16,1>					256:w
+	mac (8)	acc0.0<1>:w								ubPREV_UV(8,224)<8;8,1>				uwCURBE_TEMP(1,12)<0;2,1>
+	mac (8)	acc0.8<1>:w								ubPREV_UV(8,232)<8;8,1>				uwCURBE_TEMP(1,14)<0;2,1>
+	add (16)	acc0<1>:w							acc0<16;16,1>:w								128:w
+ 	(f0.0) shr  (16) wCURBE_TEMP(5)<1> 				acc0<16;16,1>:w								8:w
+	(-f0.0) mov (16) wCURBE_TEMP(5)<1>				ubCURR_UV(36,2)<16;16,1>
+	cmp.le.f0.0 (16) null<1>:w						uwDIFF_TEMPORAL_SUM4x4_TEMP_3(0)<16;16,1> 	r61.26<0;2,1>:ub
+	(-f0.0.allv) avg (16)	wCURBE_TEMP(5)<1>		wCURBE_TEMP(5)<16;16,1>				ubCURR_UV(36,2)<16;16,1>
+
+	cmp.l.f0.0 (16)	null<1>:w						(abs)wCURBE_TEMP(6)<16;16,1>				r61.28<0;2,1>:ub
+	cmp.l.f1.0 (16) null<1>:w						(abs)wCURBE_TEMP(6)<16;16,1>				r61.30<0;2,1>:ub
+	mul (8)	acc0.0<1>:w								ubCURR_UV(37,2)<8;8,1>					-uwCURBE_TEMP(1,12)<0;2,1>
+	mul (8)	acc0.8<1>:w								ubCURR_UV(37,10)<8;8,1>					-uwCURBE_TEMP(1,14)<0;2,1>
+	mac (16)	acc0<1>:w							ubCURR_UV(37,2)<16;16,1>					256:w
+	mac (8)	acc0.0<1>:w								ubPREV_UV(8,240)<8;8,1>				uwCURBE_TEMP(1,12)<0;2,1>
+	mac (8)	acc0.8<1>:w								ubPREV_UV(8,248)<8;8,1>				uwCURBE_TEMP(1,14)<0;2,1>
+	add (16)	acc0<1>:w							acc0<16;16,1>:w								128:w
+ 	(f0.0) shr  (16) wCURBE_TEMP(6)<1>				acc0<16;16,1>:w								8:w
+	(-f0.0) mov (16) wCURBE_TEMP(6)<1>				ubCURR_UV(37,2)<16;16,1>
+	cmp.le.f0.0 (16) null<1>:w						uwDIFF_TEMPORAL_SUM4x4_TEMP_3(0)<16;16,1> 	r61.26<0;2,1>:ub
+	(-f0.0.allv) avg (16)	wCURBE_TEMP(6)<1>		wCURBE_TEMP(6)<16;16,1>				ubCURR_UV(37,2)<16;16,1>
+
+	//Pack Weight History WORD -> BYTE
+	mov (16) ubCURBE_TEMP(3,0)<1>		ubCURBE_TEMP(0)<32;16,2>	
+	mov (16) ubCURBE_TEMP(3,16)<1>		ubCURBE_TEMP(1)<32;16,2>	
+
+
+
+//Module Name 	: DN_UV_Compute_BNE_UV
+//Author		: Tatiya, Rupesh
+//Description	: Computes minimum SOAD for each 16x4 block.
+//
+// Takes the lane-wise minimum of two sets of 8 SOAD words and packs the
+// result down to 8 bytes in CURBE_TEMP(1).
+// NOTE(review): the description says "16x4" while the operand symbol is
+// named SOAD_MIN_8x4 - presumably two 8x4 minima are merged into one 16x4
+// minimum here; confirm against the symbol definitions.
+
+	// f0.0 is set in each lane where the first operand is smaller than the second.
+	// With an <16;4,1> region and 8 lanes, each operand is two groups of 4 words,
+	// the groups being 16 words apart.
+	cmp.l.f0.0  (8) null:w     				uwSOAD_MIN_8x4(0,12)<16;4,1> 	uwSOAD_MIN_8x4(2,12)<16;4,1>
+	// Predicated select: keep the first operand where f0.0 is set, otherwise the
+	// second, i.e. the per-lane minimum of the two. Result is 8 words.
+	(f0.0)sel	(8) uwCURBE_TEMP(1,0)<1>	uwSOAD_MIN_8x4(0,12)<16;4,1> 	uwSOAD_MIN_8x4(2,12)<16;4,1>
+
+	// Pack WORD -> BYTE in place: gather every second byte (stride 2) of the 8
+	// words written above into 8 consecutive bytes at the start of CURBE_TEMP(1).
+    mov  (8)	ubCURBE_TEMP(1)<1>			ubCURBE_TEMP(1)<16;8,2>
+
+
+
+//File Name		: DN_UV_PL3_Unpack_Denoised_UV.asm
+//Author		: Tatiya, Rupesh
+//Description	: Unpack the interleaved UV data
+//
+// Every instruction below gathers 8 bytes with a byte stride of 4 from one
+// source row. Gathers starting at byte offset 0 are written to MSGPAYLOAD_U
+// and gathers starting at byte offset 2 are written to MSGPAYLOAD_V, so the
+// source rows are treated as words alternating U, V, U, V, ... of which only
+// one byte per word is kept.
+// Each payload row receives four 8-byte runs: source rows 0..15 fill byte
+// offsets 0 and 16, source rows 16..31 fill byte offsets 8 and 24.
+
+//First 16 lines.
+	mov  (8)	ubMSGPAYLOAD_U(0,0)<1>		ubDIFF_TEMPORAL(0,0)<32;8,4>		
+	mov  (8)	ubMSGPAYLOAD_U(0,16)<1>	ubDIFF_TEMPORAL(1,0)<32;8,4>		
+	mov  (8)	ubMSGPAYLOAD_V(0,0)<1>		ubDIFF_TEMPORAL(0,2)<32;8,4>		
+	mov  (8)	ubMSGPAYLOAD_V(0,16)<1>	ubDIFF_TEMPORAL(1,2)<32;8,4>		
+	mov  (8)	ubMSGPAYLOAD_U(1,0)<1>		ubDIFF_TEMPORAL(2,0)<32;8,4>		
+	mov  (8)	ubMSGPAYLOAD_U(1,16)<1>	ubDIFF_TEMPORAL(3,0)<32;8,4>		
+	mov  (8)	ubMSGPAYLOAD_V(1,0)<1>		ubDIFF_TEMPORAL(2,2)<32;8,4>		
+	mov  (8)	ubMSGPAYLOAD_V(1,16)<1>	ubDIFF_TEMPORAL(3,2)<32;8,4>		
+	mov  (8)	ubMSGPAYLOAD_U(2,0)<1>		ubDIFF_TEMPORAL(4,0)<32;8,4>		
+	mov  (8)	ubMSGPAYLOAD_U(2,16)<1>	ubDIFF_TEMPORAL(5,0)<32;8,4>		
+	mov  (8)	ubMSGPAYLOAD_V(2,0)<1>		ubDIFF_TEMPORAL(4,2)<32;8,4>		
+	mov  (8)	ubMSGPAYLOAD_V(2,16)<1>	ubDIFF_TEMPORAL(5,2)<32;8,4>		
+	mov  (8)	ubMSGPAYLOAD_U(3,0)<1>		ubDIFF_TEMPORAL(6,0)<32;8,4>		
+	mov  (8)	ubMSGPAYLOAD_U(3,16)<1>	ubDIFF_TEMPORAL(7,0)<32;8,4>		
+	mov  (8)	ubMSGPAYLOAD_V(3,0)<1>		ubDIFF_TEMPORAL(6,2)<32;8,4>		
+	mov  (8)	ubMSGPAYLOAD_V(3,16)<1>	ubDIFF_TEMPORAL(7,2)<32;8,4>		
+	mov  (8)	ubMSGPAYLOAD_U(4,0)<1>		ubDIFF_TEMPORAL(8,0)<32;8,4>		
+	mov  (8)	ubMSGPAYLOAD_U(4,16)<1>	ubDIFF_TEMPORAL(9,0)<32;8,4>		
+	mov  (8)	ubMSGPAYLOAD_V(4,0)<1>		ubDIFF_TEMPORAL(8,2)<32;8,4>		
+	mov  (8)	ubMSGPAYLOAD_V(4,16)<1>	ubDIFF_TEMPORAL(9,2)<32;8,4>		
+	mov  (8)	ubMSGPAYLOAD_U(5,0)<1>		ubDIFF_TEMPORAL(10,0)<32;8,4>		
+	mov  (8)	ubMSGPAYLOAD_U(5,16)<1>	ubDIFF_TEMPORAL(11,0)<32;8,4>		
+	mov  (8)	ubMSGPAYLOAD_V(5,0)<1>		ubDIFF_TEMPORAL(10,2)<32;8,4>		
+	mov  (8)	ubMSGPAYLOAD_V(5,16)<1>	ubDIFF_TEMPORAL(11,2)<32;8,4>		
+	mov  (8)	ubMSGPAYLOAD_U(6,0)<1>		ubDIFF_TEMPORAL(12,0)<32;8,4>		
+	mov  (8)	ubMSGPAYLOAD_U(6,16)<1>	ubDIFF_TEMPORAL(13,0)<32;8,4>		
+	mov  (8)	ubMSGPAYLOAD_V(6,0)<1>		ubDIFF_TEMPORAL(12,2)<32;8,4>		
+	mov  (8)	ubMSGPAYLOAD_V(6,16)<1>	ubDIFF_TEMPORAL(13,2)<32;8,4>		
+	mov  (8)	ubMSGPAYLOAD_U(7,0)<1>		ubDIFF_TEMPORAL(14,0)<32;8,4>		
+	mov  (8)	ubMSGPAYLOAD_U(7,16)<1>	ubDIFF_TEMPORAL(15,0)<32;8,4>		
+	mov  (8)	ubMSGPAYLOAD_V(7,0)<1>		ubDIFF_TEMPORAL(14,2)<32;8,4>		
+	mov  (8)	ubMSGPAYLOAD_V(7,16)<1>	ubDIFF_TEMPORAL(15,2)<32;8,4>		
+
+//Second 16 lines.
+//First 12 of them: source rows 16..27, all taken from DIFF_TEMPORAL.
+	mov  (8)	ubMSGPAYLOAD_U(0,8)<1>		ubDIFF_TEMPORAL(16,0)<32;8,4>	
+	mov  (8)	ubMSGPAYLOAD_U(0,24)<1>	ubDIFF_TEMPORAL(17,0)<32;8,4>	
+	mov  (8)	ubMSGPAYLOAD_V(0,8)<1>		ubDIFF_TEMPORAL(16,2)<32;8,4>	
+	mov  (8)	ubMSGPAYLOAD_V(0,24)<1>	ubDIFF_TEMPORAL(17,2)<32;8,4>	
+	mov  (8)	ubMSGPAYLOAD_U(1,8)<1>		ubDIFF_TEMPORAL(18,0)<32;8,4>	
+	mov  (8)	ubMSGPAYLOAD_U(1,24)<1>	ubDIFF_TEMPORAL(19,0)<32;8,4>	
+	mov  (8)	ubMSGPAYLOAD_V(1,8)<1>		ubDIFF_TEMPORAL(18,2)<32;8,4>	
+	mov  (8)	ubMSGPAYLOAD_V(1,24)<1>	ubDIFF_TEMPORAL(19,2)<32;8,4>	
+	mov  (8)	ubMSGPAYLOAD_U(2,8)<1>		ubDIFF_TEMPORAL(20,0)<32;8,4>	
+	mov  (8)	ubMSGPAYLOAD_U(2,24)<1>	ubDIFF_TEMPORAL(21,0)<32;8,4>	
+	mov  (8)	ubMSGPAYLOAD_V(2,8)<1>		ubDIFF_TEMPORAL(20,2)<32;8,4>	
+	mov  (8)	ubMSGPAYLOAD_V(2,24)<1>	ubDIFF_TEMPORAL(21,2)<32;8,4>	
+	mov  (8)	ubMSGPAYLOAD_U(3,8)<1>		ubDIFF_TEMPORAL(22,0)<32;8,4>	
+	mov  (8)	ubMSGPAYLOAD_U(3,24)<1>	ubDIFF_TEMPORAL(23,0)<32;8,4>	
+	mov  (8)	ubMSGPAYLOAD_V(3,8)<1>		ubDIFF_TEMPORAL(22,2)<32;8,4>	
+	mov  (8)	ubMSGPAYLOAD_V(3,24)<1>	ubDIFF_TEMPORAL(23,2)<32;8,4>	
+	mov  (8)	ubMSGPAYLOAD_U(4,8)<1>		ubDIFF_TEMPORAL(24,0)<32;8,4>	
+	mov  (8)	ubMSGPAYLOAD_U(4,24)<1>	ubDIFF_TEMPORAL(25,0)<32;8,4>	
+	mov  (8)	ubMSGPAYLOAD_V(4,8)<1>		ubDIFF_TEMPORAL(24,2)<32;8,4>	
+	mov  (8)	ubMSGPAYLOAD_V(4,24)<1>	ubDIFF_TEMPORAL(25,2)<32;8,4>	
+	mov  (8)	ubMSGPAYLOAD_U(5,8)<1>		ubDIFF_TEMPORAL(26,0)<32;8,4>	
+	mov  (8)	ubMSGPAYLOAD_U(5,24)<1>	ubDIFF_TEMPORAL(27,0)<32;8,4>	
+	mov  (8)	ubMSGPAYLOAD_V(5,8)<1>		ubDIFF_TEMPORAL(26,2)<32;8,4>	
+	mov  (8)	ubMSGPAYLOAD_V(5,24)<1>	ubDIFF_TEMPORAL(27,2)<32;8,4>	
+
+	//Last 4 rows: row 28 still comes from DIFF_TEMPORAL; the final 3 rows are
+	//read from CURBE_TEMP(4), (5) and (6), which is where the filter code
+	//earlier in this file stores them instead of DIFF_TEMPORAL(29..31).
+	mov  (8)	ubMSGPAYLOAD_U(6,8)<1>		ubDIFF_TEMPORAL(28,0)<32;8,4>	
+	mov  (8)	ubMSGPAYLOAD_U(6,24)<1>		ubCURBE_TEMP(4,0)<32;8,4>		
+	mov  (8)	ubMSGPAYLOAD_V(6,8)<1>		ubDIFF_TEMPORAL(28,2)<32;8,4>	
+	mov  (8)	ubMSGPAYLOAD_V(6,24)<1>		ubCURBE_TEMP(4,2)<32;8,4>		
+
+	mov  (8)	ubMSGPAYLOAD_U(7,8)<1>		ubCURBE_TEMP(5,0)<32;8,4>	
+    mov  (8)	ubMSGPAYLOAD_U(7,24)<1>		ubCURBE_TEMP(6,0)<32;8,4>	
+    mov  (8)	ubMSGPAYLOAD_V(7,8)<1>		ubCURBE_TEMP(5,2)<32;8,4>	
+    mov  (8)	ubMSGPAYLOAD_V(7,24)<1>		ubCURBE_TEMP(6,2)<32;8,4>	
+
+
+
+//Module Name 	: DN_UV_420_Save_Curr_Frame_Y
+//Author		: Tatiya, Rupesh
+//Description	: Save Curr Frame Y data for 420 Input
+
+
+
+//Module Name 	: DN_UV_Load_Curr_Frame_Y
+//Author		: Tatiya, Rupesh
+//Description	: Saves Y or YUY2 of Current frame.
+//
+// Builds four message headers (r92, r101, r110, r119) from a common template
+// and issues one send per header, all with the same descriptor and a null
+// destination.
+// NOTE(review): the module name says "Load" but the description says "Saves"
+// and the sends discard any response - presumably these are block writes;
+// confirm against the descriptor encoding.
+// NOTE(review): the data registers that follow each header are not written
+// in this module - presumably they were filled earlier; verify.
+
+
+
+
+	// Header template in acc0: start from a copy of r0 ...
+	mov (8)		acc0.0<1>:ud		r0.0<8;8,1>:ud
+	// ... replace dwords 0 and 1 with the two words at r62.10, widened to dwords
+	// (presumably the X/Y origin of the block - TODO confirm) ...
+	mov (2)		acc0.0<1>:d			r62.10<2;2,1>:w
+
+	// ... and set dword 2 to 0xF000F (presumably the block-size field,
+	// 16 wide x 16 high encoded as size-1 - TODO confirm).
+	mov (1)		acc0.2<1>:d			0xF000F:ud
+
+	// Header 0: template unchanged.
+	mov (8)     r92.0<1>:ud	acc0.0<8;8,1>:ud
+
+	// Headers 1..3 start as copies of the template and are offset below.
+	mov (8)     r101.0<1>:ud	acc0.0<8;8,1>:ud
+	mov (8)     r110.0<1>:ud	acc0.0<8;8,1>:ud
+	mov (8)     r119.0<1>:ud	acc0.0<8;8,1>:ud
+
+	// Header 1: dword 1 advanced by 16.
+	add (1)		r101.1<1>:d 	acc0.1<0;1,0>:d   		16:d
+
+	// Header 2: dword 0 advanced by 16.
+	add (1)		r110.0<1>:d 	acc0.0<0;1,0>:d   		16:d
+
+	// Header 3: dwords 0 and 1 both advanced by 16.
+	add (2)		r119.0<1>:d 	acc0.0<2;2,1>:d   		16:d
+
+	// One message per header; the four origins together cover a 2x2 grid of
+	// blocks spaced 16 apart in each direction.
+	send (8)	null<1>:d	r92		0x5		0x120A8018:ud
+	send (8)	null<1>:d	r101		0x5		0x120A8018:ud
+	send (8)	null<1>:d	r110		0x5		0x120A8018:ud
+	send (8)	null<1>:d	r119		0x5		0x120A8018:ud
+
+
+	//TODO - See if History saving can be combined with Curr Frame Save. - rT
+
+
+//Module Name 	: DN_UV_Save_Hist_UV
+//Author		: Tatiya, Rupesh
+//Description	: Saves DN history for UV data.
+//
+// Builds a message header in r3 and sends it with a null destination.
+// NOTE(review): the data that follows the header is presumably the packed
+// weight history produced by the "Pack Weight History" step earlier in this
+// file - confirm that CURBE_TEMP(3) maps to the register after r3.
+
+	// Header starts as a copy of r0.
+	mov (8)  r3.0<1>:ud	r0.0<8;8,1>:ud
+	// Dwords 0 and 1 come from the two words at r62.12, widened to dwords
+	// (presumably the X/Y origin in the history surface - TODO confirm).
+	mov (2)	 r3.0<1>:d	r62.12<2;2,1>:w				
+	// Dword 2 = 0x30007 (presumably block size: 8 wide x 4 high encoded as
+	// size-1 - TODO confirm).
+	mov (1)	 r3.2<1>:d	0x30007:ud		
+
+	send (8) null<1>:d	r3		0x5		0x40A8021:ud
+
+
+
+//Module Name	: DN_UV_Save_BNE_UV
+//Author		: Tatiya, Rupesh
+//Description	: Saves BNE values for 16x16 U and 16x16 V.
+//
+// Builds a message header in r1 and sends it with a null destination.
+// NOTE(review): the data that follows the header is presumably the 8 packed
+// bytes produced by DN_UV_Compute_BNE_UV - confirm that CURBE_TEMP(1) maps to
+// the register after r1.
+
+	// Header starts as a copy of r0.
+	mov (8)  r1.0<1>:ud	r0.0<8;8,1>:ud
+	// Dwords 0 and 1 come from the two words at r63.12, widened to dwords
+	// (presumably the X/Y origin in the BNE surface - TODO confirm).
+	mov (2)	 r1.0<1>:d		r63.12<2;2,1>:w					
+	// Dword 2 = 0x10003 (presumably block size: 4 wide x 2 high encoded as
+	// size-1 - TODO confirm).
+	mov (1)	 r1.2<1>:d		0x10003:ud		
+
+	send (8) null<1>:d	r1		0x5		0x40A8023:ud
+
+
+
+//File Name 	: DN_UV_PL3_Save_Curr_Frame_UV.asm
+//Author		: Tatiya, Rupesh
+//Description	: Save U and V data for PL3 surface
+
+
+
+//Module name 	:  DN_UV_Save_Curr_Frame_UV
+//Author		:  Tatiya, Rupesh
+//Description	:  Saves Current Frame (UV only).
+//
+// Builds two identical message headers (r74 and r83) and issues one send for
+// each. The two descriptors differ only in their lowest byte (0x19 vs 0x1A),
+// presumably selecting the U and the V surface respectively - TODO confirm.
+
+
+
+
+	// Both headers start as copies of r0.
+	mov  (8) r74<1>:ud		r0.0<8;8,1>:ud
+	// NOTE(review): r83 is overwritten in full by the copy from r74 below
+	// before it is read, so this initialisation looks redundant.
+	mov  (8) r83<1>:ud		r0.0<8;8,1>:ud
+
+	// Dwords 0 and 1 = the two words at r62.10, each shifted right by 1, i.e.
+	// half of the values used for the Y header built from the same words.
+	shr (2)  r74.0<1>:d		r62.10<2;2,1>:w			1:w			
+	// Dword 2 = 0xF000F (presumably block size 16x16 encoded as size-1 - TODO confirm).
+	mov (1)	 r74.2<1>:d		0xF000F:ud		
+
+	// Second header is an exact copy of the first.
+	mov (8)  r83.0<1>:ud		r74.0<8;8,1>:ud
+
+	send (8) null<1>:d	r74	0x5		0x120A8019:ud
+	send (8) null<1>:d	r83	0x5		0x120A801A:ud
+
+
+
+//End of Thread message
+// Copies r0 into r127 and sends it as a single-lane message with a null
+// destination; this is the last instruction before the sub-routine section.
+// NOTE(review): 0x27 / 0x02000010 are presumably the target and descriptor
+// that mark end-of-thread - confirm against the hardware message encoding.
+
+mov (8) r127<1>:ud r0.0<8;8,1>:ud 
+ send (1) null<1>:d r127 0x27 0x02000010
+
+
+	//All sub-routines here
+
+
+// Module Name  : Noise_Detection
+// Author		: Tatiya, Rupesh
+// Description	: Performs noise detection on 32 pixels of U (8x4) and 32 pixels of V (8x4).
+
+DN_UV_NOISE_DETECTION_UV:
+
+// Find Field Block Median
+//
+// Purpose   : Find the median value of the nine pixels in the same field
+//             which are centered at current pixel.
+//
+//             Works on 9 pixels centered at the current pixel
+//                NOTE: pixels are within same field.
+//                      v4 - current pixel
+//
+//                  v2 v1 v0
+//                   *  *  *     <--- Different field - not used
+//                  v5 v4 v3
+//                   *  *  *     <--- Different field - not used
+//                  v8 v7 v6
+
+// Algorithm to find median modifies the data.
+// Copy the data needed to calculate median so the original source data stays intact.
+//
+
+//TODO - Change Interleaved implementation to separated one if - ,  does not work on predication. - rT
+
+//Delete Later - rT
+//mov (1) pCUR_UV:uw		52*32:uw
+
+// v0
+mov (16) ubMEDIAN_TEMP(0,0)<1>    	r[a0.0,0]<16;16,1>		
+// v0
+mov (16) ubMEDIAN_TEMP(0,16)<1>   	r[a0.0,32]<16;16,1>		
+// v1
+mov (16) ubMEDIAN_TEMP(1,0)<1>    	r[a0.0,2]<16;16,1>		
+// v1
+mov (16) ubMEDIAN_TEMP(1,16)<1>   	r[a0.0,34]<16;16,1>		
+// v2
+mov (16) ubMEDIAN_TEMP(2,0)<1>    	r[a0.0,4]<16;16,1>		
+// v2
+mov (16) ubMEDIAN_TEMP(2,16)<1>   	r[a0.0,36]<16;16,1>		
+// v3
+mov (16) ubMEDIAN_TEMP(3,0)<1>    	r[a0.0,64]<16;16,1>  	
+// v3
+mov (16) ubMEDIAN_TEMP(3,16)<1>   	r[a0.0,96]<16;16,1>		
+// v4
+mov (16) ubMEDIAN_TEMP(4,0)<1>		r[a0.0,66]<16;16,1>  	
+// v4
+mov (16) ubMEDIAN_TEMP(4,16)<1>   	r[a0.0,98]<16;16,1>  	
+// v5
+mov (16) ubMEDIAN_TEMP(5,0)<1>		r[a0.0,68]<16;16,1>  	
+// v5
+mov (16) ubMEDIAN_TEMP(5,16)<1>   	r[a0.0,100]<16;16,1>  	
+// v6
+mov (16) ubMEDIAN_TEMP(6,0)<1>    	r[a0.0,128]<16;16,1>  	
+// v6
+mov (16) ubMEDIAN_TEMP(6,16)<1>   	r[a0.0,160]<16;16,1>		
+// v7
+mov (16) ubMEDIAN_TEMP(7,0)<1>		r[a0.0,130]<16;16,1>  	
+// v7
+mov (16) ubMEDIAN_TEMP(7,16)<1>   	r[a0.0,162]<16;16,1>  	
+// v8
+mov (16) ubMEDIAN_TEMP(8,0)<1>		r[a0.0,132]<16;16,1>  	
+// v8
+mov (16) ubMEDIAN_TEMP(8,16)<1>   	r[a0.0,164]<16;16,1>  	
+
+//TODO - Optimize one instruction here.
+add (1)  a0.0:uw		a0.0<0;1,0>:uw 64:uw
+// v0
+mov (16) ubMEDIAN_TEMP(9,0)<1>    	r[a0.0,0]<16;16,1>		
+// v0
+mov (16) ubMEDIAN_TEMP(9,16)<1>   	r[a0.0,32]<16;16,1>		
+// v1
+mov (16) ubMEDIAN_TEMP(10,0)<1>    	r[a0.0,2]<16;16,1>		
+// v1
+mov (16) ubMEDIAN_TEMP(10,16)<1>   	r[a0.0,34]<16;16,1>		
+// v2
+mov (16) ubMEDIAN_TEMP(11,0)<1>    	r[a0.0,4]<16;16,1>		
+// v2
+mov (16) ubMEDIAN_TEMP(11,16)<1>   	r[a0.0,36]<16;16,1>		
+// v3
+mov (16) ubMEDIAN_TEMP(12,0)<1>    	r[a0.0,64]<16;16,1>  	
+// v3
+mov (16) ubMEDIAN_TEMP(12,16)<1>   	r[a0.0,96]<16;16,1>		
+// v4
+mov (16) ubMEDIAN_TEMP(13,0)<1>		r[a0.0,66]<16;16,1>  	
+// v4
+mov (16) ubMEDIAN_TEMP(13,16)<1>   	r[a0.0,98]<16;16,1>  	
+// v5
+mov (16) ubMEDIAN_TEMP(14,0)<1>		r[a0.0,68]<16;16,1>  	
+// v5
+mov (16) ubMEDIAN_TEMP(14,16)<1>   	r[a0.0,100]<16;16,1>  	
+// v6
+mov (16) ubMEDIAN_TEMP(15,0)<1>    	r[a0.0,128]<16;16,1>  	
+// v6
+mov (16) ubMEDIAN_TEMP(15,16)<1>   	r[a0.0,160]<16;16,1>		
+// v7
+mov (16) ubMEDIAN_TEMP(16,0)<1>		r[a0.0,130]<16;16,1>  	
+// v7
+mov (16) ubMEDIAN_TEMP(16,16)<1>   	r[a0.0,162]<16;16,1>  	
+// v8
+mov (16) ubMEDIAN_TEMP(17,0)<1>		r[a0.0,132]<16;16,1>  	
+// v8
+mov (16) ubMEDIAN_TEMP(17,16)<1>   	r[a0.0,164]<16;16,1>  	
+
+//TODO - Optimize one instruction here.
+add (1)  a0.0:uw		a0.0<0;1,0>:uw 64:uw
+
+// MedianSwap
+//
+//  MedianSwap(inOutLeft, inOutRight)
+//  {
+//      if (inOutLeft > inOutRight)
+//      {
+//          temp = inOutLeft
+//          inOutLeft = inOutRight
+//          inOutRight = temp
+//      }
+//  }
+
+// MedianSwap(v1, v2) - U
+// MedianSwap(v4, v5) - U
+// MedianSwap(v1, v2) - V
+// MedianSwap(v4, v5) - V
+
+cmp.g.f0.0 (16) null:w          ubMEDIAN_TEMP(1,0)<32;16,2>  ubMEDIAN_TEMP(2,0)<32;16,2>
+cmp.g.f0.1 (16) null:w          ubMEDIAN_TEMP(4,0)<32;16,2>  ubMEDIAN_TEMP(5,0)<32;16,2>
+cmp.g.f1.0 (16) null:w          ubMEDIAN_TEMP(1,1)<32;16,2> 	ubMEDIAN_TEMP(2,1)<32;16,2>
+cmp.g.f1.1 (16) null:w          ubMEDIAN_TEMP(4,1)<32;16,2> 	ubMEDIAN_TEMP(5,1)<32;16,2>
+
+       	mov (16) ubTEMP1(0,0)<1>      ubMEDIAN_TEMP(1,0)<32;16,2>	
+       	mov (16) ubTEMP1(0,16)<1>     ubMEDIAN_TEMP(4,0)<32;16,2>	
+       	mov (16) ubTEMP1(1,0)<1>      ubMEDIAN_TEMP(1,1)<32;16,2>	
+		mov (16) ubTEMP1(1,16)<1>     ubMEDIAN_TEMP(4,1)<32;16,2>	
+
+(f0.0) mov (16) ubMEDIAN_TEMP(1,0)<2>     	ubMEDIAN_TEMP(2,0)<32;16,2>		
+(f0.1) mov (16) ubMEDIAN_TEMP(4,0)<2>		ubMEDIAN_TEMP(5,0)<32;16,2>		
+(f1.0) mov (16) ubMEDIAN_TEMP(1,1)<2>  		ubMEDIAN_TEMP(2,1)<32;16,2>		
+(f1.1) mov (16) ubMEDIAN_TEMP(4,1)<2>		ubMEDIAN_TEMP(5,1)<32;16,2>		
+
+(f0.0) mov (16) ubMEDIAN_TEMP(2,0)<2>     ubTEMP1(0,0)<16;16,1>		
+(f0.1) mov (16) ubMEDIAN_TEMP(5,0)<2>     ubTEMP1(0,16)<16;16,1>		
+(f1.0) mov (16) ubMEDIAN_TEMP(2,1)<2>     ubTEMP1(1,0)<16;16,1>		
+(f1.1) mov (16) ubMEDIAN_TEMP(5,1)<2>     ubTEMP1(1,16)<16;16,1>   	
+
+// MedianSwap(v7, v8) - U
+// MedianSwap(v0, v1) - U
+// MedianSwap(v7, v8) - V
+// MedianSwap(v0, v1) - V
+ cmp.g.f0.0 (16) null:w          ubMEDIAN_TEMP(7,0)<32;16,2>   	ubMEDIAN_TEMP(8,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w          ubMEDIAN_TEMP(0,0)<32;16,2>  	ubMEDIAN_TEMP(1,0)<32;16,2>
+ cmp.g.f1.0 (16) null:w          ubMEDIAN_TEMP(7,1)<32;16,2> 	ubMEDIAN_TEMP(8,1)<32;16,2>
+ cmp.g.f1.1 (16) null:w          ubMEDIAN_TEMP(0,1)<32;16,2> 	ubMEDIAN_TEMP(1,1)<32;16,2>
+
+        mov (16) ubTEMP1(0,0)<1>      ubMEDIAN_TEMP(7,0)<32;16,2>		
+        mov (16) ubTEMP1(0,16)<1>     ubMEDIAN_TEMP(0,0)<32;16,2>		
+        mov (16) ubTEMP1(1,0)<1>      ubMEDIAN_TEMP(7,1)<32;16,2>		
+ 		mov (16) ubTEMP1(1,16)<1>     ubMEDIAN_TEMP(0,1)<32;16,2>		
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(7,0)<2>   ubMEDIAN_TEMP(8,0)<32;16,2>	   
+ (f0.1) mov (16) ubMEDIAN_TEMP(0,0)<2>	ubMEDIAN_TEMP(1,0)<32;16,2>     
+ (f1.0) mov (16) ubMEDIAN_TEMP(7,1)<2>  	ubMEDIAN_TEMP(8,1)<32;16,2>     
+ (f1.1) mov (16) ubMEDIAN_TEMP(0,1)<2>	ubMEDIAN_TEMP(1,1)<32;16,2>     
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(8,0)<2>     ubTEMP1(0,0)<16;16,1>	   
+ (f0.1) mov (16) ubMEDIAN_TEMP(1,0)<2>     ubTEMP1(0,16)<16;16,1>    
+ (f1.0) mov (16) ubMEDIAN_TEMP(8,1)<2>     ubTEMP1(1,0)<16;16,1>     
+ (f1.1) mov (16) ubMEDIAN_TEMP(1,1)<2>     ubTEMP1(1,16)<16;16,1>    
+
+ // MedianSwap(v3, v4) - U
+ // MedianSwap(v6, v7) - U
+ // MedianSwap(v3, v4) - V
+ // MedianSwap(v6, v7) - V
+ cmp.g.f0.0 (16) null:w          ubMEDIAN_TEMP(3,0)<32;16,2>   	ubMEDIAN_TEMP(4,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w          ubMEDIAN_TEMP(6,0)<32;16,2>  	ubMEDIAN_TEMP(7,0)<32;16,2>
+ cmp.g.f1.0 (16) null:w          ubMEDIAN_TEMP(3,1)<32;16,2> 	ubMEDIAN_TEMP(4,1)<32;16,2>
+ cmp.g.f1.1 (16) null:w          ubMEDIAN_TEMP(6,1)<32;16,2> 	ubMEDIAN_TEMP(7,1)<32;16,2>
+
+        mov (16) ubTEMP1(0,0)<1>      ubMEDIAN_TEMP(3,0)<32;16,2>		
+        mov (16) ubTEMP1(0,16)<1>     ubMEDIAN_TEMP(6,0)<32;16,2>		
+        mov (16) ubTEMP1(1,0)<1>      ubMEDIAN_TEMP(3,1)<32;16,2>		
+ 		mov (16) ubTEMP1(1,16)<1>     ubMEDIAN_TEMP(6,1)<32;16,2>		
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(3,0)<2>   ubMEDIAN_TEMP(4,0)<32;16,2>	   
+ (f0.1) mov (16) ubMEDIAN_TEMP(6,0)<2>	ubMEDIAN_TEMP(7,0)<32;16,2>     
+ (f1.0) mov (16) ubMEDIAN_TEMP(3,1)<2>  	ubMEDIAN_TEMP(4,1)<32;16,2>     
+ (f1.1) mov (16) ubMEDIAN_TEMP(6,1)<2>	ubMEDIAN_TEMP(7,1)<32;16,2>     
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(4,0)<2>     ubTEMP1(0,0)<16;16,1>		
+ (f0.1) mov (16) ubMEDIAN_TEMP(7,0)<2>     ubTEMP1(0,16)<16;16,1>    
+ (f1.0) mov (16) ubMEDIAN_TEMP(4,1)<2>     ubTEMP1(1,0)<16;16,1>     
+ (f1.1) mov (16) ubMEDIAN_TEMP(7,1)<2>     ubTEMP1(1,16)<16;16,1>    
+
+ // MedianSwap(v1, v2) - U
+ // MedianSwap(v4, v5) - U
+ // MedianSwap(v1, v2) - V
+ // MedianSwap(v4, v5) - V
+ cmp.g.f0.0 (16) null:w          ubMEDIAN_TEMP(1,0)<32;16,2>   	ubMEDIAN_TEMP(2,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w          ubMEDIAN_TEMP(4,0)<32;16,2>  	ubMEDIAN_TEMP(5,0)<32;16,2>
+ cmp.g.f1.0 (16) null:w          ubMEDIAN_TEMP(1,1)<32;16,2> 	ubMEDIAN_TEMP(2,1)<32;16,2>
+ cmp.g.f1.1 (16) null:w          ubMEDIAN_TEMP(4,1)<32;16,2> 	ubMEDIAN_TEMP(5,1)<32;16,2>
+
+        mov (16) ubTEMP1(0,0)<1>      ubMEDIAN_TEMP(1,0)<32;16,2>		
+        mov (16) ubTEMP1(0,16)<1>     ubMEDIAN_TEMP(4,0)<32;16,2>		
+        mov (16) ubTEMP1(1,0)<1>      ubMEDIAN_TEMP(1,1)<32;16,2>		
+ 		mov (16) ubTEMP1(1,16)<1>     ubMEDIAN_TEMP(4,1)<32;16,2>		
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(1,0)<2>   ubMEDIAN_TEMP(2,0)<32;16,2>		
+ (f0.1) mov (16) ubMEDIAN_TEMP(4,0)<2>	ubMEDIAN_TEMP(5,0)<32;16,2>     
+ (f1.0) mov (16) ubMEDIAN_TEMP(1,1)<2>  	ubMEDIAN_TEMP(2,1)<32;16,2>     
+ (f1.1) mov (16) ubMEDIAN_TEMP(4,1)<2>	ubMEDIAN_TEMP(5,1)<32;16,2>     
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(2,0)<2>     ubTEMP1(0,0)<16;16,1>		
+ (f0.1) mov (16) ubMEDIAN_TEMP(5,0)<2>     ubTEMP1(0,16)<16;16,1>    
+ (f1.0) mov (16) ubMEDIAN_TEMP(2,1)<2>     ubTEMP1(1,0)<16;16,1>     
+ (f1.1) mov (16) ubMEDIAN_TEMP(5,1)<2>     ubTEMP1(1,16)<16;16,1>    
+
+ // MedianSwap(v7, v8) - U
+ // MedianSwap(v0, v3) - U
+ // MedianSwap(v7, v8) - V
+ // MedianSwap(v0, v3) - V
+ cmp.g.f0.0 (16) null:w          ubMEDIAN_TEMP(7,0)<32;16,2>   	ubMEDIAN_TEMP(8,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w          ubMEDIAN_TEMP(0,0)<32;16,2>  	ubMEDIAN_TEMP(3,0)<32;16,2>
+ cmp.g.f1.0 (16) null:w          ubMEDIAN_TEMP(7,1)<32;16,2> 	ubMEDIAN_TEMP(8,1)<32;16,2>
+ cmp.g.f1.1 (16) null:w          ubMEDIAN_TEMP(0,1)<32;16,2> 	ubMEDIAN_TEMP(3,1)<32;16,2>
+
+        mov (16) ubTEMP1(0,0)<1>      ubMEDIAN_TEMP(7,0)<32;16,2>		
+        mov (16) ubTEMP1(0,16)<1>     ubMEDIAN_TEMP(0,0)<32;16,2>		
+        mov (16) ubTEMP1(1,0)<1>      ubMEDIAN_TEMP(7,1)<32;16,2>		
+ 		mov (16) ubTEMP1(1,16)<1>     ubMEDIAN_TEMP(0,1)<32;16,2>		
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(7,0)<2>   ubMEDIAN_TEMP(8,0)<32;16,2>		
+ (f0.1) mov (16) ubMEDIAN_TEMP(0,0)<2>	ubMEDIAN_TEMP(3,0)<32;16,2>     
+ (f1.0) mov (16) ubMEDIAN_TEMP(7,1)<2>  	ubMEDIAN_TEMP(8,1)<32;16,2>     
+ (f1.1) mov (16) ubMEDIAN_TEMP(0,1)<2>	ubMEDIAN_TEMP(3,1)<32;16,2>     
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(8,0)<2>     ubTEMP1(0,0)<16;16,1>		
+ (f0.1) mov (16) ubMEDIAN_TEMP(3,0)<2>     ubTEMP1(0,16)<16;16,1>    
+ (f1.0) mov (16) ubMEDIAN_TEMP(8,1)<2>     ubTEMP1(1,0)<16;16,1>     
+ (f1.1) mov (16) ubMEDIAN_TEMP(3,1)<2>     ubTEMP1(1,16)<16;16,1>    
+
+ // NOTE:
+ // Compare v0 to v6 to find the minimum.
+ // Store the minimum for future use.
+ //TODO - Find if MIN is needed.
+ //cmp.l.f0.0  (16) null:w          			ubMEDIAN_TEMP(%1+0,0)<32;16,2> 	ubMEDIAN_TEMP(%1+6,0)<32;16,2>
+ //cmp.l.f1.0  (16) null:w          			ubMEDIAN_TEMP(%1+0,1)<32;16,2> 	ubMEDIAN_TEMP(%1+6,1)<32;16,2>
+ //(f0.0)  mov (16) ubCURR_MIN(0,%2*16+0)<1>   		ubMEDIAN_TEMP(%1+0,0)<32;16,2>
+ //(f1.0)  mov (16) ubCURR_MIN(1,%2*16+0)<1>   		ubMEDIAN_TEMP(%1+0,1)<32;16,2>
+ //(-f0.0) mov (16) ubCURR_MIN(0,%2*16+0)<1>   		ubMEDIAN_TEMP(%1+6,0)<32;16,2>
+ //(-f1.0) mov (16) ubCURR_MIN(1,%2*16+0)<1>   		ubMEDIAN_TEMP(%1+6,1)<32;16,2>
+
+ // MedianSwap(v5, v8) - U
+ // MedianSwap(v4, v7) - U
+ // MedianSwap(v5, v8) - V
+ // MedianSwap(v4, v7) - V
+ cmp.g.f0.0 (16) null:w          ubMEDIAN_TEMP(5,0)<32;16,2>   	ubMEDIAN_TEMP(8,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w          ubMEDIAN_TEMP(4,0)<32;16,2>  	ubMEDIAN_TEMP(7,0)<32;16,2>
+ cmp.g.f1.0 (16) null:w          ubMEDIAN_TEMP(5,1)<32;16,2> 	ubMEDIAN_TEMP(8,1)<32;16,2>
+ cmp.g.f1.1 (16) null:w          ubMEDIAN_TEMP(4,1)<32;16,2> 	ubMEDIAN_TEMP(7,1)<32;16,2>
+
+        mov (16) ubTEMP1(0,0)<1>      ubMEDIAN_TEMP(5,0)<32;16,2>		
+        mov (16) ubTEMP1(0,16)<1>     ubMEDIAN_TEMP(4,0)<32;16,2>		
+        mov (16) ubTEMP1(1,0)<1>      ubMEDIAN_TEMP(5,1)<32;16,2>		
+ 		mov (16) ubTEMP1(1,16)<1>     ubMEDIAN_TEMP(4,1)<32;16,2>		
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(5,0)<2>    	ubMEDIAN_TEMP(8,0)<32;16,2>		
+ (f0.1) mov (16) ubMEDIAN_TEMP(4,0)<2>		ubMEDIAN_TEMP(7,0)<32;16,2>     
+ (f1.0) mov (16) ubMEDIAN_TEMP(5,1)<2>  	ubMEDIAN_TEMP(8,1)<32;16,2>     
+ (f1.1) mov (16) ubMEDIAN_TEMP(4,1)<2>		ubMEDIAN_TEMP(7,1)<32;16,2>     
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(8,0)<2>     ubTEMP1(0,0)<16;16,1>		
+ (f0.1) mov (16) ubMEDIAN_TEMP(7,0)<2>     ubTEMP1(0,16)<16;16,1>    
+ (f1.0) mov (16) ubMEDIAN_TEMP(8,1)<2>     ubTEMP1(1,0)<16;16,1>     
+ (f1.1) mov (16) ubMEDIAN_TEMP(7,1)<2>     ubTEMP1(1,16)<16;16,1>    
+
+// NOTE:
+// Compare v2 to v8 to find the maximum.
+// Store the maximum for future use.
+ //TODO - Find if MAX is needed.
+// cmp.g.f0.0  (16) null:w         ubMEDIAN_TEMP(%1+2,0)<32;16,2> ubMEDIAN_TEMP(%1+8,0)<32;16,2>
+// cmp.g.f1.0  (16) null:w         ubMEDIAN_TEMP(%1+2,1)<32;16,2> ubMEDIAN_TEMP(%1+8,1)<32;16,2>
+//(f0.0)  mov (16) ubCURR_MAX(0,%2*16+0)<1>   	ubMEDIAN_TEMP(%1+2,0)<32;16,2>
+//(f1.0)  mov (16) ubCURR_MAX(1,%2*16+0)<1>   	ubMEDIAN_TEMP(%1+2,1)<32;16,2>
+//(-f0.0) mov (16) ubCURR_MAX(0,%2*16+0)<1>   	ubMEDIAN_TEMP(%1+8,0)<32;16,2>
+//(-f1.0) mov (16) ubCURR_MAX(1,%2*16+0)<1>   	ubMEDIAN_TEMP(%1+8,1)<32;16,2>
+
+// MedianSwap(v3, v6) - U
+// MedianSwap(v1, v4) - U
+// MedianSwap(v3, v6) - V
+// MedianSwap(v1, v4) - V
+ cmp.g.f0.0 (16) null:w          ubMEDIAN_TEMP(3,0)<32;16,2>   	ubMEDIAN_TEMP(6,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w          ubMEDIAN_TEMP(1,0)<32;16,2>  	ubMEDIAN_TEMP(4,0)<32;16,2>
+ cmp.g.f1.0 (16) null:w          ubMEDIAN_TEMP(3,1)<32;16,2> 	ubMEDIAN_TEMP(6,1)<32;16,2>
+ cmp.g.f1.1 (16) null:w          ubMEDIAN_TEMP(1,1)<32;16,2> 	ubMEDIAN_TEMP(4,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(6,0)<2>     ubMEDIAN_TEMP(3,0)<32;16,2>		
+ (f0.1) mov (16) ubMEDIAN_TEMP(4,0)<2>     ubMEDIAN_TEMP(1,0)<32;16,2>      
+ (f1.0) mov (16) ubMEDIAN_TEMP(6,1)<2>     ubMEDIAN_TEMP(3,1)<32;16,2>      
+ (f1.1) mov (16) ubMEDIAN_TEMP(4,1)<2>     ubMEDIAN_TEMP(1,1)<32;16,2>      
+
+ // MedianSwap(v2,v5) - U
+ // MedianSwap(v4,v7) - U
+ // MedianSwap(v2,v5) - V
+ // MedianSwap(v4,v7) - V
+ cmp.g.f0.0 (16) null:w          ubMEDIAN_TEMP(2,0)<32;16,2>   	ubMEDIAN_TEMP(5,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w          ubMEDIAN_TEMP(4,0)<32;16,2>  	ubMEDIAN_TEMP(7,0)<32;16,2>
+ cmp.g.f1.0 (16) null:w          ubMEDIAN_TEMP(2,1)<32;16,2> 	ubMEDIAN_TEMP(5,1)<32;16,2>
+ cmp.g.f1.1 (16) null:w          ubMEDIAN_TEMP(4,1)<32;16,2> 	ubMEDIAN_TEMP(7,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(2,0)<2>     ubMEDIAN_TEMP(5,0)<32;16,2>		
+ (f0.1) mov (16) ubMEDIAN_TEMP(4,0)<2>     ubMEDIAN_TEMP(7,0)<32;16,2>      
+ (f1.0) mov (16) ubMEDIAN_TEMP(2,1)<2>     ubMEDIAN_TEMP(5,1)<32;16,2>      
+ (f1.1) mov (16) ubMEDIAN_TEMP(4,1)<2>     ubMEDIAN_TEMP(7,1)<32;16,2>      
+
+ // MedianSwap(v4,v2) - U
+ // MedianSwap(v4,v2) - V
+ cmp.g.f0.0 (16) null:w          ubMEDIAN_TEMP(4,0)<32;16,2>   	ubMEDIAN_TEMP(2,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w          ubMEDIAN_TEMP(4,1)<32;16,2>  	ubMEDIAN_TEMP(2,1)<32;16,2>
+
+        mov (16) ubTEMP1(0,0)<1>      ubMEDIAN_TEMP(4,0)<32;16,2>		
+        mov (16) ubTEMP1(0,16)<1>     ubMEDIAN_TEMP(4,1)<32;16,2>		
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(4,0)<2>    	ubMEDIAN_TEMP(2,0)<32;16,2>		
+ (f0.1) mov (16) ubMEDIAN_TEMP(4,1)<2>		ubMEDIAN_TEMP(2,1)<32;16,2>		
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(2,0)<2>     ubTEMP1(0,0)<16;16,1>		
+ (f0.1) mov (16) ubMEDIAN_TEMP(2,1)<2>     ubTEMP1(0,16)<16;16,1>	
+
+ // MedianSwap(v6,v4) - U
+ // MedianSwap(v6,v4) - V
+ cmp.g.f0.0 (16) null:w          ubMEDIAN_TEMP(6,0)<32;16,2>   ubMEDIAN_TEMP(4,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w          ubMEDIAN_TEMP(6,1)<32;16,2>   ubMEDIAN_TEMP(4,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(4,0)<2>    	ubMEDIAN_TEMP(6,0)<32;16,2>		
+ (f0.1) mov (16) ubMEDIAN_TEMP(4,1)<2>		ubMEDIAN_TEMP(6,1)<32;16,2>		
+
+ // MedianSwap(v4,v2) - U
+ // MedianSwap(v4,v2) - V
+ cmp.g.f0.0 (16) null:w          ubMEDIAN_TEMP(4,0)<32;16,2>   	ubMEDIAN_TEMP(2,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w          ubMEDIAN_TEMP(4,1)<32;16,2>  	ubMEDIAN_TEMP(2,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(4,0)<2>    	ubMEDIAN_TEMP(2,0)<32;16,2>		
+ (f0.1) mov (16) ubMEDIAN_TEMP(4,1)<2>		ubMEDIAN_TEMP(2,1)<32;16,2>		
+cmp.g.f0.0 (16) null:w          ubMEDIAN_TEMP(10,0)<32;16,2>  ubMEDIAN_TEMP(11,0)<32;16,2>
+cmp.g.f0.1 (16) null:w          ubMEDIAN_TEMP(13,0)<32;16,2>  ubMEDIAN_TEMP(14,0)<32;16,2>
+cmp.g.f1.0 (16) null:w          ubMEDIAN_TEMP(10,1)<32;16,2> 	ubMEDIAN_TEMP(11,1)<32;16,2>
+cmp.g.f1.1 (16) null:w          ubMEDIAN_TEMP(13,1)<32;16,2> 	ubMEDIAN_TEMP(14,1)<32;16,2>
+
+       	mov (16) ubTEMP1(0,0)<1>      ubMEDIAN_TEMP(10,0)<32;16,2>	
+       	mov (16) ubTEMP1(0,16)<1>     ubMEDIAN_TEMP(13,0)<32;16,2>	
+       	mov (16) ubTEMP1(1,0)<1>      ubMEDIAN_TEMP(10,1)<32;16,2>	
+		mov (16) ubTEMP1(1,16)<1>     ubMEDIAN_TEMP(13,1)<32;16,2>	
+
+(f0.0) mov (16) ubMEDIAN_TEMP(10,0)<2>     	ubMEDIAN_TEMP(11,0)<32;16,2>		
+(f0.1) mov (16) ubMEDIAN_TEMP(13,0)<2>		ubMEDIAN_TEMP(14,0)<32;16,2>		
+(f1.0) mov (16) ubMEDIAN_TEMP(10,1)<2>  		ubMEDIAN_TEMP(11,1)<32;16,2>		
+(f1.1) mov (16) ubMEDIAN_TEMP(13,1)<2>		ubMEDIAN_TEMP(14,1)<32;16,2>		
+
+(f0.0) mov (16) ubMEDIAN_TEMP(11,0)<2>     ubTEMP1(0,0)<16;16,1>		
+(f0.1) mov (16) ubMEDIAN_TEMP(14,0)<2>     ubTEMP1(0,16)<16;16,1>		
+(f1.0) mov (16) ubMEDIAN_TEMP(11,1)<2>     ubTEMP1(1,0)<16;16,1>		
+(f1.1) mov (16) ubMEDIAN_TEMP(14,1)<2>     ubTEMP1(1,16)<16;16,1>   	
+
+// MedianSwap(v7, v8) - U
+// MedianSwap(v0, v1) - U
+// MedianSwap(v7, v8) - V
+// MedianSwap(v0, v1) - V
+ cmp.g.f0.0 (16) null:w          ubMEDIAN_TEMP(16,0)<32;16,2>   	ubMEDIAN_TEMP(17,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w          ubMEDIAN_TEMP(9,0)<32;16,2>  	ubMEDIAN_TEMP(10,0)<32;16,2>
+ cmp.g.f1.0 (16) null:w          ubMEDIAN_TEMP(16,1)<32;16,2> 	ubMEDIAN_TEMP(17,1)<32;16,2>
+ cmp.g.f1.1 (16) null:w          ubMEDIAN_TEMP(9,1)<32;16,2> 	ubMEDIAN_TEMP(10,1)<32;16,2>
+
+        mov (16) ubTEMP1(0,0)<1>      ubMEDIAN_TEMP(16,0)<32;16,2>		
+        mov (16) ubTEMP1(0,16)<1>     ubMEDIAN_TEMP(9,0)<32;16,2>		
+        mov (16) ubTEMP1(1,0)<1>      ubMEDIAN_TEMP(16,1)<32;16,2>		
+ 		mov (16) ubTEMP1(1,16)<1>     ubMEDIAN_TEMP(9,1)<32;16,2>		
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(16,0)<2>   ubMEDIAN_TEMP(17,0)<32;16,2>	   
+ (f0.1) mov (16) ubMEDIAN_TEMP(9,0)<2>	ubMEDIAN_TEMP(10,0)<32;16,2>     
+ (f1.0) mov (16) ubMEDIAN_TEMP(16,1)<2>  	ubMEDIAN_TEMP(17,1)<32;16,2>     
+ (f1.1) mov (16) ubMEDIAN_TEMP(9,1)<2>	ubMEDIAN_TEMP(10,1)<32;16,2>     
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(17,0)<2>     ubTEMP1(0,0)<16;16,1>	   
+ (f0.1) mov (16) ubMEDIAN_TEMP(10,0)<2>     ubTEMP1(0,16)<16;16,1>    
+ (f1.0) mov (16) ubMEDIAN_TEMP(17,1)<2>     ubTEMP1(1,0)<16;16,1>     
+ (f1.1) mov (16) ubMEDIAN_TEMP(10,1)<2>     ubTEMP1(1,16)<16;16,1>    
+
+ // MedianSwap(v3, v4) - U
+ // MedianSwap(v6, v7) - U
+ // MedianSwap(v3, v4) - V
+ // MedianSwap(v6, v7) - V
+ cmp.g.f0.0 (16) null:w          ubMEDIAN_TEMP(12,0)<32;16,2>   	ubMEDIAN_TEMP(13,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w          ubMEDIAN_TEMP(15,0)<32;16,2>  	ubMEDIAN_TEMP(16,0)<32;16,2>
+ cmp.g.f1.0 (16) null:w          ubMEDIAN_TEMP(12,1)<32;16,2> 	ubMEDIAN_TEMP(13,1)<32;16,2>
+ cmp.g.f1.1 (16) null:w          ubMEDIAN_TEMP(15,1)<32;16,2> 	ubMEDIAN_TEMP(16,1)<32;16,2>
+
+        mov (16) ubTEMP1(0,0)<1>      ubMEDIAN_TEMP(12,0)<32;16,2>		
+        mov (16) ubTEMP1(0,16)<1>     ubMEDIAN_TEMP(15,0)<32;16,2>		
+        mov (16) ubTEMP1(1,0)<1>      ubMEDIAN_TEMP(12,1)<32;16,2>		
+ 		mov (16) ubTEMP1(1,16)<1>     ubMEDIAN_TEMP(15,1)<32;16,2>		
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(12,0)<2>   ubMEDIAN_TEMP(13,0)<32;16,2>	   
+ (f0.1) mov (16) ubMEDIAN_TEMP(15,0)<2>	ubMEDIAN_TEMP(16,0)<32;16,2>     
+ (f1.0) mov (16) ubMEDIAN_TEMP(12,1)<2>  	ubMEDIAN_TEMP(13,1)<32;16,2>     
+ (f1.1) mov (16) ubMEDIAN_TEMP(15,1)<2>	ubMEDIAN_TEMP(16,1)<32;16,2>     
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(13,0)<2>     ubTEMP1(0,0)<16;16,1>		
+ (f0.1) mov (16) ubMEDIAN_TEMP(16,0)<2>     ubTEMP1(0,16)<16;16,1>    
+ (f1.0) mov (16) ubMEDIAN_TEMP(13,1)<2>     ubTEMP1(1,0)<16;16,1>     
+ (f1.1) mov (16) ubMEDIAN_TEMP(16,1)<2>     ubTEMP1(1,16)<16;16,1>    
+
+ // MedianSwap(v1, v2) - U
+ // MedianSwap(v4, v5) - U
+ // MedianSwap(v1, v2) - V
+ // MedianSwap(v4, v5) - V
+ cmp.g.f0.0 (16) null:w          ubMEDIAN_TEMP(10,0)<32;16,2>   	ubMEDIAN_TEMP(11,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w          ubMEDIAN_TEMP(13,0)<32;16,2>  	ubMEDIAN_TEMP(14,0)<32;16,2>
+ cmp.g.f1.0 (16) null:w          ubMEDIAN_TEMP(10,1)<32;16,2> 	ubMEDIAN_TEMP(11,1)<32;16,2>
+ cmp.g.f1.1 (16) null:w          ubMEDIAN_TEMP(13,1)<32;16,2> 	ubMEDIAN_TEMP(14,1)<32;16,2>
+
+        mov (16) ubTEMP1(0,0)<1>      ubMEDIAN_TEMP(10,0)<32;16,2>		
+        mov (16) ubTEMP1(0,16)<1>     ubMEDIAN_TEMP(13,0)<32;16,2>		
+        mov (16) ubTEMP1(1,0)<1>      ubMEDIAN_TEMP(10,1)<32;16,2>		
+ 		mov (16) ubTEMP1(1,16)<1>     ubMEDIAN_TEMP(13,1)<32;16,2>		
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(10,0)<2>   ubMEDIAN_TEMP(11,0)<32;16,2>		
+ (f0.1) mov (16) ubMEDIAN_TEMP(13,0)<2>	ubMEDIAN_TEMP(14,0)<32;16,2>     
+ (f1.0) mov (16) ubMEDIAN_TEMP(10,1)<2>  	ubMEDIAN_TEMP(11,1)<32;16,2>     
+ (f1.1) mov (16) ubMEDIAN_TEMP(13,1)<2>	ubMEDIAN_TEMP(14,1)<32;16,2>     
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(11,0)<2>     ubTEMP1(0,0)<16;16,1>		
+ (f0.1) mov (16) ubMEDIAN_TEMP(14,0)<2>     ubTEMP1(0,16)<16;16,1>    
+ (f1.0) mov (16) ubMEDIAN_TEMP(11,1)<2>     ubTEMP1(1,0)<16;16,1>     
+ (f1.1) mov (16) ubMEDIAN_TEMP(14,1)<2>     ubTEMP1(1,16)<16;16,1>    
+
+ // MedianSwap(v7, v8) - U
+ // MedianSwap(v0, v3) - U
+ // MedianSwap(v7, v8) - V
+ // MedianSwap(v0, v3) - V
+ cmp.g.f0.0 (16) null:w          ubMEDIAN_TEMP(16,0)<32;16,2>   	ubMEDIAN_TEMP(17,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w          ubMEDIAN_TEMP(9,0)<32;16,2>  	ubMEDIAN_TEMP(12,0)<32;16,2>
+ cmp.g.f1.0 (16) null:w          ubMEDIAN_TEMP(16,1)<32;16,2> 	ubMEDIAN_TEMP(17,1)<32;16,2>
+ cmp.g.f1.1 (16) null:w          ubMEDIAN_TEMP(9,1)<32;16,2> 	ubMEDIAN_TEMP(12,1)<32;16,2>
+
+        mov (16) ubTEMP1(0,0)<1>      ubMEDIAN_TEMP(16,0)<32;16,2>		
+        mov (16) ubTEMP1(0,16)<1>     ubMEDIAN_TEMP(9,0)<32;16,2>		
+        mov (16) ubTEMP1(1,0)<1>      ubMEDIAN_TEMP(16,1)<32;16,2>		
+ 		mov (16) ubTEMP1(1,16)<1>     ubMEDIAN_TEMP(9,1)<32;16,2>		
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(16,0)<2>   ubMEDIAN_TEMP(17,0)<32;16,2>		
+ (f0.1) mov (16) ubMEDIAN_TEMP(9,0)<2>	ubMEDIAN_TEMP(12,0)<32;16,2>     
+ (f1.0) mov (16) ubMEDIAN_TEMP(16,1)<2>  	ubMEDIAN_TEMP(17,1)<32;16,2>     
+ (f1.1) mov (16) ubMEDIAN_TEMP(9,1)<2>	ubMEDIAN_TEMP(12,1)<32;16,2>     
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(17,0)<2>     ubTEMP1(0,0)<16;16,1>		
+ (f0.1) mov (16) ubMEDIAN_TEMP(12,0)<2>     ubTEMP1(0,16)<16;16,1>    
+ (f1.0) mov (16) ubMEDIAN_TEMP(17,1)<2>     ubTEMP1(1,0)<16;16,1>     
+ (f1.1) mov (16) ubMEDIAN_TEMP(12,1)<2>     ubTEMP1(1,16)<16;16,1>    
+
+ // NOTE:
+ // Compare v0 to v6 to find the minimum.
+ // Store the minimum for future use.
+ //TODO - Find if MIN is needed.
+ //cmp.l.f0.0  (16) null:w          			ubMEDIAN_TEMP(%1+0,0)<32;16,2> 	ubMEDIAN_TEMP(%1+6,0)<32;16,2>
+ //cmp.l.f1.0  (16) null:w          			ubMEDIAN_TEMP(%1+0,1)<32;16,2> 	ubMEDIAN_TEMP(%1+6,1)<32;16,2>
+ //(f0.0)  mov (16) ubCURR_MIN(0,%2*16+0)<1>   		ubMEDIAN_TEMP(%1+0,0)<32;16,2>
+ //(f1.0)  mov (16) ubCURR_MIN(1,%2*16+0)<1>   		ubMEDIAN_TEMP(%1+0,1)<32;16,2>
+ //(-f0.0) mov (16) ubCURR_MIN(0,%2*16+0)<1>   		ubMEDIAN_TEMP(%1+6,0)<32;16,2>
+ //(-f1.0) mov (16) ubCURR_MIN(1,%2*16+0)<1>   		ubMEDIAN_TEMP(%1+6,1)<32;16,2>
+
+ // MedianSwap(v5, v8) - U
+ // MedianSwap(v4, v7) - U
+ // MedianSwap(v5, v8) - V
+ // MedianSwap(v4, v7) - V
+ cmp.g.f0.0 (16) null:w          ubMEDIAN_TEMP(14,0)<32;16,2>   	ubMEDIAN_TEMP(17,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w          ubMEDIAN_TEMP(13,0)<32;16,2>  	ubMEDIAN_TEMP(16,0)<32;16,2>
+ cmp.g.f1.0 (16) null:w          ubMEDIAN_TEMP(14,1)<32;16,2> 	ubMEDIAN_TEMP(17,1)<32;16,2>
+ cmp.g.f1.1 (16) null:w          ubMEDIAN_TEMP(13,1)<32;16,2> 	ubMEDIAN_TEMP(16,1)<32;16,2>
+
+        mov (16) ubTEMP1(0,0)<1>      ubMEDIAN_TEMP(14,0)<32;16,2>		
+        mov (16) ubTEMP1(0,16)<1>     ubMEDIAN_TEMP(13,0)<32;16,2>		
+        mov (16) ubTEMP1(1,0)<1>      ubMEDIAN_TEMP(14,1)<32;16,2>		
+ 		mov (16) ubTEMP1(1,16)<1>     ubMEDIAN_TEMP(13,1)<32;16,2>		
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(14,0)<2>    	ubMEDIAN_TEMP(17,0)<32;16,2>		
+ (f0.1) mov (16) ubMEDIAN_TEMP(13,0)<2>		ubMEDIAN_TEMP(16,0)<32;16,2>     
+ (f1.0) mov (16) ubMEDIAN_TEMP(14,1)<2>  	ubMEDIAN_TEMP(17,1)<32;16,2>     
+ (f1.1) mov (16) ubMEDIAN_TEMP(13,1)<2>		ubMEDIAN_TEMP(16,1)<32;16,2>     
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(17,0)<2>     ubTEMP1(0,0)<16;16,1>		
+ (f0.1) mov (16) ubMEDIAN_TEMP(16,0)<2>     ubTEMP1(0,16)<16;16,1>    
+ (f1.0) mov (16) ubMEDIAN_TEMP(17,1)<2>     ubTEMP1(1,0)<16;16,1>     
+ (f1.1) mov (16) ubMEDIAN_TEMP(16,1)<2>     ubTEMP1(1,16)<16;16,1>    
+
+// NOTE:
+// Compare v2 to v8 to find the maximum.
+// Store the maximum for future use.
+ //TODO - Find if MAX is needed.
+// cmp.g.f0.0  (16) null:w         ubMEDIAN_TEMP(%1+2,0)<32;16,2> ubMEDIAN_TEMP(%1+8,0)<32;16,2>
+// cmp.g.f1.0  (16) null:w         ubMEDIAN_TEMP(%1+2,1)<32;16,2> ubMEDIAN_TEMP(%1+8,1)<32;16,2>
+//(f0.0)  mov (16) ubCURR_MAX(0,%2*16+0)<1>   	ubMEDIAN_TEMP(%1+2,0)<32;16,2>
+//(f1.0)  mov (16) ubCURR_MAX(1,%2*16+0)<1>   	ubMEDIAN_TEMP(%1+2,1)<32;16,2>
+//(-f0.0) mov (16) ubCURR_MAX(0,%2*16+0)<1>   	ubMEDIAN_TEMP(%1+8,0)<32;16,2>
+//(-f1.0) mov (16) ubCURR_MAX(1,%2*16+0)<1>   	ubMEDIAN_TEMP(%1+8,1)<32;16,2>
+
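+// MedianSwap(v3, v6) - U  (half swap: only the larger value is written, into v6)
+// MedianSwap(v1, v4) - U  (half swap: only the larger value is written, into v4)
+// MedianSwap(v3, v6) - V  (half swap: only the larger value is written, into v6)
+// MedianSwap(v1, v4) - V  (half swap: only the larger value is written, into v4)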
+ cmp.g.f0.0 (16) null:w          ubMEDIAN_TEMP(12,0)<32;16,2>   	ubMEDIAN_TEMP(15,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w          ubMEDIAN_TEMP(10,0)<32;16,2>  	ubMEDIAN_TEMP(13,0)<32;16,2>
+ cmp.g.f1.0 (16) null:w          ubMEDIAN_TEMP(12,1)<32;16,2> 	ubMEDIAN_TEMP(15,1)<32;16,2>
+ cmp.g.f1.1 (16) null:w          ubMEDIAN_TEMP(10,1)<32;16,2> 	ubMEDIAN_TEMP(13,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(15,0)<2>     ubMEDIAN_TEMP(12,0)<32;16,2>		
+ (f0.1) mov (16) ubMEDIAN_TEMP(13,0)<2>     ubMEDIAN_TEMP(10,0)<32;16,2>      
+ (f1.0) mov (16) ubMEDIAN_TEMP(15,1)<2>     ubMEDIAN_TEMP(12,1)<32;16,2>      
+ (f1.1) mov (16) ubMEDIAN_TEMP(13,1)<2>     ubMEDIAN_TEMP(10,1)<32;16,2>      
+
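+ // MedianSwap(v2,v5) - U  (half swap: only the smaller value is written, into v2)
+ // MedianSwap(v4,v7) - U  (half swap: only the smaller value is written, into v4)
+ // MedianSwap(v2,v5) - V  (half swap: only the smaller value is written, into v2)
+ // MedianSwap(v4,v7) - V  (half swap: only the smaller value is written, into v4)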
+ cmp.g.f0.0 (16) null:w          ubMEDIAN_TEMP(11,0)<32;16,2>   	ubMEDIAN_TEMP(14,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w          ubMEDIAN_TEMP(13,0)<32;16,2>  	ubMEDIAN_TEMP(16,0)<32;16,2>
+ cmp.g.f1.0 (16) null:w          ubMEDIAN_TEMP(11,1)<32;16,2> 	ubMEDIAN_TEMP(14,1)<32;16,2>
+ cmp.g.f1.1 (16) null:w          ubMEDIAN_TEMP(13,1)<32;16,2> 	ubMEDIAN_TEMP(16,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(11,0)<2>     ubMEDIAN_TEMP(14,0)<32;16,2>		
+ (f0.1) mov (16) ubMEDIAN_TEMP(13,0)<2>     ubMEDIAN_TEMP(16,0)<32;16,2>      
+ (f1.0) mov (16) ubMEDIAN_TEMP(11,1)<2>     ubMEDIAN_TEMP(14,1)<32;16,2>      
+ (f1.1) mov (16) ubMEDIAN_TEMP(13,1)<2>     ubMEDIAN_TEMP(16,1)<32;16,2>      
+
+ // MedianSwap(v4,v2) - U
+ // MedianSwap(v4,v2) - V
+ cmp.g.f0.0 (16) null:w          ubMEDIAN_TEMP(13,0)<32;16,2>   	ubMEDIAN_TEMP(11,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w          ubMEDIAN_TEMP(13,1)<32;16,2>  	ubMEDIAN_TEMP(11,1)<32;16,2>
+
+        mov (16) ubTEMP1(0,0)<1>      ubMEDIAN_TEMP(13,0)<32;16,2>		
+        mov (16) ubTEMP1(0,16)<1>     ubMEDIAN_TEMP(13,1)<32;16,2>		
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(13,0)<2>    	ubMEDIAN_TEMP(11,0)<32;16,2>		
+ (f0.1) mov (16) ubMEDIAN_TEMP(13,1)<2>		ubMEDIAN_TEMP(11,1)<32;16,2>		
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(11,0)<2>     ubTEMP1(0,0)<16;16,1>		
+ (f0.1) mov (16) ubMEDIAN_TEMP(11,1)<2>     ubTEMP1(0,16)<16;16,1>	
+
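+ // MedianSwap(v6,v4) - U  (half swap: only the larger value is written, into v4)
+ // MedianSwap(v6,v4) - V  (half swap: only the larger value is written, into v4)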
+ cmp.g.f0.0 (16) null:w          ubMEDIAN_TEMP(15,0)<32;16,2>   ubMEDIAN_TEMP(13,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w          ubMEDIAN_TEMP(15,1)<32;16,2>   ubMEDIAN_TEMP(13,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(13,0)<2>    	ubMEDIAN_TEMP(15,0)<32;16,2>		
+ (f0.1) mov (16) ubMEDIAN_TEMP(13,1)<2>		ubMEDIAN_TEMP(15,1)<32;16,2>		
+
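+ // MedianSwap(v4,v2) - U  (half swap: only the smaller value is written, into v4)
+ // MedianSwap(v4,v2) - V  (half swap: only the smaller value is written, into v4)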
+ cmp.g.f0.0 (16) null:w          ubMEDIAN_TEMP(13,0)<32;16,2>   	ubMEDIAN_TEMP(11,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w          ubMEDIAN_TEMP(13,1)<32;16,2>  	ubMEDIAN_TEMP(11,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(13,0)<2>    	ubMEDIAN_TEMP(11,0)<32;16,2>		
+ (f0.1) mov (16) ubMEDIAN_TEMP(13,1)<2>		ubMEDIAN_TEMP(11,1)<32;16,2>		
+
+// Sobel Value calculation for the current pixel v4
+//          v2 v1 v0
+//           *  *  *     <--- Different field - not used
+//          v5 v4 v3
+//           *  *  *     <--- Different field - not used
+//          v8 v7 v6
+//
+//    Gx = -v0 - 2*v3 - v6 + v2 + 2*v5 + v8
+//    Gy =  v0 + 2*v1 + v2 - v6 - 2*v7 - v8
+//
+//  Sobel = (|Gx| + |Gy|) >> 3
+
+//TODO - Change Later - rT
+add (1) a0.0:uw  a0.0<0;1,0>:uw -128:uw
+
+// - 2 * v3
+mul (16) acc0.0<1>:w  		r[a0.0,64]<16;16,1>:ub  		-2:w
+// + v8
+mac (16) acc0.0<1>:w  		r[a0.0,132]<16;16,1>:ub   		1:w
+// - v0
+mac (16) acc0.0<1>:w  		r[a0.0,0]<16;16,1>:ub  		-1:w
+// - v6
+mac (16) acc0.0<1>:w  		r[a0.0,128]<16;16,1>:ub  		-1:w
+// + v2
+mac (16) acc0.0<1>:w  		r[a0.0,4]<16;16,1>:ub   		1:w
+// + 2 * v5
+mac (16) wSOBEL_X(0)<1> 	r[a0.0,68]<16;16,1>:ub   		2:w
+// - 2 * v3
+mul (16) acc0.0<1>:w  		r[a0.0,96]<16;16,1>:ub  		-2:w
+// + v8
+mac (16) acc0.0<1>:w  		r[a0.0,164]<16;16,1>:ub   		1:w
+// - v0
+mac (16) acc0.0<1>:w  		r[a0.0,32]<16;16,1>:ub  		-1:w
+// - v6
+mac (16) acc0.0<1>:w  		r[a0.0,160]<16;16,1>:ub  		-1:w
+// + v2
+mac (16) acc0.0<1>:w  		r[a0.0,36]<16;16,1>:ub   		1:w
+// + 2 * v5
+mac (16) wSOBEL_X(1)<1> 	r[a0.0,100]<16;16,1>:ub   		2:w
+// - 2 * v3
+mul (16) acc0.0<1>:w  		r[a0.0,128]<16;16,1>:ub  		-2:w
+// + v8
+mac (16) acc0.0<1>:w  		r[a0.0,196]<16;16,1>:ub   		1:w
+// - v0
+mac (16) acc0.0<1>:w  		r[a0.0,64]<16;16,1>:ub  		-1:w
+// - v6
+mac (16) acc0.0<1>:w  		r[a0.0,192]<16;16,1>:ub  		-1:w
+// + v2
+mac (16) acc0.0<1>:w  		r[a0.0,68]<16;16,1>:ub   		1:w
+// + 2 * v5
+mac (16) wSOBEL_X(2)<1> 	r[a0.0,132]<16;16,1>:ub   		2:w
+// - 2 * v3
+mul (16) acc0.0<1>:w  		r[a0.0,160]<16;16,1>:ub  		-2:w
+// + v8
+mac (16) acc0.0<1>:w  		r[a0.0,228]<16;16,1>:ub   		1:w
+// - v0
+mac (16) acc0.0<1>:w  		r[a0.0,96]<16;16,1>:ub  		-1:w
+// - v6
+mac (16) acc0.0<1>:w  		r[a0.0,224]<16;16,1>:ub  		-1:w
+// + v2
+mac (16) acc0.0<1>:w  		r[a0.0,100]<16;16,1>:ub   		1:w
+// + 2 * v5
+mac (16) wSOBEL_X(3)<1> 	r[a0.0,164]<16;16,1>:ub   		2:w
+
+// + 2 * v1
+mul (16) acc0.0<1>:w  r[a0.0,2]<16;16,1>:ub   	2:w
+// + v0
+mac (16) acc0.0<1>:w  r[a0.0,0]<16;16,1>:ub   	1:w
+// - v8
+mac (16) acc0.0<1>:w  r[a0.0,132]<16;16,1>:ub  -1:w
+// + v2
+mac (16) acc0.0<1>:w  r[a0.0,4]<16;16,1>:ub   	1:w
+// - v6
+mac (16) acc0.0<1>:w  r[a0.0,128]<16;16,1>:ub  -1:w
+// - 2 * v7
+mac (16) acc0.0<1>:w  r[a0.0,130]<16;16,1>:ub  -2:w
+
+add (16) acc0.0<1>:uw (abs)acc0.0<16;16,1>:w		(abs)wSOBEL_X(0)<16;16,1>
+
+shr (16) uwSOBEL(0)<1>	acc0.0<16;16,1>:uw   3:uw
+// + 2 * v1
+mul (16) acc0.0<1>:w  r[a0.0,34]<16;16,1>:ub   	2:w
+// + v0
+mac (16) acc0.0<1>:w  r[a0.0,32]<16;16,1>:ub   	1:w
+// - v8
+mac (16) acc0.0<1>:w  r[a0.0,164]<16;16,1>:ub  -1:w
+// + v2
+mac (16) acc0.0<1>:w  r[a0.0,36]<16;16,1>:ub   	1:w
+// - v6
+mac (16) acc0.0<1>:w  r[a0.0,160]<16;16,1>:ub  -1:w
+// - 2 * v7
+mac (16) acc0.0<1>:w  r[a0.0,162]<16;16,1>:ub  -2:w
+
+add (16) acc0.0<1>:uw (abs)acc0.0<16;16,1>:w		(abs)wSOBEL_X(1)<16;16,1>
+
+shr (16) uwSOBEL(1)<1>	acc0.0<16;16,1>:uw   3:uw
+// + 2 * v1
+mul (16) acc0.0<1>:w  r[a0.0,66]<16;16,1>:ub   	2:w
+// + v0
+mac (16) acc0.0<1>:w  r[a0.0,64]<16;16,1>:ub   	1:w
+// - v8
+mac (16) acc0.0<1>:w  r[a0.0,196]<16;16,1>:ub  -1:w
+// + v2
+mac (16) acc0.0<1>:w  r[a0.0,68]<16;16,1>:ub   	1:w
+// - v6
+mac (16) acc0.0<1>:w  r[a0.0,192]<16;16,1>:ub  -1:w
+// - 2 * v7
+mac (16) acc0.0<1>:w  r[a0.0,194]<16;16,1>:ub  -2:w
+
+add (16) acc0.0<1>:uw (abs)acc0.0<16;16,1>:w		(abs)wSOBEL_X(2)<16;16,1>
+
+shr (16) uwSOBEL(2)<1>	acc0.0<16;16,1>:uw   3:uw
+// + 2 * v1
+mul (16) acc0.0<1>:w  r[a0.0,98]<16;16,1>:ub   	2:w
+// + v0
+mac (16) acc0.0<1>:w  r[a0.0,96]<16;16,1>:ub   	1:w
+// - v8
+mac (16) acc0.0<1>:w  r[a0.0,228]<16;16,1>:ub  -1:w
+// + v2
+mac (16) acc0.0<1>:w  r[a0.0,100]<16;16,1>:ub   	1:w
+// - v6
+mac (16) acc0.0<1>:w  r[a0.0,224]<16;16,1>:ub  -1:w
+// - 2 * v7
+mac (16) acc0.0<1>:w  r[a0.0,226]<16;16,1>:ub  -2:w
+
+add (16) acc0.0<1>:uw (abs)acc0.0<16;16,1>:w		(abs)wSOBEL_X(3)<16;16,1>
+
+shr (16) uwSOBEL(3)<1>	acc0.0<16;16,1>:uw   3:uw
+
+//Mov Median in CURBE_TEMP to free up temp space.
+mov (16)	ubMEDIAN(0,0)<1>  	ubMEDIAN_TEMP(4,0)<16;16,1>		
+mov (16)	ubMEDIAN(0,16)<1> ubMEDIAN_TEMP(4,16)<16;16,1>		
+mov (16)	ubMEDIAN(0,32)<1>  	ubMEDIAN_TEMP(13,0)<16;16,1>		
+mov (16)	ubMEDIAN(0,48)<1> ubMEDIAN_TEMP(13,16)<16;16,1>		
+
+// Find:
+//      absDiff = abs(ubCurY - ubMedian)
+// Find the difference between pixel and median value.
+
+//Median is interleaved. So difference is also interleaved.
+
+//------------------------------------------------------------------------------------------
+//Process 16 U and 16 V pixels here and rest later.
+// first row - v0,v1,v2
+add (16) wDIFF(0)<1>   r[a0.0,0]<16;16,1>:ub  -ubMEDIAN(0,0)<16;16,1>
+add (16) wDIFF(1)<1>   r[a0.0,2]<16;16,1>:ub  -ubMEDIAN(0,0)<16;16,1>
+add (16) wDIFF(2)<1>   r[a0.0,4]<16;16,1>:ub  -ubMEDIAN(0,0)<16;16,1>
+
+// second row - v3,v4,v5
+add (16) wDIFF(3)<1>   r[a0.0,64]<16;16,1>:ub  -ubMEDIAN(0,0)<16;16,1>
+add (16) wDIFF(4)<1>   r[a0.0,66]<16;16,1>:ub  -ubMEDIAN(0,0)<16;16,1>
+add (16) wDIFF(5)<1>   r[a0.0,68]<16;16,1>:ub  -ubMEDIAN(0,0)<16;16,1>
+
+// third row - v6,v7,v8
+add (16) wDIFF(6)<1>   r[a0.0,128]<16;16,1>:ub  -ubMEDIAN(0,0)<16;16,1>
+add (16) wDIFF(7)<1>   r[a0.0,130]<16;16,1>:ub  -ubMEDIAN(0,0)<16;16,1>
+add (16) wDIFF(8)<1>   r[a0.0,132]<16;16,1>:ub  -ubMEDIAN(0,0)<16;16,1>
+// first row - v0,v1,v2
+add (16) wDIFF(9)<1>   r[a0.0,32]<16;16,1>:ub  -ubMEDIAN(0,16)<16;16,1>
+add (16) wDIFF(10)<1>   r[a0.0,34]<16;16,1>:ub  -ubMEDIAN(0,16)<16;16,1>
+add (16) wDIFF(11)<1>   r[a0.0,36]<16;16,1>:ub  -ubMEDIAN(0,16)<16;16,1>
+
+// second row - v3,v4,v5
+add (16) wDIFF(12)<1>   r[a0.0,96]<16;16,1>:ub  -ubMEDIAN(0,16)<16;16,1>
+add (16) wDIFF(13)<1>   r[a0.0,98]<16;16,1>:ub  -ubMEDIAN(0,16)<16;16,1>
+add (16) wDIFF(14)<1>   r[a0.0,100]<16;16,1>:ub  -ubMEDIAN(0,16)<16;16,1>
+
+// third row - v6,v7,v8
+add (16) wDIFF(15)<1>   r[a0.0,160]<16;16,1>:ub  -ubMEDIAN(0,16)<16;16,1>
+add (16) wDIFF(16)<1>   r[a0.0,162]<16;16,1>:ub  -ubMEDIAN(0,16)<16;16,1>
+add (16) wDIFF(17)<1>   r[a0.0,164]<16;16,1>:ub  -ubMEDIAN(0,16)<16;16,1>
+
+//TODO - Change Later - rT
+add (1) a0.0:uw  a0.0<0;1,0>:uw 64:uw
+
+// Find sum of all absolute differences AND
+// maximum absolute difference for 16 U and 16 V here.
+//First 2 rows of 8x4
+//Compare 0-1, 2-3, 4-5, 6-7
+cmp.g.f0.0 (16) null:uw         (abs)wDIFF(0)<16;16,1>   (abs)wDIFF(1)<16;16,1>
+cmp.g.f0.1 (16) null:uw         (abs)wDIFF(2)<16;16,1>   (abs)wDIFF(3)<16;16,1>
+cmp.g.f1.0 (16) null:uw         (abs)wDIFF(4)<16;16,1>   (abs)wDIFF(5)<16;16,1>
+cmp.g.f1.1 (16) null:uw         (abs)wDIFF(6)<16;16,1>   (abs)wDIFF(7)<16;16,1>
+
+//Calculate SAD
+	add        (16) acc0.0<1>:uw     (abs)wDIFF(0)<16;16,1>  (abs)wDIFF(1)<16;16,1>
+	add        (16) acc0.0<1>:uw     acc0.0<16;16,1>:uw 		(abs)wDIFF(2)<16;16,1>
+	add        (16) acc0.0<1>:uw     acc0.0<16;16,1>:uw 		(abs)wDIFF(3)<16;16,1>
+	add        (16) acc0.0<1>:uw     acc0.0<16;16,1>:uw 		(abs)wDIFF(4)<16;16,1>
+	add        (16) acc0.0<1>:uw     acc0.0<16;16,1>:uw 		(abs)wDIFF(5)<16;16,1>
+	add        (16) acc0.0<1>:uw     acc0.0<16;16,1>:uw 		(abs)wDIFF(6)<16;16,1>
+	add        (16) acc0.0<1>:uw     acc0.0<16;16,1>:uw 		(abs)wDIFF(7)<16;16,1>
+	add        (16) uwSOAD(0)<1>  	 acc0.0<16;16,1>:uw 		(abs)wDIFF(8)<16;16,1>
+
+(f0.0) sel (16) uwCURBE_TEMP(0)<1> (abs)wDIFF(0)<16;16,1>   (abs)wDIFF(1)<16;16,1>
+(f0.1) sel (16) uwCURBE_TEMP(1)<1> (abs)wDIFF(2)<16;16,1>   (abs)wDIFF(3)<16;16,1>
+(f1.0) sel (16) uwCURBE_TEMP(2)<1> (abs)wDIFF(4)<16;16,1>   (abs)wDIFF(5)<16;16,1>
+(f1.1) sel (16) uwCURBE_TEMP(3)<1> (abs)wDIFF(6)<16;16,1>   (abs)wDIFF(7)<16;16,1>
+
+//------------
+	//DIFF(0-7) is not needed here. Populate it.
+	// first row - v0,v1,v2
+	add (16) wDIFF(0)<1>   r[a0.0,0]<16;16,1>:ub  -ubMEDIAN(1,0)<16;16,1>
+	add (16) wDIFF(1)<1>   r[a0.0,2]<16;16,1>:ub  -ubMEDIAN(1,0)<16;16,1>
+	add (16) wDIFF(2)<1>   r[a0.0,4]<16;16,1>:ub  -ubMEDIAN(1,0)<16;16,1>
+
+	// second row - v3,v4,v5
+	add (16) wDIFF(3)<1>   r[a0.0,64]<16;16,1>:ub  -ubMEDIAN(1,0)<16;16,1>
+	add (16) wDIFF(4)<1>   r[a0.0,66]<16;16,1>:ub  -ubMEDIAN(1,0)<16;16,1>
+	add (16) wDIFF(5)<1>   r[a0.0,68]<16;16,1>:ub  -ubMEDIAN(1,0)<16;16,1>
+
+	// third row - v6,v7
+	add (16) wDIFF(6)<1>   r[a0.0,128]<16;16,1>:ub  -ubMEDIAN(1,0)<16;16,1>
+	add (16) wDIFF(7)<1>   r[a0.0,130]<16;16,1>:ub  -ubMEDIAN(1,0)<16;16,1>
+//------------
+
+//Compare Max(0,1) - Max(2,3), Max(4,5) - Max(6,7)
+cmp.g.f0.0 (16) null:uw      uwCURBE_TEMP(0)<16;16,1>   uwCURBE_TEMP(1)<16;16,1>
+cmp.g.f0.1 (16) null:uw      uwCURBE_TEMP(2)<16;16,1>   uwCURBE_TEMP(3)<16;16,1>
+
+(f0.0)sel (16) uwCURBE_TEMP(0)<1>  uwCURBE_TEMP(0)<16;16,1>   uwCURBE_TEMP(1)<16;16,1>
+(f0.1)sel (16) uwCURBE_TEMP(2)<1>  uwCURBE_TEMP(2)<16;16,1>   uwCURBE_TEMP(3)<16;16,1>
+
+//Compare Max(0,1,2,3) - Max(4,5,6,7)
+cmp.g.f0.0 (16) null:uw      		uwCURBE_TEMP(0)<16;16,1>   uwCURBE_TEMP(2)<16;16,1>
+(f0.0)sel  (16) uwCURBE_TEMP(0)<1> 	uwCURBE_TEMP(0)<16;16,1>   uwCURBE_TEMP(2)<16;16,1>
+
+//Compare Max(0,1,2,3,4,5,6,7) - 8
+cmp.g.f0.0 (16) null:uw      			uwCURBE_TEMP(0)<16;16,1>   (abs)wDIFF(8)<16;16,1>
+(f0.0)sel  (16) uwMAX_ABS_DIFF(0)<1> uwCURBE_TEMP(0)<16;16,1>   (abs)wDIFF(8)<16;16,1>
+
+//------------
+	//Load v8 - DIFF(8)
+	add (16) wDIFF(8)<1>   			r[a0.0,132]<16;16,1>:ub  -ubMEDIAN(1,0)<16;16,1>
+//------------
+//Compare 0-1, 2-3, 4-5, 6-7
+cmp.g.f0.0 (16) null:uw         (abs)wDIFF(9)<16;16,1>   (abs)wDIFF(10)<16;16,1>
+cmp.g.f0.1 (16) null:uw         (abs)wDIFF(11)<16;16,1>   (abs)wDIFF(12)<16;16,1>
+cmp.g.f1.0 (16) null:uw         (abs)wDIFF(13)<16;16,1>   (abs)wDIFF(14)<16;16,1>
+cmp.g.f1.1 (16) null:uw         (abs)wDIFF(15)<16;16,1>   (abs)wDIFF(16)<16;16,1>
+
+//Calculate SAD
+	add        (16) acc0.0<1>:uw     (abs)wDIFF(9)<16;16,1>  (abs)wDIFF(10)<16;16,1>
+	add        (16) acc0.0<1>:uw     acc0.0<16;16,1>:uw 		(abs)wDIFF(11)<16;16,1>
+	add        (16) acc0.0<1>:uw     acc0.0<16;16,1>:uw 		(abs)wDIFF(12)<16;16,1>
+	add        (16) acc0.0<1>:uw     acc0.0<16;16,1>:uw 		(abs)wDIFF(13)<16;16,1>
+	add        (16) acc0.0<1>:uw     acc0.0<16;16,1>:uw 		(abs)wDIFF(14)<16;16,1>
+	add        (16) acc0.0<1>:uw     acc0.0<16;16,1>:uw 		(abs)wDIFF(15)<16;16,1>
+	add        (16) acc0.0<1>:uw     acc0.0<16;16,1>:uw 		(abs)wDIFF(16)<16;16,1>
+	add        (16) uwSOAD(1)<1>  	 acc0.0<16;16,1>:uw 		(abs)wDIFF(17)<16;16,1>
+
+(f0.0) sel (16) uwCURBE_TEMP(0)<1> (abs)wDIFF(9)<16;16,1>   (abs)wDIFF(10)<16;16,1>
+(f0.1) sel (16) uwCURBE_TEMP(1)<1> (abs)wDIFF(11)<16;16,1>   (abs)wDIFF(12)<16;16,1>
+(f1.0) sel (16) uwCURBE_TEMP(2)<1> (abs)wDIFF(13)<16;16,1>   (abs)wDIFF(14)<16;16,1>
+(f1.1) sel (16) uwCURBE_TEMP(3)<1> (abs)wDIFF(15)<16;16,1>   (abs)wDIFF(16)<16;16,1>
+
+//------------
+	//DIFF(9-16) is no longer needed here. Repopulate it using ubMEDIAN(1,16).
+	// first row - v0,v1,v2
+	add (16) wDIFF(9)<1>   r[a0.0,32]<16;16,1>:ub  -ubMEDIAN(1,16)<16;16,1>
+	add (16) wDIFF(10)<1>   r[a0.0,34]<16;16,1>:ub  -ubMEDIAN(1,16)<16;16,1>
+	add (16) wDIFF(11)<1>   r[a0.0,36]<16;16,1>:ub  -ubMEDIAN(1,16)<16;16,1>
+
+	// second row - v3,v4,v5
+	add (16) wDIFF(12)<1>   r[a0.0,96]<16;16,1>:ub  -ubMEDIAN(1,16)<16;16,1>
+	add (16) wDIFF(13)<1>   r[a0.0,98]<16;16,1>:ub  -ubMEDIAN(1,16)<16;16,1>
+	add (16) wDIFF(14)<1>   r[a0.0,100]<16;16,1>:ub  -ubMEDIAN(1,16)<16;16,1>
+
+	// third row - v6,v7
+	add (16) wDIFF(15)<1>   r[a0.0,160]<16;16,1>:ub  -ubMEDIAN(1,16)<16;16,1>
+	add (16) wDIFF(16)<1>   r[a0.0,162]<16;16,1>:ub  -ubMEDIAN(1,16)<16;16,1>
+//------------
+
+//Compare Max(0,1) - Max(2,3), Max(4,5) - Max(6,7)
+cmp.g.f0.0 (16) null:uw      uwCURBE_TEMP(0)<16;16,1>   uwCURBE_TEMP(1)<16;16,1>
+cmp.g.f0.1 (16) null:uw      uwCURBE_TEMP(2)<16;16,1>   uwCURBE_TEMP(3)<16;16,1>
+
+(f0.0)sel (16) uwCURBE_TEMP(0)<1>  uwCURBE_TEMP(0)<16;16,1>   uwCURBE_TEMP(1)<16;16,1>
+(f0.1)sel (16) uwCURBE_TEMP(2)<1>  uwCURBE_TEMP(2)<16;16,1>   uwCURBE_TEMP(3)<16;16,1>
+
+//Compare Max(0,1,2,3) - Max(4,5,6,7)
+cmp.g.f0.0 (16) null:uw      		uwCURBE_TEMP(0)<16;16,1>   uwCURBE_TEMP(2)<16;16,1>
+(f0.0)sel  (16) uwCURBE_TEMP(0)<1> 	uwCURBE_TEMP(0)<16;16,1>   uwCURBE_TEMP(2)<16;16,1>
+
+//Compare Max(0,1,2,3,4,5,6,7) - 8
+cmp.g.f0.0 (16) null:uw      			uwCURBE_TEMP(0)<16;16,1>   (abs)wDIFF(17)<16;16,1>
+(f0.0)sel  (16) uwMAX_ABS_DIFF(1)<1> uwCURBE_TEMP(0)<16;16,1>   (abs)wDIFF(17)<16;16,1>
+
+//------------
+	//Load v8 - DIFF(17)
+	add (16) wDIFF(17)<1>   			r[a0.0,164]<16;16,1>:ub  -ubMEDIAN(1,16)<16;16,1>
+//------------
+
+//if ((sobel_edge_measure < m_SobelEdgeThreshold) && ((block_max-block_min) < m_LocalDiffThreshold))
+//						if (sigma_mb_min > sigma)
+//							sigma_mb_min = sigma;
+
+//NOTE: block_min is always zero, as the median is one of the values in the 3x3 block. So there is no need to calculate it.
+//		So just do -
+//if ((sobel_edge_measure < m_SobelEdgeThreshold) && ((block_max) < m_LocalDiffThreshold) && ( sigma < sigma_mb_min))
+//							sigma_mb_min = sigma;
+
+//We are processing 32 bytes of U and 32 bytes of V - each of size 8x4.
+//Compare first 8 bytes with max possible (255).
+//Start above condition from second 8 bytes.
+
+//TODO - Change Later - rT
+//	mov (1)	pCUR_MIN_SOAD_8x4:uw	1752:uw		//r54.24:ub
+
+//First row of 8x4
+        cmp.l.f0.0 	(16) null:uw     		uwSOBEL(0)<16;16,1>         r55.30<0;2,1>:ub
+(f0.0)  cmp.l.f0.0 	(16) null:uw     		uwMAX_ABS_DIFF(0)<16;16,1>  r55.28<0;2,1>:ub
+(f0.0)  cmp.l.f0.0 	(16) null:uw     		uwSOAD(0)<16;16,1>			255:uw
+(f0.0)  sel 		(16) uwSOBEL(0)<1>   uwSOAD(0)<16;16,1>			255:uw
+
+//Second row of 8x4
+		cmp.l.f0.0 	(16) null:uw     		uwSOBEL(1)<16;16,1>         r55.30<0;2,1>:ub
+(f0.0)  cmp.l.f0.0 	(16) null:uw     		uwMAX_ABS_DIFF(1)<16;16,1>  r55.28<0;2,1>:ub
+(f0.0)  cmp.l.f0.0 	(16) null:uw     		uwSOAD(1)<16;16,1>			uwSOBEL(0)<16;16,1>
+(f0.0)  mov 		(16) uwSOBEL(0)<1>   uwSOAD(1)<16;16,1>
+
+// Find sum of all absolute differences AND
+// maximum absolute difference for 16 U and 16 V here.
+//Second 2 rows of 8x4
+//Compare 0-1, 2-3, 4-5, 6-7
+cmp.g.f0.0 (16) null:uw         (abs)wDIFF(0)<16;16,1>   (abs)wDIFF(1)<16;16,1>
+cmp.g.f0.1 (16) null:uw         (abs)wDIFF(2)<16;16,1>   (abs)wDIFF(3)<16;16,1>
+cmp.g.f1.0 (16) null:uw         (abs)wDIFF(4)<16;16,1>   (abs)wDIFF(5)<16;16,1>
+cmp.g.f1.1 (16) null:uw         (abs)wDIFF(6)<16;16,1>   (abs)wDIFF(7)<16;16,1>
+
+//Calculate SAD
+	add        (16) acc0.0<1>:uw     (abs)wDIFF(0)<16;16,1>  (abs)wDIFF(1)<16;16,1>
+	add        (16) acc0.0<1>:uw     acc0.0<16;16,1>:uw 		(abs)wDIFF(2)<16;16,1>
+	add        (16) acc0.0<1>:uw     acc0.0<16;16,1>:uw 		(abs)wDIFF(3)<16;16,1>
+	add        (16) acc0.0<1>:uw     acc0.0<16;16,1>:uw 		(abs)wDIFF(4)<16;16,1>
+	add        (16) acc0.0<1>:uw     acc0.0<16;16,1>:uw 		(abs)wDIFF(5)<16;16,1>
+	add        (16) acc0.0<1>:uw     acc0.0<16;16,1>:uw 		(abs)wDIFF(6)<16;16,1>
+	add        (16) acc0.0<1>:uw     acc0.0<16;16,1>:uw 		(abs)wDIFF(7)<16;16,1>
+	add        (16) uwSOAD(0)<1> 	 acc0.0<16;16,1>:uw 		(abs)wDIFF(8)<16;16,1>
+
+(f0.0) sel (16) uwCURBE_TEMP(0)<1> (abs)wDIFF(0)<16;16,1>   (abs)wDIFF(1)<16;16,1>
+(f0.1) sel (16) uwCURBE_TEMP(1)<1> (abs)wDIFF(2)<16;16,1>   (abs)wDIFF(3)<16;16,1>
+(f1.0) sel (16) uwCURBE_TEMP(2)<1> (abs)wDIFF(4)<16;16,1>   (abs)wDIFF(5)<16;16,1>
+(f1.1) sel (16) uwCURBE_TEMP(3)<1> (abs)wDIFF(6)<16;16,1>   (abs)wDIFF(7)<16;16,1>
+
+//Compare Max(0,1) - Max(2,3), Max(4,5) - Max(6,7)
+cmp.g.f0.0 (16) null:uw      uwCURBE_TEMP(0)<16;16,1>   uwCURBE_TEMP(1)<16;16,1>
+cmp.g.f0.1 (16) null:uw      uwCURBE_TEMP(2)<16;16,1>   uwCURBE_TEMP(3)<16;16,1>
+
+(f0.0)sel (16) uwCURBE_TEMP(0)<1>  uwCURBE_TEMP(0)<16;16,1>   uwCURBE_TEMP(1)<16;16,1>
+(f0.1)sel (16) uwCURBE_TEMP(2)<1>  uwCURBE_TEMP(2)<16;16,1>   uwCURBE_TEMP(3)<16;16,1>
+
+//Compare Max(0,1,2,3) - Max(4,5,6,7)
+cmp.g.f0.0 (16) null:uw      		uwCURBE_TEMP(0)<16;16,1>   uwCURBE_TEMP(2)<16;16,1>
+(f0.0)sel  (16) uwCURBE_TEMP(0)<1> 	uwCURBE_TEMP(0)<16;16,1>   uwCURBE_TEMP(2)<16;16,1>
+
+//Compare Max(0,1,2,3,4,5,6,7) - 8
+cmp.g.f0.0 (16) null:uw      			uwCURBE_TEMP(0)<16;16,1>   (abs)wDIFF(8)<16;16,1>
+(f0.0)sel  (16) uwMAX_ABS_DIFF(0)<1> 	uwCURBE_TEMP(0)<16;16,1>   (abs)wDIFF(8)<16;16,1>
+//Compare 0-1, 2-3, 4-5, 6-7
+cmp.g.f0.0 (16) null:uw         (abs)wDIFF(9)<16;16,1>   (abs)wDIFF(10)<16;16,1>
+cmp.g.f0.1 (16) null:uw         (abs)wDIFF(11)<16;16,1>   (abs)wDIFF(12)<16;16,1>
+cmp.g.f1.0 (16) null:uw         (abs)wDIFF(13)<16;16,1>   (abs)wDIFF(14)<16;16,1>
+cmp.g.f1.1 (16) null:uw         (abs)wDIFF(15)<16;16,1>   (abs)wDIFF(16)<16;16,1>
+
+//Calculate SAD
+	add        (16) acc0.0<1>:uw     (abs)wDIFF(9)<16;16,1>  (abs)wDIFF(10)<16;16,1>
+	add        (16) acc0.0<1>:uw     acc0.0<16;16,1>:uw 		(abs)wDIFF(11)<16;16,1>
+	add        (16) acc0.0<1>:uw     acc0.0<16;16,1>:uw 		(abs)wDIFF(12)<16;16,1>
+	add        (16) acc0.0<1>:uw     acc0.0<16;16,1>:uw 		(abs)wDIFF(13)<16;16,1>
+	add        (16) acc0.0<1>:uw     acc0.0<16;16,1>:uw 		(abs)wDIFF(14)<16;16,1>
+	add        (16) acc0.0<1>:uw     acc0.0<16;16,1>:uw 		(abs)wDIFF(15)<16;16,1>
+	add        (16) acc0.0<1>:uw     acc0.0<16;16,1>:uw 		(abs)wDIFF(16)<16;16,1>
+	add        (16) uwSOAD(1)<1> 	 acc0.0<16;16,1>:uw 		(abs)wDIFF(17)<16;16,1>
+
+(f0.0) sel (16) uwCURBE_TEMP(0)<1> (abs)wDIFF(9)<16;16,1>   (abs)wDIFF(10)<16;16,1>
+(f0.1) sel (16) uwCURBE_TEMP(1)<1> (abs)wDIFF(11)<16;16,1>   (abs)wDIFF(12)<16;16,1>
+(f1.0) sel (16) uwCURBE_TEMP(2)<1> (abs)wDIFF(13)<16;16,1>   (abs)wDIFF(14)<16;16,1>
+(f1.1) sel (16) uwCURBE_TEMP(3)<1> (abs)wDIFF(15)<16;16,1>   (abs)wDIFF(16)<16;16,1>
+
+//Compare Max(0,1) - Max(2,3), Max(4,5) - Max(6,7)
+cmp.g.f0.0 (16) null:uw      uwCURBE_TEMP(0)<16;16,1>   uwCURBE_TEMP(1)<16;16,1>
+cmp.g.f0.1 (16) null:uw      uwCURBE_TEMP(2)<16;16,1>   uwCURBE_TEMP(3)<16;16,1>
+
+(f0.0)sel (16) uwCURBE_TEMP(0)<1>  uwCURBE_TEMP(0)<16;16,1>   uwCURBE_TEMP(1)<16;16,1>
+(f0.1)sel (16) uwCURBE_TEMP(2)<1>  uwCURBE_TEMP(2)<16;16,1>   uwCURBE_TEMP(3)<16;16,1>
+
+//Compare Max(0,1,2,3) - Max(4,5,6,7)
+cmp.g.f0.0 (16) null:uw      		uwCURBE_TEMP(0)<16;16,1>   uwCURBE_TEMP(2)<16;16,1>
+(f0.0)sel  (16) uwCURBE_TEMP(0)<1> 	uwCURBE_TEMP(0)<16;16,1>   uwCURBE_TEMP(2)<16;16,1>
+
+//Compare Max(0,1,2,3,4,5,6,7) - 8
+cmp.g.f0.0 (16) null:uw      			uwCURBE_TEMP(0)<16;16,1>   (abs)wDIFF(17)<16;16,1>
+(f0.0)sel  (16) uwMAX_ABS_DIFF(1)<1> 	uwCURBE_TEMP(0)<16;16,1>   (abs)wDIFF(17)<16;16,1>
+
+//Third row of 8x4
+        cmp.l.f0.0 	(16) null:uw     		uwSOBEL(2)<16;16,1>     	r55.30<0;2,1>:ub
+(f0.0)  cmp.l.f0.0 	(16) null:uw     		uwMAX_ABS_DIFF(0)<16;16,1>  r55.28<0;2,1>:ub
+(f0.0)  cmp.l.f0.0 	(16) null:uw     		uwSOAD(0)<16;16,1>			uwSOBEL(0)<16;16,1>
+(f0.0)  mov 		(16) uwSOBEL(0)<1>   uwSOAD(0)<16;16,1>
+
+//Fourth row of 8x4
+		cmp.l.f0.0 	(16) null:uw     		uwSOBEL(3)<16;16,1>     	r55.30<0;2,1>:ub
+(f0.0)  cmp.l.f0.0 	(16) null:uw     		uwMAX_ABS_DIFF(1)<16;16,1>  r55.28<0;2,1>:ub
+(f0.0)  cmp.l.f0.0 	(16) null:uw     		uwSOAD(1)<16;16,1>			uwSOBEL(0)<16;16,1>
+(f0.0)  mov 		(16) uwSOBEL(0)<1>   uwSOAD(1)<16;16,1>
+
+		cmp.l.f0.0 	(8) null:uw     		uwSOBEL(0,0)<8;8,1>  	uwSOBEL(0,8)<8;8,1>
+(f0.0)  sel 		(8) uwSOBEL(0)<1>   	uwSOBEL(0,0)<8;8,1>  	uwSOBEL(0,8)<8;8,1>
+
+		cmp.l.f0.0 	(4) null:uw     		uwSOBEL(0,0)<4;4,1>  	uwSOBEL(0,4)<4;4,1>
+(f0.0)  sel 		(4) uwSOBEL(0)<1>   	uwSOBEL(0,0)<4;4,1>  	uwSOBEL(0,4)<4;4,1>
+
+		cmp.l.f0.0 	(2) null:uw     					uwSOBEL(0,0)<2;2,1>  uwSOBEL(0,2)<2;2,1>
+(f0.0)  sel 		(2) r[a0.1,0]<1>:uw   	uwSOBEL(0,0)<2;2,1>  uwSOBEL(0,2)<2;2,1>
+
+
+
+
+
+
+// End of common.inc
+
+mov (1) ip:ud r7.7<0;1,0>:d
+
+
+.end_code
+.end_kernel
diff --git a/src/shaders/post_processing/gen7/PL3_DN_422CP.g4a b/src/shaders/post_processing/gen7/PL3_DN_422CP.g4a
new file mode 100644
index 0000000..8192108
--- /dev/null
+++ b/src/shaders/post_processing/gen7/PL3_DN_422CP.g4a
@@ -0,0 +1,544 @@
+/*
+ *  Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+//  117    // Total instruction count
+//    1    // Total kernel count
+
+.kernel PL3_DN_422CP
+.code
+
+
+
+// FileName:	DN_PL_Core.asm 
+// Author:		Vivek Kumar
+// Description:	Tasks for DN only case (16x8 block) for planar format
+
+
+
+// FileName:	DN.asm 
+// Author:		Vivek Kumar
+// Description:	Tasks for DN only case (16x8 block)
+
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+// Assembler defaults and register budget for this kernel (values as named by the directives below)
+.default_execution_size (16)
+.default_register_type  :ub
+
+.reg_count_total        128
+.reg_count_payload      7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+                                    //  e.g.            byte0   byte1  byte2
+                                    // YUYV               0       1      3
+                                    // YVYU               0       3      1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+                                    //  e.g.              byte0           byte1           byte2
+                                    // YUYV                 0               1               3
+                                    // YVYU                 0               3               1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+//                = 000 0001 (min message len 1 ) 0,0000 (resp len 0   -add later)
+//                  0000,0000,0000
+//                  0001(Spawn a root thread),0001 (Root thread spawn thread)
+//                = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+//                = 000 0110 (min message len 6 ) 0,0000 (resp len 0   -add later)
+//                  1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+//                  0000,0000 (Binding table index, added later)
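+//                = 0x02000011   (NOTE(review): same value as the thread-spawner descriptor above, which encodes message len 1, not 6 - looks like a copy-paste leftover; confirm)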
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+        // Message Descriptors
+                //                = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+                //                  1 (header present 1) 0 1010 (media block write) 000000
+                //                  00000000 (binding table index - set later)
+                //                = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+                             // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT  r43    // Dummy Message Register for EOT
+
+// Four typed views (ub / uw / ud / f) of the same storage, all based at r30
+.declare    mubMSGPAYLOAD  Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare    muwMSGPAYLOAD  Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare    mudMSGPAYLOAD  Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare    mfMSGPAYLOAD   Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// Temp space for rotation: five typed views that all alias the same registers starting at r9.0
+// (f, ud, uw, ub with unit stride, plus ub4ROBUF which steps 4 bytes per element on both source and destination)
+.declare fROBUF		  Base=r9.0		ElementSize=4		SrcRegion=<8;8,1>		  DstRegion=<1>		Type=f
+
+.declare udROBUF		Base=r9.0		ElementSize=4		SrcRegion=<8;8,1>		  DstRegion=<1>		Type=ud
+
+.declare uwROBUF		Base=r9.0		ElementSize=2		SrcRegion=<16;16,1>		DstRegion=<1>		Type=uw
+
+.declare ubROBUF		Base=r9.0		ElementSize=1		SrcRegion=<16;16,1>		DstRegion=<1>		Type=ub
+
+.declare ub4ROBUF 	Base=r9.0		ElementSize=1		SrcRegion=<32;8,4>		DstRegion=<4>		Type=ub
+
+
+// End of common.inc
+
+
+// FileName:    DNDI.inc
+// Author:      Vivek Kumar
+// Description: Include file for DN, DI and DNDI
+// Inputs:      DI_ENABLE, DN_ENABLE, DN_PLANAR, DN_PACKED
+
+
+
+
+// End of common.inc
+
+
+//Interface:
+//Static Parameters:
+//r1
+
+
+//====================== Binding table (Explicit To DNDI)=========================================
+
+// Message header register views. Each group gives differently typed names for the same base register.
+.declare mudMSGHDR_DNDI     Base=r18      ElementSize=4    Type=ud
+.declare mdMSGHDR_DNDI      Base=r18      ElementSize=4    Type=d
+.declare mwMSGHDR_DNDI      Base=r18      ElementSize=2    Type=w
+
+// STMM header view at r20 (declared by the shared include; not referenced by the instructions of this kernel)
+.declare mudMSGHDR_STMM     Base=r20      ElementSize=4    Type=ud
+
+// Denoise-history write header at r22
+.declare mudMSGHDR_HIST     Base=r22      ElementSize=4    Type=ud
+
+// Encoder-statistics write header at r24 (ud / uw / ub views)
+.declare mudMSGHDR_ENC_STATS Base=r24 ElementSize=4   Type=ud
+.declare muwMSGHDR_ENC_STATS Base=r24 ElementSize=2   Type=uw
+.declare mubMSGHDR_ENC_STATS Base=r24 ElementSize=1   Type=ub
+
+// Denoised-output write header at r31 (ud / d / ub views)
+.declare mudMSGHDR_DN_OUT   Base=r31.0      ElementSize=4  Type=ud
+.declare mdMSGHDR_DN_OUT    Base=r31.0      ElementSize=4  Type=d
+.declare mubMSGHDR_DN_OUT   Base=r31.0      ElementSize=1  Type=ub
+
+// U/V copy headers: UVCOPY and UCOPY share r36, VCOPY is at r38
+.declare mudMSGHDR_UVCOPY   Base=r36      ElementSize=4  Type=ud
+.declare mdMSGHDR_UVCOPY    Base=r36      ElementSize=4  Type=d
+.declare mudMSGHDR_UCOPY    Base=r36       ElementSize=4  Type=ud
+.declare mudMSGHDR_VCOPY    Base=r38       ElementSize=4  Type=ud
+
+// DI output header 1 is based at r18.0, i.e. it aliases the mudMSGHDR_DNDI registers above
+.declare mudMSGHDR_DI_OUT1  Base=r18.0      ElementSize=4     Type=ud
+.declare mubMSGHDR_DI_OUT1  Base=r18.0      ElementSize=1     Type=ub
+
+// DI output header 2 is based at r23.0
+.declare mudMSGHDR_DI_OUT2  Base=r23.0      ElementSize=4     Type=ud
+.declare mubMSGHDR_DI_OUT2  Base=r23.0      ElementSize=1     Type=ub
+
+//r45
+//Use r45 as message header, so no need to "mov" the data: r45 sits directly before the DNDI response buffer declared at r46 below.
+
+.declare mudDN_Y_OUT        Base=r45.0 ElementSize=4 SrcRegion=<8;8,1>   DstRegion=<1> Type=ud
+
+// Message response (Denoised & DI-ed pixels & statistics); Use buffer 5. Three typed views (ud / uw / ub) based at r46.
+.declare udDNDI_RESP        Base=r46.0 ElementSize=4 SrcRegion=<8;8,1>   DstRegion=<1> Type=ud
+.declare uwDNDI_RESP        Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare ubDNDI_RESP        Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+// Message response (UV Copy); Use buffer 5. Two typed views (ud / ub) based at r58.
+.declare udDNDI_UV_RESP     Base=r58.0 ElementSize=4 SrcRegion=<8;8,1>  DstRegion=<1> Type=ud
+.declare ubDNDI_UV_RESP     Base=r58.0 ElementSize=1 SrcRegion=<16;16,1>    DstRegion=<1> Type=ub
+
+//Temp GRFs: For 42X to 422 Conversion (word and byte views of the same registers from r10)
+.declare uwDNDI_UVCOPY_TEMP Base=r10.0 ElementSize=2 SrcRegion=<16;16,1>    DstRegion=<1> Type=uw       //8 GRFs
+.declare ubDNDI_UVCOPY_TEMP Base=r10.0 ElementSize=1 SrcRegion=<16;16,1>    DstRegion=<1> Type=ub       //8 GRFs
+//---------------------------------------------------------------------------
+// Message descriptors
+//---------------------------------------------------------------------------
+// Extended message descriptor
+    // Message descriptor   for sampler read
+    //                    = 000 0010 (message len 2) 00000 (resp len - set later, 12 or 5 or 11)
+    //                      1 (header present 1) 0 11 (SIMD32/64 mode)
+    //                      1000 (message type) 0000 (DI state index)
+    //                      00000000 (binding table index - set later)
+    //                    = 0x040b8000
+
+
+// Attention: The Message Length is The Number of GRFs with Data Only, without the Header
+
+
+//---------------------------------------------------------------------------
+// VDI Return Data format
+//---------------------------------------------------------------------------
+// Defines for DI enabled
+
+
+// Defines for DI disabled
+
+
+
+// FileName:	DNDI_Command.asm 
+// Author:		Vivek Kumar
+// Description:	Sends a message to the VDI to process one DN (16x8) or DNDI (16x4) block
+
+// Prepare the DNDI send command: build the message header in r18 (mudMSGHDR_DNDI) starting from a copy of r0
+mov (8)		mudMSGHDR_DNDI(0)<1>			r0.0<8;8,1>:ud					// message header
+mov (1)		mwMSGHDR_DNDI(1,4)<1>			r7.0<0;1,0>:w		{ NoDDClr }		// horizontal origin	// Do we need to add offset here? -vK
+mov (1)		mwMSGHDR_DNDI(1,12)<1>			r7.1<0;1,0>:w		{ NoDDChk }		// vertical origin		// Can these 2 be combined? - vK
+// Issue the request from r18; the response is written starting at udDNDI_RESP(0) (r46)
+send (8)	udDNDI_RESP(0)<1>	r18	0x2	0x45E8003:ud
+
+// On Gen6, with VDI walker, use the XY pair returned rather than the one programmed above.
+// VDI_RETURNED_XY is ordered XY when the walker is enabled and is the same as programmed when the walker is disabled.
+mov (2) 	r7.0<1>:w     uwDNDI_RESP(4,14)<2;2,1>	// horizontal/vertical origin in W.14 and W.15 of response register 4; overwrites r7.0 (X) and r7.1 (Y)
+
+
+
+// FileName:	DN_Hist_Save.asm 
+// Author:		Vivek Kumar
+// Description:	Saves DN history data to statistics surface
+
+// Write denoise history to memory: the header is staged in r27, the payload goes to mudMSGHDR_HIST(1) (r23)
+mov (8)    r27<1>:ud				r0.0<8;8,1>:ud                   			// message header (copy of r0)
+
+
+	mov (2)    mudMSGHDR_HIST(1)<1>    	udDNDI_RESP(4,0)<2;2,1>    	// Move denoise history to MRF (4x2)
+
+
+shr (2)    r27.0<1>:ud		r7.0<2;2,1>:w				2:w                                 	// X,Y origin / 4
+add (1)    r27.0<1>:ud		r27.0<0;1,0>:ud			r1.12<0;1,0>:uw		{ NoDDClr }  	// Add pitch to X origin
+mov (1)    r27.2<1>:ud		0x10003:ud									{ NoDDChk }  	// block width and height (4x2, reading the value as (height-1)<<16 | (width-1) like the other block-size comments)
+// Copy the finished header from r27 into mudMSGHDR_HIST(0) (r22), then send from r22
+mov (8)		mudMSGHDR_HIST(0)<1>		r27.0<8;8,1>:ud
+send (8)	null<1>:d	r22	0x5		0x40A8021:ud
+
+
+
+// FileName:	DNDI_Enc_Stats_Save.asm
+// Author:		Vivek Kumar
+// Description:	Saves Encoder Statistics data to statistics surface in case of DI enabled (for 16x4 block)
+
+// Write encoder statistics to memory: header in mudMSGHDR_ENC_STATS(0) (r24), payload in mudMSGHDR_ENC_STATS(1) (r25)
+//Currently enable this only on Gen6 validation
+mov (8)		mudMSGHDR_ENC_STATS(1)<1>	0x0:ud						// Init payload MRF
+mov (8)		mudMSGHDR_ENC_STATS(0)<1>	r0.0<8;8,1>:ud				// message header
+
+shr (1)		mudMSGHDR_ENC_STATS(0,0)<1>		r7.0<0;1,0>:w            	1:w  	{ NoDDClr }			// X origin / 2 (original note: enable the flag after testing on si)
+mul (1)		acc0.1<1>:ud					r7.1<0;1,0>:w				3:w																							// Y origin * 3
+shr (1)		mudMSGHDR_ENC_STATS(0,1)<1>		acc0.1<0;1,0>:ud			2:w		{ NoDDClr, NoDDChk }		// Y origin * 3/4 (original note: enable the flag after testing on si)
+mov (1)		mudMSGHDR_ENC_STATS(0,2)<1>		0x50003:ud				{ NoDDChk }			// block width and height (original comment: 8x3). NOTE(review): 0x50003 reads as 4x6 under the size-minus-one encoding implied by the other block-size comments - confirm
+add (2)		mudMSGHDR_ENC_STATS(0,0)<1>		mudMSGHDR_ENC_STATS(0,0)<2;2,1>       	r1.12<2;2,1>:uw					// Add pitch to X,Y origin
+
+
+	//Data block for Encoder Statistics
+	//----------------------------------------------------
+	//|  0  |   1  |   2   |  3  |  4  |  5  |  6  |  7  | Bytes
+	//----------------------------------------------------
+	//| BNE |   X  |   X   |  X  |           X           |
+	//----------------------------------------------------
+	//|     X      |     SVCM    |           X           |
+	//----------------------------------------------------
+	//|   SHCM     |     STAD    |           X           |
+	//----------------------------------------------------
+	//|            X             |           X           |
+	//----------------------------------------------------
+	//|     X      |     SVCM    |           X           |
+	//----------------------------------------------------
+	//|   SHCM     |     STAD    |           X           |
+	//----------------------------------------------------
+	mov (1)		mubMSGHDR_ENC_STATS(1,0)<1>		ubDNDI_RESP(4,8)<0;1,0>    		{ NoDDClr }				// Move encoder statistics to MRF: response byte 8 -> payload byte 0
+	mov (1)		muwMSGHDR_ENC_STATS(1,3)<1>		uwDNDI_RESP(4,11)<0;1,0>    	{ NoDDClr, NoDDChk }			// Move encoder statistics to MRF: response word 11 -> payload word 3
+	mov (2)		muwMSGHDR_ENC_STATS(1,4)<1>		uwDNDI_RESP(4,12)<2;2,1>    	{ NoDDClr, NoDDChk }			// Move encoder statistics to MRF: response words 12-13 -> payload words 4-5
+	mov (1)		muwMSGHDR_ENC_STATS(1,9)<1>		uwDNDI_RESP(4,8)<0;1,0>    		{ NoDDClr, NoDDChk }			// Move encoder statistics to MRF: response word 8 -> payload word 9
+	mov (2)		muwMSGHDR_ENC_STATS(1,10)<1>	uwDNDI_RESP(4,9)<2;2,1>    		{ NoDDChk }				// Move encoder statistics to MRF: response words 9-10 -> payload words 10-11
+
+// Send header + payload from r24; no response is requested (destination is null)
+send (8)   null<1>:d    r24    0x5    0x40A8021:ud
+
+
+
+// FileName:	DN_Load_UV_IMC3_16x8.asm 
+// Author:		Vivek Kumar
+// Description:	Read UV for 16x8 block through DATAPORT 
+
+
+
+// FileName:	UVCopy_Load_16x8.asm
+// Author:		Vivek Kumar
+// Description:	Read UV for 16x8 block through DATAPORT
+
+//CHANGE	:  Read extra UV data to convert to 422. -rT
+//We read the extra data in ALL cases, irrespective of whether upsampling is required later on, to keep things simple.
+
+// Build one read header in r27, duplicate it into r36 (U) and r38 (V), then issue the two reads into udDNDI_UV_RESP
+	add (2)		r27.0<1>:d				r7.0<2;2,1>:w				r4.4<2;2,1>:w				// Source Y Block origin
+	asr (2)  	r27.0<1>:d     			r27.0<2;2,1>:d       	1:w   						{ NoDDClr }	// U/V block origin should be half of Y's
+	mov (1)		r27.2<1>:ud				0x40007:ud  					 	{ NoDDChk }	// U/V block width and height (8x5)
+    mov (8)     r36<1>:ud    		r27.0<8;8,1>:ud
+    mov (8)     r38<1>:ud    	 	r27.0<8;8,1>:ud
+	send (8)	udDNDI_UV_RESP(0)<1>		r36	0x4	0x2290001:ud
+	send (8)	udDNDI_UV_RESP(2)<1>		r38	0x4	0x2290002:ud
+
+	//Update Header for Save: r36/r38 are kept and only their block-size dword is changed, from 8x5 (read) to 8x4 (write)
+	mov (1)		mudMSGHDR_UCOPY(0,2)<1>	0x30007:ud										//  U block width and height (8x4)
+	mov (1)		mudMSGHDR_VCOPY(0,2)<1>	0x30007:ud										//  V block width and height (8x4)
+
+
+
+// FileName:    DN_Save_Y_16x8.asm
+// Author:      Vivek Kumar
+// Description: Save one 16x8 blocks of Y channel of DN output for reference
+
+
+mov (8)     mudDN_Y_OUT(0,0)<1>     r0<8;8,1>:ud                                // message header
+mov (2)     mudDN_Y_OUT(0,0)<1>     r7.0<2;2,1>:w                  { NoDDClr }     // X and Y origin (two words, r7.0 and r7.1, widened into header dwords 0 and 1)
+mov (1)     mudDN_Y_OUT(0,2)<1>     0x7000F:ud    { NoDDChk }     // block width and height (16x8)
+// The header (r45) sits directly before the DNDI response at r46, so no payload copy is needed
+//send out data through data port
+send (8)    null<1>:d    mudDN_Y_OUT      0x5    0xA0A8018:ud
+
+
+
+// FileName:	DN_Save_UV_IMC3_16x8.asm 
+// Author:		Vivek Kumar
+// Description:	Save UV for 16x8 block through DATAPORT 
+
+
+
+// FileName:	UVCopy_Save_16x8.asm
+// Author:		Vivek Kumar
+// Description:	Save UV for 16x8 block through DATAPORT
+
+
+//Reuse the header from Load component
+//Header is modified at the end of load - to be usable for save.
+
+
+	mov (8)		mudMSGHDR_UCOPY(1)<1>		udDNDI_UV_RESP(0)<8;8,1>		// U payload: first register of the U read response -> register after the U header (r37)
+	mov (8)		mudMSGHDR_VCOPY(1)<1>		udDNDI_UV_RESP(2)<8;8,1>		// V payload: first register of the V read response -> register after the V header (r39)
+    send (4)    null<1>:d    r36	0x5    0x40A8019:ud		// write U from r36; no response (null destination)
+    send (4)    null<1>:d    r38	0x5    0x40A801A:ud		// write V from r38; no response (null destination)
+
+
+
+// FileName:	DN_Upsample_UV_IMC3_16x8.asm
+// Author:		Tatiya, Rupesh
+// Description:	Upconvert 420 UV to 422
+
+
+
+// FileName:	UVCopy_Upsample_UV_16x8.asm
+// Author:		Tatiya, Rupesh
+// Description:	Convert 42X UV to 422 - to be used for IECP.
+
+
+	avg.sat (16) 	uwDNDI_UVCOPY_TEMP(0)		ubDNDI_UV_RESP(0,0)<0;8,1>     ubDNDI_UV_RESP(0,0)<8;8,1>		// src0 uses vertical stride 0 (same 8 bytes twice), src1 walks two consecutive 8-byte rows
+	avg.sat (16) 	uwDNDI_UVCOPY_TEMP(4)	ubDNDI_UV_RESP(2,0)<0;8,1>     ubDNDI_UV_RESP(2,0)<8;8,1>		// same averaging for the second response (UV_RESP(2)), results land in TEMP(4..7)
+	avg.sat (16) 	uwDNDI_UVCOPY_TEMP(1)		ubDNDI_UV_RESP(0,8)<0;8,1>     ubDNDI_UV_RESP(0,8)<8;8,1>
+	avg.sat (16) 	uwDNDI_UVCOPY_TEMP(5)	ubDNDI_UV_RESP(2,8)<0;8,1>     ubDNDI_UV_RESP(2,8)<8;8,1>
+	avg.sat (16) 	uwDNDI_UVCOPY_TEMP(2)		ubDNDI_UV_RESP(0,16)<0;8,1>     ubDNDI_UV_RESP(0,16)<8;8,1>
+	avg.sat (16) 	uwDNDI_UVCOPY_TEMP(6)	ubDNDI_UV_RESP(2,16)<0;8,1>     ubDNDI_UV_RESP(2,16)<8;8,1>
+	avg.sat (16) 	uwDNDI_UVCOPY_TEMP(3)		ubDNDI_UV_RESP(0,24)<0;8,1>     ubDNDI_UV_RESP(0,24)<8;8,1>
+	avg.sat (16) 	uwDNDI_UVCOPY_TEMP(7)	ubDNDI_UV_RESP(2,24)<0;8,1>     ubDNDI_UV_RESP(2,24)<8;8,1>
+// Interleave the word results byte-wise into ubDNDI_RESP(5,...): TEMP(0..3) fill the odd bytes, TEMP(4..7) the even bytes
+	mov		(16)	ubDNDI_RESP(5, 1)<2>	ubDNDI_UVCOPY_TEMP(0,0)<32;16,2>		{ NoDDClr }
+	mov		(16)	ubDNDI_RESP(5, 0)<2>		ubDNDI_UVCOPY_TEMP(4,0)<32;16,2>		{ NoDDChk }
+	mov		(16)	ubDNDI_RESP(5, 33)<2>	ubDNDI_UVCOPY_TEMP(1,0)<32;16,2>		{ NoDDClr }
+	mov		(16)	ubDNDI_RESP(5, 32)<2>		ubDNDI_UVCOPY_TEMP(5,0)<32;16,2>		{ NoDDChk }
+	mov		(16)	ubDNDI_RESP(5, 65)<2>	ubDNDI_UVCOPY_TEMP(2,0)<32;16,2>		{ NoDDClr }
+	mov		(16)	ubDNDI_RESP(5, 64)<2>		ubDNDI_UVCOPY_TEMP(6,0)<32;16,2>		{ NoDDChk }
+	mov		(16)	ubDNDI_RESP(5, 97)<2>	ubDNDI_UVCOPY_TEMP(3,0)<32;16,2>		{ NoDDClr }
+	mov		(16)	ubDNDI_RESP(5, 96)<2>		ubDNDI_UVCOPY_TEMP(7,0)<32;16,2>		{ NoDDChk }
+
+
+
+// FileName:	DN_Save_422CP_16x8.asm
+// Author:		Vivek Kumar
+// Description:	Save one 16x8 blocks of DN output to the color pipe in 4-2-2 format
+
+
+.declare mubMSGHDR_DN_OUT_2   Base=r36.0      ElementSize=1  Type=ub
+// mubMSGHDR_DN_OUT_2 is a byte view based at r36, the same base as the U/V copy header declared earlier in this kernel
+
+mov (8)		mudMSGHDR_DN_OUT(0)<1>		r0<8;8,1>:ud            			// message header
+shl (1)     mdMSGHDR_DN_OUT(0,0)<1>		r7.0<0;1,0>:w     1:w  		{ NoDDClr }            // X origin * 2 (422 output)
+mov (1)     mdMSGHDR_DN_OUT(0,1)<1>		r7.1<0;1,0>:w          		{ NoDDClr, NoDDChk }   // Y origin
+mov (1)     mudMSGHDR_DN_OUT(0,2)<1>	0x7000F:ud	{ NoDDClr, NoDDChk }            // block width and height (16x8)
+
+//M0.3	- 0 - CP Enable, 1 - Area of Interest, 3:2 Message Format(TBD), 4:3 - Ignored, 31:5 CP state pointer
+//Compose area-of-interest bit + color pipe state pointer: header dword 3 = r2.4 (dword) OR r7.26 (byte)
+or (1)		mudMSGHDR_DN_OUT(0,3)<1>		r2.4<0;1,0>:ud 	r7.26<0;1,0>:b		{ NoDDChk }
+
+// First 8 x 8 Block: Y bytes 0-7 of each 16-byte response row go to every second byte (dst stride 2) of the payload at r31+1..r31+4
+	mov (8)		mubMSGHDR_DN_OUT(1)<2>			ubDNDI_RESP(0,0)<8;8,1>				{ NoDDClr } 	// copy line of Y directly to memory as optimization
+	mov (8)		mubMSGHDR_DN_OUT(1,16)<2>		ubDNDI_RESP(0,16)<8;8,1>			{ NoDDClr, NoDDChk } 	// copy line of Y directly to memory as optimization
+	mov (8)		mubMSGHDR_DN_OUT(2)<2>			ubDNDI_RESP(0,32)<8;8,1>				{ NoDDClr } 	// copy line of Y directly to memory as optimization
+	mov (8)		mubMSGHDR_DN_OUT(2,16)<2>		ubDNDI_RESP(0,48)<8;8,1>			{ NoDDClr, NoDDChk } 	// copy line of Y directly to memory as optimization
+	mov (8)		mubMSGHDR_DN_OUT(3)<2>			ubDNDI_RESP(0,64)<8;8,1>				{ NoDDClr } 	// copy line of Y directly to memory as optimization
+	mov (8)		mubMSGHDR_DN_OUT(3,16)<2>		ubDNDI_RESP(0,80)<8;8,1>			{ NoDDClr, NoDDChk } 	// copy line of Y directly to memory as optimization
+	mov (8)		mubMSGHDR_DN_OUT(4)<2>			ubDNDI_RESP(0,96)<8;8,1>				{ NoDDClr } 	// copy line of Y directly to memory as optimization
+	mov (8)		mubMSGHDR_DN_OUT(4,16)<2>		ubDNDI_RESP(0,112)<8;8,1>			{ NoDDClr, NoDDChk } 	// copy line of Y directly to memory as optimization
+// Chroma: within each 4-byte group of the payload, U is written to byte 1 and V to byte 3 (dst stride 4); Y above filled bytes 0 and 2
+	mov (4)     mubMSGHDR_DN_OUT(1,1)<4>   	ubDNDI_RESP(5,1)<8;4,2>			{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
+	mov (4)     mubMSGHDR_DN_OUT(1,17)<4>   	ubDNDI_RESP(5,17)<8;4,2>		{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
+
+	mov (4)     mubMSGHDR_DN_OUT(1,3)<4>   	ubDNDI_RESP(5,0)<8;4,2>			{ NoDDChk }    	// copy line of V directly to memory as optimization
+	mov (4)     mubMSGHDR_DN_OUT(1,19)<4>   	ubDNDI_RESP(5,16)<8;4,2>			{ NoDDChk }    	// copy line of V directly to memory as optimization
+	mov (4)     mubMSGHDR_DN_OUT(2,1)<4>   	ubDNDI_RESP(5,33)<8;4,2>			{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
+	mov (4)     mubMSGHDR_DN_OUT(2,17)<4>   	ubDNDI_RESP(5,49)<8;4,2>		{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
+
+	mov (4)     mubMSGHDR_DN_OUT(2,3)<4>   	ubDNDI_RESP(5,32)<8;4,2>			{ NoDDChk }    	// copy line of V directly to memory as optimization
+	mov (4)     mubMSGHDR_DN_OUT(2,19)<4>   	ubDNDI_RESP(5,48)<8;4,2>			{ NoDDChk }    	// copy line of V directly to memory as optimization
+	mov (4)     mubMSGHDR_DN_OUT(3,1)<4>   	ubDNDI_RESP(5,65)<8;4,2>			{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
+	mov (4)     mubMSGHDR_DN_OUT(3,17)<4>   	ubDNDI_RESP(5,81)<8;4,2>		{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
+
+	mov (4)     mubMSGHDR_DN_OUT(3,3)<4>   	ubDNDI_RESP(5,64)<8;4,2>			{ NoDDChk }    	// copy line of V directly to memory as optimization
+	mov (4)     mubMSGHDR_DN_OUT(3,19)<4>   	ubDNDI_RESP(5,80)<8;4,2>			{ NoDDChk }    	// copy line of V directly to memory as optimization
+	mov (4)     mubMSGHDR_DN_OUT(4,1)<4>   	ubDNDI_RESP(5,97)<8;4,2>			{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
+	mov (4)     mubMSGHDR_DN_OUT(4,17)<4>   	ubDNDI_RESP(5,113)<8;4,2>		{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
+
+	mov (4)     mubMSGHDR_DN_OUT(4,3)<4>   	ubDNDI_RESP(5,96)<8;4,2>			{ NoDDChk }    	// copy line of V directly to memory as optimization
+	mov (4)     mubMSGHDR_DN_OUT(4,19)<4>   	ubDNDI_RESP(5,112)<8;4,2>			{ NoDDChk }    	// copy line of V directly to memory as optimization
+
+// Second 8 x 8 Block: clone the first block's header (r31) into r36 and advance its X origin by 0x10
+mov	(8)	r36.0<1>:ud		r31.0<8;8,1>:ud
+add	(1)	r36.0<1>:ud		r36.0<0;1,0>:w		0x10:w
+// Same packing as the first block, but reading Y bytes 8-15 of each response row and the chroma bytes 8 further along
+	mov (8)		mubMSGHDR_DN_OUT_2(1)<2>		ubDNDI_RESP(0,8)<8;8,1>			{ NoDDClr } 	// copy line of Y directly to memory as optimization
+	mov (8)		mubMSGHDR_DN_OUT_2(1,16)<2>	ubDNDI_RESP(0,24)<8;8,1>			{ NoDDClr, NoDDChk } 	// copy line of Y directly to memory as optimization
+	mov (8)		mubMSGHDR_DN_OUT_2(2)<2>		ubDNDI_RESP(0,40)<8;8,1>			{ NoDDClr } 	// copy line of Y directly to memory as optimization
+	mov (8)		mubMSGHDR_DN_OUT_2(2,16)<2>	ubDNDI_RESP(0,56)<8;8,1>			{ NoDDClr, NoDDChk } 	// copy line of Y directly to memory as optimization
+	mov (8)		mubMSGHDR_DN_OUT_2(3)<2>		ubDNDI_RESP(0,72)<8;8,1>			{ NoDDClr } 	// copy line of Y directly to memory as optimization
+	mov (8)		mubMSGHDR_DN_OUT_2(3,16)<2>	ubDNDI_RESP(0,88)<8;8,1>			{ NoDDClr, NoDDChk } 	// copy line of Y directly to memory as optimization
+	mov (8)		mubMSGHDR_DN_OUT_2(4)<2>		ubDNDI_RESP(0,104)<8;8,1>			{ NoDDClr } 	// copy line of Y directly to memory as optimization
+	mov (8)		mubMSGHDR_DN_OUT_2(4,16)<2>	ubDNDI_RESP(0,120)<8;8,1>			{ NoDDClr, NoDDChk } 	// copy line of Y directly to memory as optimization
+
+	mov (4)     mubMSGHDR_DN_OUT_2(1,1)<4>   	ubDNDI_RESP(5,9)<8;4,2>		{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
+	mov (4)     mubMSGHDR_DN_OUT_2(1,17)<4>   	ubDNDI_RESP(5,25)<8;4,2>		{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
+
+	mov (4)     mubMSGHDR_DN_OUT_2(1,3)<4>   	ubDNDI_RESP(5,8)<8;4,2>			{ NoDDChk }    	// copy line of V directly to memory as optimization
+	mov (4)     mubMSGHDR_DN_OUT_2(1,19)<4>   	ubDNDI_RESP(5,24)<8;4,2>		{ NoDDChk }    	// copy line of V directly to memory as optimization
+	mov (4)     mubMSGHDR_DN_OUT_2(2,1)<4>   	ubDNDI_RESP(5,41)<8;4,2>		{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
+	mov (4)     mubMSGHDR_DN_OUT_2(2,17)<4>   	ubDNDI_RESP(5,57)<8;4,2>		{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
+
+	mov (4)     mubMSGHDR_DN_OUT_2(2,3)<4>   	ubDNDI_RESP(5,40)<8;4,2>			{ NoDDChk }    	// copy line of V directly to memory as optimization
+	mov (4)     mubMSGHDR_DN_OUT_2(2,19)<4>   	ubDNDI_RESP(5,56)<8;4,2>		{ NoDDChk }    	// copy line of V directly to memory as optimization
+	mov (4)     mubMSGHDR_DN_OUT_2(3,1)<4>   	ubDNDI_RESP(5,73)<8;4,2>		{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
+	mov (4)     mubMSGHDR_DN_OUT_2(3,17)<4>   	ubDNDI_RESP(5,89)<8;4,2>		{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
+
+	mov (4)     mubMSGHDR_DN_OUT_2(3,3)<4>   	ubDNDI_RESP(5,72)<8;4,2>			{ NoDDChk }    	// copy line of V directly to memory as optimization
+	mov (4)     mubMSGHDR_DN_OUT_2(3,19)<4>   	ubDNDI_RESP(5,88)<8;4,2>		{ NoDDChk }    	// copy line of V directly to memory as optimization
+	mov (4)     mubMSGHDR_DN_OUT_2(4,1)<4>   	ubDNDI_RESP(5,105)<8;4,2>		{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
+	mov (4)     mubMSGHDR_DN_OUT_2(4,17)<4>   	ubDNDI_RESP(5,121)<8;4,2>		{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
+
+	mov (4)     mubMSGHDR_DN_OUT_2(4,3)<4>   	ubDNDI_RESP(5,104)<8;4,2>			{ NoDDChk }    	// copy line of V directly to memory as optimization
+	mov (4)     mubMSGHDR_DN_OUT_2(4,19)<4>   	ubDNDI_RESP(5,120)<8;4,2>		{ NoDDChk }    	// copy line of V directly to memory as optimization
+
+//send out data through data port: one message per 8x8 block, both with the same descriptor and no response (null destination)
+send (8)    null<1>:d    r31.0		0x5    0xA0A801B:ud
+send (8)    null<1>:d    r36.0	0x5    0xA0A801B:ud
+
+
+
+//End of Thread message
+// r127 receives a copy of r0 and is then used as the source register of the final send
+mov (8) r127<1>:ud r0.0<8;8,1>:ud 
+ send (1) null<1>:d r127 0x27 0x02000010
+
+
+.end_code 
+.end_kernel
diff --git a/src/shaders/post_processing/gen7/PL3_DN_PL3.g4a b/src/shaders/post_processing/gen7/PL3_DN_PL3.g4a
new file mode 100644
index 0000000..baeb2d7
--- /dev/null
+++ b/src/shaders/post_processing/gen7/PL3_DN_PL3.g4a
@@ -0,0 +1,425 @@
+/*
+ *  Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+//   44    // Total instruction count
+//    1    // Total kernel count
+
+.kernel PL3_DN_PL3
+.code
+
+
+
+// FileName:	DN_PL_Core.asm 
+// Author:		Vivek Kumar
+// Description:	Tasks for DN only case (16x8 block) for planar format
+
+
+
+// FileName:	DN.asm 
+// Author:		Vivek Kumar
+// Description:	Tasks for DN only case (16x8 block)
+
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16)
+.default_register_type  :ub
+
+.reg_count_total        128
+.reg_count_payload      7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+                                    //  e.g.            byte0   byte1  byte2
+                                    // YUYV               0       1      3
+                                    // YVYU               0       3      1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+                                    //  e.g.              byte0           byte1           byte2
+                                    // YUYV                 0               1               3
+                                    // YVYU                 0               3               1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+//                = 000 0001 (min message len 1 ) 0,0000 (resp len 0   -add later)
+//                  0000,0000,0000
+//                  0001(Spawn a root thread),0001 (Root thread spawn thread)
+//                = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+//                = 000 0110 (min message len 6 ) 0,0000 (resp len 0   -add later)
+//                  1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+//                  0000,0000 (Binding table index, added later)
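+//                = 0x02000011   (NOTE(review): same value as the thread-spawner descriptor above, which encodes message len 1, not 6 - looks like a copy-paste leftover; confirm)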
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+        // Message Descriptors
+                //                = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+                //                  1 (header present 1) 0 1010 (media block write) 000000
+                //                  00000000 (binding table index - set later)
+                //                = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+                             // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT  r43    // Dummy Message Register for EOT
+
+
+.declare    mubMSGPAYLOAD  Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare    muwMSGPAYLOAD  Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare    mudMSGPAYLOAD  Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare    mfMSGPAYLOAD   Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+
+.declare fROBUF		  Base=r9.0		ElementSize=4		SrcRegion=<8;8,1>		  DstRegion=<1>		Type=f
+
+.declare udROBUF		Base=r9.0		ElementSize=4		SrcRegion=<8;8,1>		  DstRegion=<1>		Type=ud
+
+.declare uwROBUF		Base=r9.0		ElementSize=2		SrcRegion=<16;16,1>		DstRegion=<1>		Type=uw
+
+.declare ubROBUF		Base=r9.0		ElementSize=1		SrcRegion=<16;16,1>		DstRegion=<1>		Type=ub
+
+.declare ub4ROBUF 	Base=r9.0		ElementSize=1		SrcRegion=<32;8,4>		DstRegion=<4>		Type=ub
+
+
+// End of common.inc
+
+
+// FileName:    DNDI.inc
+// Author:      Vivek Kumar
+// Description: Include file for DN, DI and DNDI
+// Inputs:      DI_ENABLE, DN_ENABLE, DN_PLANAR, DN_PACKED
+
+
+
+
+// End of common.inc
+
+
+//Interface:
+//Static Parameters:
+//r1
+
+
+//====================== Binding table (Explicit To DNDI)=========================================
+// The .declare lines below give typed names to the GRF regions that the DN/DI code uses as
+// message headers/payloads and response buffers. Several names share a base register.
+.declare mudMSGHDR_DNDI     Base=r18      ElementSize=4    Type=ud
+.declare mdMSGHDR_DNDI      Base=r18      ElementSize=4    Type=d
+.declare mwMSGHDR_DNDI      Base=r18      ElementSize=2    Type=w
+
+// STMM message registers start at r20.
+.declare mudMSGHDR_STMM     Base=r20      ElementSize=4    Type=ud
+
+// Denoise-history message registers start at r22.
+.declare mudMSGHDR_HIST     Base=r22      ElementSize=4    Type=ud
+
+// Encoder-statistics message registers start at r24 (dword, word and byte views of the same GRF).
+.declare mudMSGHDR_ENC_STATS Base=r24 ElementSize=4   Type=ud
+.declare muwMSGHDR_ENC_STATS Base=r24 ElementSize=2   Type=uw
+.declare mubMSGHDR_ENC_STATS Base=r24 ElementSize=1   Type=ub
+
+// DN output message registers start at r31.
+.declare mudMSGHDR_DN_OUT   Base=r31.0      ElementSize=4  Type=ud
+.declare mdMSGHDR_DN_OUT    Base=r31.0      ElementSize=4  Type=d
+.declare mubMSGHDR_DN_OUT   Base=r31.0      ElementSize=1  Type=ub
+
+// U/V copy message registers: UVCOPY and UCOPY share r36; VCOPY starts at r38.
+.declare mudMSGHDR_UVCOPY   Base=r36      ElementSize=4  Type=ud
+.declare mdMSGHDR_UVCOPY    Base=r36      ElementSize=4  Type=d
+.declare mudMSGHDR_UCOPY    Base=r36       ElementSize=4  Type=ud
+.declare mudMSGHDR_VCOPY    Base=r38       ElementSize=4  Type=ud
+
+// DI output message 1 starts at r18, the same base register as the DNDI message header above.
+.declare mudMSGHDR_DI_OUT1  Base=r18.0      ElementSize=4     Type=ud
+.declare mubMSGHDR_DI_OUT1  Base=r18.0      ElementSize=1     Type=ub
+
+// DI output message 2 starts at r23.
+.declare mudMSGHDR_DI_OUT2  Base=r23.0      ElementSize=4     Type=ud
+.declare mubMSGHDR_DI_OUT2  Base=r23.0      ElementSize=1     Type=ub
+
+//r45
+//Use r45 as message header, so no need to "mov" the data.
+// r45 is the register immediately before the r46 response buffer declared below.
+.declare mudDN_Y_OUT        Base=r45.0 ElementSize=4 SrcRegion=<8;8,1>   DstRegion=<1> Type=ud
+
+// Message response (Denoised & DI-ed pixels & statistics); Use buffer 5
+.declare udDNDI_RESP        Base=r46.0 ElementSize=4 SrcRegion=<8;8,1>   DstRegion=<1> Type=ud
+.declare uwDNDI_RESP        Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare ubDNDI_RESP        Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+// Message response (UV Copy); Use buffer 5
+.declare udDNDI_UV_RESP     Base=r58.0 ElementSize=4 SrcRegion=<8;8,1>  DstRegion=<1> Type=ud
+.declare ubDNDI_UV_RESP     Base=r58.0 ElementSize=1 SrcRegion=<16;16,1>    DstRegion=<1> Type=ub
+
+//Temp GRFs: For 42X to 422 Conversion
+.declare uwDNDI_UVCOPY_TEMP Base=r10.0 ElementSize=2 SrcRegion=<16;16,1>    DstRegion=<1> Type=uw       //8 GRFs
+.declare ubDNDI_UVCOPY_TEMP Base=r10.0 ElementSize=1 SrcRegion=<16;16,1>    DstRegion=<1> Type=ub       //8 GRFs
+//---------------------------------------------------------------------------
+// Message descriptors
+//---------------------------------------------------------------------------
+// Extended message descriptor
+    // Message descriptor   for sampler read
+    //                    = 000 0010 (message len 2) 00000 (resp len - set later, 12 or 5 or 11)
+    //                      1 (header present 1) 0 11 (SIMD32/64 mode)
+    //                      1000 (message type) 0000 (DI state index)
+    //                      00000000 (binding table index - set later)
+    //                    = 0x040b8000
+
+
+// Attention: The Message Length is The Number of GRFs with Data Only, without the Header
+
+
+//---------------------------------------------------------------------------
+// VDI Return Data format
+//---------------------------------------------------------------------------
+// Defines for DI enabled
+
+
+// Defines for DI disabled
+
+
+
+// FileName:	DNDI_Command.asm 
+// Author:		Vivek Kumar
+// Description:	Sends a message to the VDI to process one DN (16x8) or DNDI (16x4) block
+// Reads r0 (copied into the message header) and r7.0:w / r7.1:w (block X/Y origin); the response is written to udDNDI_RESP (r46 onward).
+// Prepare the DNDI send command
+mov (8)		mudMSGHDR_DNDI(0)<1>			r0.0<8;8,1>:ud					// message header (r18) = copy of r0
+mov (1)		mwMSGHDR_DNDI(1,4)<1>			r7.0<0;1,0>:w		{ NoDDClr }		// horizontal origin -> word 4 of r19	// Do we need to add offset here? -vK
+mov (1)		mwMSGHDR_DNDI(1,12)<1>			r7.1<0;1,0>:w		{ NoDDChk }		// vertical origin -> word 12 of r19		// Can these 2 be combined? - vK
+// Descriptor 0x45E8003: message length 2, response length 5, binding table index 3 (bits 28:25, 24:20 and 7:0, following the field order of the DNDI.inc descriptor comment).
+send (8)	udDNDI_RESP(0)<1>	r18	0x2	0x45E8003:ud
+
+// On Gen6, with VDI walker, use the XY pair returned rather than the one programmed above
+// VDI_RETURNED_XY is ordered XY when the walker is enabled, and is the same as programmed when the walker is disabled
+mov (2) 	r7.0<1>:w     uwDNDI_RESP(4,14)<2;2,1>	// horizontal/vertical origin in W.14 and W.15 of response GRF 4 (r50)
+
+
+
+// FileName:	DN_Hist_Save.asm 
+// Author:		Vivek Kumar
+// Description:	Saves DN history data to statistics surface
+// The header is assembled in r27 and then copied to mudMSGHDR_HIST(0) (r22); the payload goes to mudMSGHDR_HIST(1) (r23).
+// Write denoise history to memory
+mov (8)    r27<1>:ud				r0.0<8;8,1>:ud                   			// message header, built in r27 first
+
+
+	mov (2)    mudMSGHDR_HIST(1)<1>    	udDNDI_RESP(4,0)<2;2,1>    	// Move denoise history (first two dwords of response GRF 4, r50) to MRF (4x2)
+
+
+shr (2)    r27.0<1>:ud		r7.0<2;2,1>:w				2:w                                 	// X,Y origin / 4
+add (1)    r27.0<1>:ud		r27.0<0;1,0>:ud			r1.12<0;1,0>:uw		{ NoDDClr }  	// Add pitch to X origin
+mov (1)    r27.2<1>:ud		0x10003:ud									{ NoDDChk }  	// block width and height: 0x10003 = (height-1)<<16 | (width-1) = 4x2
+
+mov (8)		mudMSGHDR_HIST(0)<1>		r27.0<8;8,1>:ud		// copy the finished header from r27 into r22
+send (8)	null<1>:d	r22	0x5		0x40A8021:ud		// message length 2 (r22-r23), binding table index 0x21; no response (null destination)
+
+
+
+// FileName:	DNDI_Enc_Stats_Save.asm
+// Author:		Vivek Kumar
+// Description:	Saves Encoder Statistics data to statistics surface in case of DI enabled (for 16x4 block)
+// Header is built in r24 (mudMSGHDR_ENC_STATS(0)), payload in r25 (mudMSGHDR_ENC_STATS(1)); statistics are read from response GRF 4 (r50).
+// Write encoder statistics to memory
+//Currently enable this only on Gen6 validation
+mov (8)		mudMSGHDR_ENC_STATS(1)<1>	0x0:ud						// Init payload MRF (zero all 8 dwords of r25)
+mov (8)		mudMSGHDR_ENC_STATS(0)<1>	r0.0<8;8,1>:ud				// message header = copy of r0
+
+shr (1)		mudMSGHDR_ENC_STATS(0,0)<1>		r7.0<0;1,0>:w            	1:w  	{ NoDDClr }			// X origin / 2  (original note: enable the flag after testing on si)
+mul (1)		acc0.1<1>:ud					r7.1<0;1,0>:w				3:w																							// Y origin * 3
+shr (1)		mudMSGHDR_ENC_STATS(0,1)<1>		acc0.1<0;1,0>:ud			2:w		{ NoDDClr, NoDDChk }		// Y origin * 3/4  (original note: enable the flag after testing on si)
+mov (1)		mudMSGHDR_ENC_STATS(0,2)<1>		0x50003:ud				{ NoDDChk }			// block width and height: 0x50003 = (height-1)<<16 | (width-1) = 4x6; the earlier "(8x3)" note did not match this value
+add (2)		mudMSGHDR_ENC_STATS(0,0)<1>		mudMSGHDR_ENC_STATS(0,0)<2;2,1>       	r1.12<2;2,1>:uw					// Add pitch to X,Y origin (adds r1.12:uw to X and r1.13:uw to Y)
+
+
+	//Data block for Encoder Statistics
+	//----------------------------------------------------
+	//|  0  |   1  |   2   |  3  |  4  |  5  |  6  |  7  | Bytes
+	//----------------------------------------------------
+	//| BNE |   X  |   X   |  X  |           X           |
+	//----------------------------------------------------
+	//|     X      |     SVCM    |           X           |
+	//----------------------------------------------------
+	//|   SHCM     |     STAD    |           X           |
+	//----------------------------------------------------
+	//|            X             |           X           |
+	//----------------------------------------------------
+	//|     X      |     SVCM    |           X           |
+	//----------------------------------------------------
+	//|   SHCM     |     STAD    |           X           |
+	//----------------------------------------------------
+	mov (1)		mubMSGHDR_ENC_STATS(1,0)<1>		ubDNDI_RESP(4,8)<0;1,0>    		{ NoDDClr }				// payload byte 0       <- response GRF 4, byte 8
+	mov (1)		muwMSGHDR_ENC_STATS(1,3)<1>		uwDNDI_RESP(4,11)<0;1,0>    	{ NoDDClr, NoDDChk }			// payload word 3       <- response GRF 4, word 11
+	mov (2)		muwMSGHDR_ENC_STATS(1,4)<1>		uwDNDI_RESP(4,12)<2;2,1>    	{ NoDDClr, NoDDChk }			// payload words 4-5    <- response GRF 4, words 12-13
+	mov (1)		muwMSGHDR_ENC_STATS(1,9)<1>		uwDNDI_RESP(4,8)<0;1,0>    		{ NoDDClr, NoDDChk }			// payload word 9       <- response GRF 4, word 8
+	mov (2)		muwMSGHDR_ENC_STATS(1,10)<1>	uwDNDI_RESP(4,9)<2;2,1>    		{ NoDDChk }				// payload words 10-11  <- response GRF 4, words 9-10
+
+// Descriptor 0x40A8021: message length 2 (r24-r25), binding table index 0x21; no response (null destination).
+send (8)   null<1>:d    r24    0x5    0x40A8021:ud
+
+
+
+// FileName:	DN_Load_UV_IMC3_16x8.asm 
+// Author:		Vivek Kumar
+// Description:	Read UV for 16x8 block through DATAPORT 
+
+
+
+// FileName:	UVCopy_Load_16x8.asm
+// Author:		Vivek Kumar
+// Description:	Read UV for 16x8 block through DATAPORT
+// NOTE: r27 is only partially rewritten here (dwords 0-2); its other dwords keep whatever earlier code left in r27.
+//CHANGE	:  Read extra UV data to convert to 422. -rT
+//We read the extra data in ALL cases, whether or not upsampling is required later on, to keep things simple.
+
+// One header is built in r27, duplicated into r36 (U) and r38 (V), and two reads are issued into udDNDI_UV_RESP (r58.. and r60..).
+	add (2)		r27.0<1>:d				r7.0<2;2,1>:w				r4.4<2;2,1>:w				// Source Y Block origin
+	asr (2)  	r27.0<1>:d     			r27.0<2;2,1>:d       	1:w   						{ NoDDClr }	// U/V block origin should be half of Y's
+	mov (1)		r27.2<1>:ud				0x40007:ud  					 	{ NoDDChk }	// U/V block width and height (8x5)
+    mov (8)     r36<1>:ud    		r27.0<8;8,1>:ud
+    mov (8)     r38<1>:ud    	 	r27.0<8;8,1>:ud
+	send (8)	udDNDI_UV_RESP(0)<1>		r36	0x4	0x2290001:ud		// low byte of descriptor (binding table index) = 1; response lands at r58
+	send (8)	udDNDI_UV_RESP(2)<1>		r38	0x4	0x2290002:ud		// low byte of descriptor (binding table index) = 2; response lands at r60
+
+	//Update Header for Save
+	mov (1)		mudMSGHDR_UCOPY(0,2)<1>	0x30007:ud										//  U block width and height (8x4)
+	mov (1)		mudMSGHDR_VCOPY(0,2)<1>	0x30007:ud										//  V block width and height (8x4)
+
+
+
+// FileName:    DN_Save_Y_16x8.asm
+// Author:      Vivek Kumar
+// Description: Save one 16x8 block of the Y channel of DN output for reference
+// mudDN_Y_OUT is r45, the register just before the response buffer (r46..), so the response
+// data already sits where the payload of this message is read from; only the header is written.
+mov (8)     mudDN_Y_OUT(0,0)<1>     r0<8;8,1>:ud                                // message header
+mov (2)     mudDN_Y_OUT(0,0)<1>     r7.0<2;2,1>:w                  { NoDDClr }     // X and Y origin (two elements: r7.0:w, r7.1:w)
+mov (1)     mudDN_Y_OUT(0,2)<1>     0x7000F:ud    { NoDDChk }     // block width and height (16x8)
+
+//send out data through data port; descriptor 0xA0A8018 = message length 5, binding table index 0x18
+send (8)    null<1>:d    mudDN_Y_OUT      0x5    0xA0A8018:ud
+
+
+
+// FileName:	DN_Save_UV_IMC3_16x8.asm 
+// Author:		Vivek Kumar
+// Description:	Save UV for 16x8 block through DATAPORT 
+
+
+
+// FileName:	UVCopy_Save_16x8.asm
+// Author:		Vivek Kumar
+// Description:	Save UV for 16x8 block through DATAPORT
+
+
+//Reuse the headers (r36 for U, r38 for V) from the Load component
+//The block-size dword of each header is rewritten at the end of the load so the header is usable for the save.
+
+// Payload: one GRF each, copied from the first GRF of the U response (r58) and of the V response (r60).
+	mov (8)		mudMSGHDR_UCOPY(1)<1>		udDNDI_UV_RESP(0)<8;8,1>
+	mov (8)		mudMSGHDR_VCOPY(1)<1>		udDNDI_UV_RESP(2)<8;8,1>
+    send (4)    null<1>:d    r36	0x5    0x40A8019:ud		// message length 2 (r36-r37), binding table index 0x19
+    send (4)    null<1>:d    r38	0x5    0x40A801A:ud		// message length 2 (r38-r39), binding table index 0x1A
+
+
+
+//End of Thread message
+// r127 receives a copy of r0 and is sent as the single-register message that ends the thread.
+mov (8) r127<1>:ud r0.0<8;8,1>:ud 
+ send (1) null<1>:d r127 0x27 0x02000010	// extended descriptor 0x27, descriptor 0x02000010 (message length 1)
+
+
+.end_code 
+.end_kernel
diff --git a/src/shaders/post_processing/gen7/PL_DI_422CP.g4a b/src/shaders/post_processing/gen7/PL_DI_422CP.g4a
new file mode 100644
index 0000000..87db22f
--- /dev/null
+++ b/src/shaders/post_processing/gen7/PL_DI_422CP.g4a
@@ -0,0 +1,461 @@
+/*
+ *  Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+//   87    // Total instruction count
+//    1    // Total kernel count
+
+.kernel PL_DI_422CP
+.code
+// Kernel flow, in file order: DNDI_Command -> DI_STMM_Save -> DNDI_Enc_Stats_Save -> DI_Save_422CP_16x4 -> end-of-thread message.
+// NOTE(review): this text looks like preprocessor output (include files and .asm fragments concatenated,
+// with the #define sections left empty) - confirm before hand-editing.
+// FileName:	DI.asm 
+// Author:		Vivek Kumar
+// Description:	Tasks for DI only case (16x4 block)
+
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+// Assembler defaults: execution size 16 and register type :ub (presumably used when an instruction omits them - confirm).
+.default_execution_size (16)
+.default_register_type  :ub
+// Register counts declared for this kernel: 128 total, 7 payload.
+.reg_count_total        128
+.reg_count_payload      7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+                                    //  e.g.            byte0   byte1  byte2
+                                    // YUYV               0       1      3
+                                    // YVYU               0       3      1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+                                    //  e.g.              byte0           byte1           byte2
+                                    // YUYV                 0               1               3
+                                    // YVYU                 0               3               1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+//                = 000 0001 (min message len 1 ) 0,0000 (resp len 0   -add later)
+//                  0000,0000,0000
+//                  0001(Spawn a root thread),0001 (Root thread spawn thread)
+//                = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+//                = 000 0110 (min message len 6 ) 0,0000 (resp len 0   -add later)
+//                  1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+//                  0000,0000 (Binding table index, added later)
+//                = 0x02000011
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+        // Message Descriptors
+                //                = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+                //                  1 (header present 1) 0 1010 (media block write) 000000
+                //                  00000000 (binding table index - set later)
+                //                = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+                             // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT  r43    // Dummy Message Register for EOT
+
+// Byte, word, dword and float views of one payload area based at r30.
+// None of the instructions in this kernel reference these four names.
+.declare    mubMSGPAYLOAD  Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare    muwMSGPAYLOAD  Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare    mudMSGPAYLOAD  Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare    mfMSGPAYLOAD   Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+// All five names below alias the same storage starting at r9.0; they differ only in element type and default regions.
+.declare fROBUF		  Base=r9.0		ElementSize=4		SrcRegion=<8;8,1>		  DstRegion=<1>		Type=f
+
+.declare udROBUF		Base=r9.0		ElementSize=4		SrcRegion=<8;8,1>		  DstRegion=<1>		Type=ud
+
+.declare uwROBUF		Base=r9.0		ElementSize=2		SrcRegion=<16;16,1>		DstRegion=<1>		Type=uw
+
+.declare ubROBUF		Base=r9.0		ElementSize=1		SrcRegion=<16;16,1>		DstRegion=<1>		Type=ub
+// ub4ROBUF: byte view whose default regions step by 4 bytes (SrcRegion <32;8,4>, DstRegion <4>).
+.declare ub4ROBUF 	Base=r9.0		ElementSize=1		SrcRegion=<32;8,4>		DstRegion=<4>		Type=ub
+
+
+// End of common.inc
+
+
+// FileName:    DNDI.inc
+// Author:      Vivek Kumar
+// Description: Include file for DN, DI and DNDI
+// Inputs:      DI_ENABLE, DN_ENABLE, DN_PLANAR, DN_PACKED
+
+
+
+
+// End of common.inc
+
+
+//Interface:
+//Static Parameters:
+//r1
+
+
+//====================== Binding table (Explicit To DNDI)=========================================
+// The .declare lines below give typed names to the GRF regions that the DN/DI code uses as
+// message headers/payloads and response buffers. Several names share a base register.
+.declare mudMSGHDR_DNDI     Base=r18      ElementSize=4    Type=ud
+.declare mdMSGHDR_DNDI      Base=r18      ElementSize=4    Type=d
+.declare mwMSGHDR_DNDI      Base=r18      ElementSize=2    Type=w
+
+// STMM message registers start at r20.
+.declare mudMSGHDR_STMM     Base=r20      ElementSize=4    Type=ud
+
+// Denoise-history message registers start at r22.
+.declare mudMSGHDR_HIST     Base=r22      ElementSize=4    Type=ud
+
+// Encoder-statistics message registers start at r24 (dword, word and byte views of the same GRF).
+.declare mudMSGHDR_ENC_STATS Base=r24 ElementSize=4   Type=ud
+.declare muwMSGHDR_ENC_STATS Base=r24 ElementSize=2   Type=uw
+.declare mubMSGHDR_ENC_STATS Base=r24 ElementSize=1   Type=ub
+
+// DN output message registers start at r31.
+.declare mudMSGHDR_DN_OUT   Base=r31.0      ElementSize=4  Type=ud
+.declare mdMSGHDR_DN_OUT    Base=r31.0      ElementSize=4  Type=d
+.declare mubMSGHDR_DN_OUT   Base=r31.0      ElementSize=1  Type=ub
+
+// U/V copy message registers: UVCOPY and UCOPY share r36; VCOPY starts at r38.
+.declare mudMSGHDR_UVCOPY   Base=r36      ElementSize=4  Type=ud
+.declare mdMSGHDR_UVCOPY    Base=r36      ElementSize=4  Type=d
+.declare mudMSGHDR_UCOPY    Base=r36       ElementSize=4  Type=ud
+.declare mudMSGHDR_VCOPY    Base=r38       ElementSize=4  Type=ud
+
+// DI output message 1 starts at r18, the same base register as the DNDI message header above.
+.declare mudMSGHDR_DI_OUT1  Base=r18.0      ElementSize=4     Type=ud
+.declare mubMSGHDR_DI_OUT1  Base=r18.0      ElementSize=1     Type=ub
+
+// DI output message 2 starts at r23.
+.declare mudMSGHDR_DI_OUT2  Base=r23.0      ElementSize=4     Type=ud
+.declare mubMSGHDR_DI_OUT2  Base=r23.0      ElementSize=1     Type=ub
+
+//r45
+//Use r45 as message header, so no need to "mov" the data.
+// r45 is the register immediately before the r46 response buffer declared below.
+.declare mudDN_Y_OUT        Base=r45.0 ElementSize=4 SrcRegion=<8;8,1>   DstRegion=<1> Type=ud
+
+// Message response (Denoised & DI-ed pixels & statistics); Use buffer 5
+.declare udDNDI_RESP        Base=r46.0 ElementSize=4 SrcRegion=<8;8,1>   DstRegion=<1> Type=ud
+.declare uwDNDI_RESP        Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare ubDNDI_RESP        Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+// Message response (UV Copy); Use buffer 5
+.declare udDNDI_UV_RESP     Base=r58.0 ElementSize=4 SrcRegion=<8;8,1>  DstRegion=<1> Type=ud
+.declare ubDNDI_UV_RESP     Base=r58.0 ElementSize=1 SrcRegion=<16;16,1>    DstRegion=<1> Type=ub
+
+//Temp GRFs: For 42X to 422 Conversion
+.declare uwDNDI_UVCOPY_TEMP Base=r10.0 ElementSize=2 SrcRegion=<16;16,1>    DstRegion=<1> Type=uw       //8 GRFs
+.declare ubDNDI_UVCOPY_TEMP Base=r10.0 ElementSize=1 SrcRegion=<16;16,1>    DstRegion=<1> Type=ub       //8 GRFs
+//---------------------------------------------------------------------------
+// Message descriptors
+//---------------------------------------------------------------------------
+// Extended message descriptor
+    // Message descriptor   for sampler read
+    //                    = 000 0010 (message len 2) 00000 (resp len - set later, 12 or 5 or 11)
+    //                      1 (header present 1) 0 11 (SIMD32/64 mode)
+    //                      1000 (message type) 0000 (DI state index)
+    //                      00000000 (binding table index - set later)
+    //                    = 0x040b8000
+
+
+// Attention: The Message Length is The Number of GRFs with Data Only, without the Header
+
+
+//---------------------------------------------------------------------------
+// VDI Return Data format
+//---------------------------------------------------------------------------
+// Defines for DI enabled
+
+
+// Defines for DI disabled
+
+
+
+// FileName:	DNDI_Command.asm 
+// Author:		Vivek Kumar
+// Description:	Sends a message to the VDI to process one DN (16x8) or DNDI (16x4) block
+// Reads r0 (copied into the message header) and r7.0:w / r7.1:w (block X/Y origin); the response is written to udDNDI_RESP (r46 onward).
+// Prepare the DNDI send command
+mov (8)		mudMSGHDR_DNDI(0)<1>			r0.0<8;8,1>:ud					// message header (r18) = copy of r0
+mov (1)		mwMSGHDR_DNDI(1,4)<1>			r7.0<0;1,0>:w		{ NoDDClr }		// horizontal origin -> word 4 of r19	// Do we need to add offset here? -vK
+mov (1)		mwMSGHDR_DNDI(1,12)<1>			r7.1<0;1,0>:w		{ NoDDChk }		// vertical origin -> word 12 of r19		// Can these 2 be combined? - vK
+// Descriptor 0x4AE8003: message length 2, response length 10, binding table index 3 (bits 28:25, 24:20 and 7:0, following the field order of the DNDI.inc descriptor comment).
+send (8)	udDNDI_RESP(0)<1>	r18	0x2	0x4AE8003:ud
+
+// On Gen6, with VDI walker, use the XY pair returned rather than the one programmed above
+// VDI_RETURNED_XY is ordered XY when the walker is enabled, and is the same as programmed when the walker is disabled
+mov (2) 	r7.0<1>:w     uwDNDI_RESP(9,14)<2;2,1>	// horizontal/vertical origin in W.14 and W.15 of response GRF 9 (r55)
+
+
+
+// FileName:	DI_STMM_Save.asm 
+// Author:		Vivek Kumar
+// Description:	Saves DI STMM Data to statistics surface in case of DI enabled (for 16x4 block)
+// Header is built in r20 (mudMSGHDR_STMM(0)); the payload in r21 is a copy of response GRF 8 (r54).
+// Write STMM to memory
+mov (8)     mudMSGHDR_STMM(0)<1>	r0.0<8;8,1>:ud               							// message header = copy of r0
+mov (8)     mudMSGHDR_STMM(1)<1>   	udDNDI_RESP(8,0)         					// Move STMM to MRF (source region defaults to the declared <8;8,1>)
+
+shr (1)     mudMSGHDR_STMM(0,0)<1>	r7.0<0;1,0>:w            1:w     { NoDDClr } 		// X origin / 2
+mov (1)     mudMSGHDR_STMM(0,1)<1>	r7.1<0;1,0>:w                    { NoDDClr, NoDDChk }  	// Y origin
+mov (1)     mudMSGHDR_STMM(0,2)<1>	0x30007:ud           { NoDDChk } 		// block width and height (8x4)
+// Descriptor 0x40A8021: message length 2 (r20-r21), binding table index 0x21; no response (null destination).
+send (8)	null<1>:d	r20	0x5		0x40A8021:ud      
+
+
+
+// FileName:	DNDI_Enc_Stats_Save.asm
+// Author:		Vivek Kumar
+// Description:	Saves Encoder Statistics data to statistics surface in case of DI enabled (for 16x4 block)
+// Header is built in r24 (mudMSGHDR_ENC_STATS(0)), payload in r25 (mudMSGHDR_ENC_STATS(1)); statistics are read from response GRF 9 (r55).
+// Write encoder statistics to memory
+//Currently enable this only on Gen6 validation
+mov (8)		mudMSGHDR_ENC_STATS(1)<1>	0x0:ud						// Init payload MRF (zero all 8 dwords of r25)
+mov (8)		mudMSGHDR_ENC_STATS(0)<1>	r0.0<8;8,1>:ud				// message header = copy of r0
+
+shr (1)		mudMSGHDR_ENC_STATS(0,0)<1>		r7.0<0;1,0>:w            	1:w  	{ NoDDClr }			// X origin / 2  (original note: enable the flag after testing on si)
+mul (1)		acc0.1<1>:ud					r7.1<0;1,0>:w				3:w																							// Y origin * 3
+shr (1)		mudMSGHDR_ENC_STATS(0,1)<1>		acc0.1<0;1,0>:ud			2:w		{ NoDDClr, NoDDChk }		// Y origin * 3/4  (original note: enable the flag after testing on si)
+mov (1)		mudMSGHDR_ENC_STATS(0,2)<1>		0x20007:ud				{ NoDDChk }			// block width and height (8x3): 0x20007 = (height-1)<<16 | (width-1)
+add (2)		mudMSGHDR_ENC_STATS(0,0)<1>		mudMSGHDR_ENC_STATS(0,0)<2;2,1>       	r1.12<2;2,1>:uw					// Add pitch to X,Y origin (adds r1.12:uw to X and r1.13:uw to Y)
+
+
+	//Data block for Encoder Statistics
+	//----------------------------------------------------
+	//|  0  |   1  |   2   |  3  |  4  |  5  |  6  |  7  | Bytes
+	//----------------------------------------------------
+	//| BNE | MCNT | FCNT | TCNT |  X  |  X  |  X  |  X  |
+	//----------------------------------------------------
+	//|   DcTpT    |     SVCM    |   DcBpT   |   DcTpB   |
+	//----------------------------------------------------
+	//|   SHCM     |     STAD    |   DcTcB   |   DcBpB   |
+	//----------------------------------------------------
+	mov (1)		mudMSGHDR_ENC_STATS(1,0)<1>		udDNDI_RESP(9,1)<0;1,0>    		{ NoDDClr }			// payload dword 0       <- response GRF 9, dword 1
+	mov (2)		mudMSGHDR_ENC_STATS(1,3)<2>		udDNDI_RESP(9,3)<2;2,1>    		{ NoDDClr, NoDDChk }		// payload dwords 3 and 5 <- response GRF 9, dwords 3 and 4 (destination stride 2)
+	mov (2)		mudMSGHDR_ENC_STATS(1,2)<2>		udDNDI_RESP(9,5)<2;2,1>    		{ NoDDChk }			// payload dwords 2 and 4 <- response GRF 9, dwords 5 and 6 (destination stride 2)
+
+// Descriptor 0x40A8021: message length 2 (r24-r25), binding table index 0x21; no response (null destination).
+send (8)   null<1>:d    r24    0x5    0x40A8021:ud
+
+
+
+// FileName:    DI_Save_422CP_16x4.asm
+// Author:      Vivek Kumar
+// Description: Save two 16x4 blocks of DI output in 422 format to Color Pipe (IECP)
+// Four write messages of three GRFs each (header + two payload GRFs) are laid out back to back:
+// r18-r20, r21-r23, r24-r26 and r27-r29. The byte views declared here name their first registers.
+.declare mubMSGHDR_DI_OUT1_1  Base=r18.0      ElementSize=1  Type=ub
+
+
+.declare mubMSGHDR_DI_OUT1_2  Base=r21.0      ElementSize=1  Type=ub
+
+
+.declare mubMSGHDR_DI_OUT2_1  Base=r24.0      ElementSize=1  Type=ub
+
+
+.declare mubMSGHDR_DI_OUT2_2  Base=r27.0      ElementSize=1  Type=ub
+
+// r27 is used as scratch for the common header here; it is overwritten later with the fourth message's own header.
+mov (8) r27.0<1>:ud     r0.0<8;8,1>:ud
+shl (1) r27.0<1>:ud     r7.0<0;1,0>:w            1:w  { NoDDClr }          // H. block origin needs to be doubled
+mov (1) r27.1<1>:ud     r7.1<0;1,0>:w                 { NoDDClr, NoDDChk }       // V. block origin
+mov (1) r27.2<1>:ud     0x3000F:ud        { NoDDClr, NoDDChk }       // Block width and height: 0x3000F = (height-1)<<16 | (width-1) = 16x4
+
+//M0.3  - 0 - CP Enable, 1 - Area of Interest, 3:2 Message Format(TBD), 4:3 - Ignored, 31:5 CP state pointer
+//Compose area-of-interest bit + color pipe state pointer
+or (1)  r27.3<1>:ud     r2.4<0;1,0>:ud     r7.26<0;1,0>:b     { NoDDChk }
+
+//prepare the message headers (r18 for the first message of field 2, r24 for the first message of field 1)
+mov (8) r18.0<1>:ud       r27<8;8,1>:ud
+mov (8) r24.0<1>:ud       r27<8;8,1>:ud
+
+
+// Pack 2nd field Y; First 8x4 block
+	mov (8)		mubMSGHDR_DI_OUT1_1(1)<2>			ubDNDI_RESP(0,0)<8;8,1>			{ NoDDClr } 	// copy line of Y directly to memory as optimization
+	mov (8)		mubMSGHDR_DI_OUT1_1(1,16)<2>		ubDNDI_RESP(0,16)<8;8,1>			{ NoDDClr, NoDDChk } 	// copy line of Y directly to memory as optimization
+	mov (8)		mubMSGHDR_DI_OUT1_1(2)<2>			ubDNDI_RESP(0,32)<8;8,1>			{ NoDDClr } 	// copy line of Y directly to memory as optimization
+	mov (8)		mubMSGHDR_DI_OUT1_1(2,16)<2>		ubDNDI_RESP(0,48)<8;8,1>			{ NoDDClr, NoDDChk } 	// copy line of Y directly to memory as optimization
+// Y bytes go to the even byte offsets of each 16-byte output row (destination stride 2); U fills offsets 1,5,9,13 and V fills 3,7,11,15 (stride 4).
+// Pack 2nd field U, V; First 8x4 block
+	mov (4)     mubMSGHDR_DI_OUT1_1(1,1)<4>   		ubDNDI_RESP(2,1)<8;4,2>		{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
+	mov (4)     mubMSGHDR_DI_OUT1_1(1,17)<4>   	ubDNDI_RESP(2,17)<8;4,2>		{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
+
+	mov (4)     mubMSGHDR_DI_OUT1_1(1,3)<4>	  	ubDNDI_RESP(2,0)<8;4,2>			{ NoDDChk }    	// copy line of V directly to memory as optimization
+	mov (4)     mubMSGHDR_DI_OUT1_1(1,19)<4>   	ubDNDI_RESP(2,16)<8;4,2>		{ NoDDChk }    	// copy line of V directly to memory as optimization
+	mov (4)     mubMSGHDR_DI_OUT1_1(2,1)<4>   		ubDNDI_RESP(2,33)<8;4,2>		{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
+	mov (4)     mubMSGHDR_DI_OUT1_1(2,17)<4>   	ubDNDI_RESP(2,49)<8;4,2>		{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
+
+	mov (4)     mubMSGHDR_DI_OUT1_1(2,3)<4>	  	ubDNDI_RESP(2,32)<8;4,2>			{ NoDDChk }    	// copy line of V directly to memory as optimization
+	mov (4)     mubMSGHDR_DI_OUT1_1(2,19)<4>   	ubDNDI_RESP(2,48)<8;4,2>		{ NoDDChk }    	// copy line of V directly to memory as optimization
+
+// The second message's header (r21) is a copy of the first (r18) with the X origin advanced by 0x10.
+// Pack 2nd field Y; Second 8x4 block
+mov	(8)	r21.0<1>:ud		r18.0<8;8,1>:ud
+add	(1)	r21.0<1>:ud		r21.0<0;1,0>:w		0x10:w
+
+	mov (8)		mubMSGHDR_DI_OUT1_2(1)<2>			ubDNDI_RESP(0,8)<8;8,1>			{ NoDDClr } 	// copy line of Y directly to memory as optimization
+	mov (8)		mubMSGHDR_DI_OUT1_2(1,16)<2>		ubDNDI_RESP(0,24)<8;8,1>		{ NoDDClr, NoDDChk } 	// copy line of Y directly to memory as optimization
+	mov (8)		mubMSGHDR_DI_OUT1_2(2)<2>			ubDNDI_RESP(0,40)<8;8,1>			{ NoDDClr } 	// copy line of Y directly to memory as optimization
+	mov (8)		mubMSGHDR_DI_OUT1_2(2,16)<2>		ubDNDI_RESP(0,56)<8;8,1>		{ NoDDClr, NoDDChk } 	// copy line of Y directly to memory as optimization
+
+// Pack 2nd field U, V; Second 8x4 block
+	mov (4)     mubMSGHDR_DI_OUT1_2(1,1)<4>   		ubDNDI_RESP(2,9)<8;4,2>		{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
+	mov (4)     mubMSGHDR_DI_OUT1_2(1,17)<4>		ubDNDI_RESP(2,25)<8;4,2>	{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
+
+	mov (4)     mubMSGHDR_DI_OUT1_2(1,3)<4>   		ubDNDI_RESP(2,8)<8;4,2>		{ NoDDChk }    	// copy line of V directly to memory as optimization
+	mov (4)     mubMSGHDR_DI_OUT1_2(1,19)<4>		ubDNDI_RESP(2,24)<8;4,2>		{ NoDDChk }    	// copy line of V directly to memory as optimization
+	mov (4)     mubMSGHDR_DI_OUT1_2(2,1)<4>   		ubDNDI_RESP(2,41)<8;4,2>		{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
+	mov (4)     mubMSGHDR_DI_OUT1_2(2,17)<4>		ubDNDI_RESP(2,57)<8;4,2>	{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
+
+	mov (4)     mubMSGHDR_DI_OUT1_2(2,3)<4>   		ubDNDI_RESP(2,40)<8;4,2>		{ NoDDChk }    	// copy line of V directly to memory as optimization
+	mov (4)     mubMSGHDR_DI_OUT1_2(2,19)<4>		ubDNDI_RESP(2,56)<8;4,2>		{ NoDDChk }    	// copy line of V directly to memory as optimization
+// Both sends use descriptor 0x60A801B: message length 3 (header + two payload GRFs), binding table index 0x1B.
+send (8)    null<1>:d    r18.0   0x5     0x60A801B:ud
+send (8)    null<1>:d    r21.0   0x5     0x60A801B:ud
+
+// Pack 1st field Y; 1st 8x4 block
+	mov (8)		mubMSGHDR_DI_OUT2_1(1)<2>			ubDNDI_RESP(4,0)<8;8,1>			{ NoDDClr } 	// copy line of Y directly to memory as optimization
+	mov (8)		mubMSGHDR_DI_OUT2_1(1,16)<2>		ubDNDI_RESP(4,16)<8;8,1>			{ NoDDClr, NoDDChk } 	// copy line of Y directly to memory as optimization
+	mov (8)		mubMSGHDR_DI_OUT2_1(2)<2>			ubDNDI_RESP(4,32)<8;8,1>			{ NoDDClr } 	// copy line of Y directly to memory as optimization
+	mov (8)		mubMSGHDR_DI_OUT2_1(2,16)<2>		ubDNDI_RESP(4,48)<8;8,1>			{ NoDDClr, NoDDChk } 	// copy line of Y directly to memory as optimization
+// Same packing as for the 2nd field, but reading response GRFs 4-5 (Y) and 6-7 (U/V) and writing the messages at r24 and r27.
+// Pack 1st field U,V; 1st 8x4 block
+	mov (4)     mubMSGHDR_DI_OUT2_1(1,1)<4>   		ubDNDI_RESP(6,1)<8;4,2>		{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
+	mov (4)     mubMSGHDR_DI_OUT2_1(1,17)<4>   	ubDNDI_RESP(6,17)<8;4,2>		{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
+
+	mov (4)     mubMSGHDR_DI_OUT2_1(1,3)<4>	  	ubDNDI_RESP(6,0)<8;4,2>		    { NoDDChk }    	// copy line of V directly to memory as optimization
+	mov (4)     mubMSGHDR_DI_OUT2_1(1,19)<4>   	ubDNDI_RESP(6,16)<8;4,2>	    { NoDDChk }    	// copy line of V directly to memory as optimization
+	mov (4)     mubMSGHDR_DI_OUT2_1(2,1)<4>   		ubDNDI_RESP(6,33)<8;4,2>		{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
+	mov (4)     mubMSGHDR_DI_OUT2_1(2,17)<4>   	ubDNDI_RESP(6,49)<8;4,2>		{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
+
+	mov (4)     mubMSGHDR_DI_OUT2_1(2,3)<4>	  	ubDNDI_RESP(6,32)<8;4,2>		    { NoDDChk }    	// copy line of V directly to memory as optimization
+	mov (4)     mubMSGHDR_DI_OUT2_1(2,19)<4>   	ubDNDI_RESP(6,48)<8;4,2>	    { NoDDChk }    	// copy line of V directly to memory as optimization
+// The second message's header (r27) is a copy of the first (r24) with the X origin advanced by 0x10.
+// Pack 1st field Y; 2nd 8x4 block
+mov	(8)	r27.0<1>:ud		r24.0<8;8,1>:ud
+add	(1)	r27.0<1>:ud		r27.0<0;1,0>:w		0x10:w
+
+	mov (8)		mubMSGHDR_DI_OUT2_2(1)<2>			ubDNDI_RESP(4,8)<8;8,1>			{ NoDDClr } 	// copy line of Y directly to memory as optimization
+	mov (8)		mubMSGHDR_DI_OUT2_2(1,16)<2>		ubDNDI_RESP(4,24)<8;8,1>		{ NoDDClr, NoDDChk } 	// copy line of Y directly to memory as optimization
+	mov (8)		mubMSGHDR_DI_OUT2_2(2)<2>			ubDNDI_RESP(4,40)<8;8,1>			{ NoDDClr } 	// copy line of Y directly to memory as optimization
+	mov (8)		mubMSGHDR_DI_OUT2_2(2,16)<2>		ubDNDI_RESP(4,56)<8;8,1>		{ NoDDClr, NoDDChk } 	// copy line of Y directly to memory as optimization
+
+// Pack 1st field U, V; 2nd 8x4 block
+	mov (4)     mubMSGHDR_DI_OUT2_2(1,1)<4>   		ubDNDI_RESP(6,9)<8;4,2>		{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
+	mov (4)     mubMSGHDR_DI_OUT2_2(1,17)<4>		ubDNDI_RESP(6,25)<8;4,2>	{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
+
+	mov (4)     mubMSGHDR_DI_OUT2_2(1,3)<4>   		ubDNDI_RESP(6,8)<8;4,2>		{ NoDDChk }    	// copy line of V directly to memory as optimization
+	mov (4)     mubMSGHDR_DI_OUT2_2(1,19)<4>		ubDNDI_RESP(6,24)<8;4,2>		{ NoDDChk }    	// copy line of V directly to memory as optimization
+	mov (4)     mubMSGHDR_DI_OUT2_2(2,1)<4>   		ubDNDI_RESP(6,41)<8;4,2>		{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
+	mov (4)     mubMSGHDR_DI_OUT2_2(2,17)<4>		ubDNDI_RESP(6,57)<8;4,2>	{ NoDDClr, NoDDChk } 	// copy line of U directly to memory as optimization
+
+	mov (4)     mubMSGHDR_DI_OUT2_2(2,3)<4>   		ubDNDI_RESP(6,40)<8;4,2>		{ NoDDChk }    	// copy line of V directly to memory as optimization
+	mov (4)     mubMSGHDR_DI_OUT2_2(2,19)<4>		ubDNDI_RESP(6,56)<8;4,2>		{ NoDDChk }    	// copy line of V directly to memory as optimization
+// Both sends use descriptor 0x60A801E: message length 3 (header + two payload GRFs), binding table index 0x1E.
+send (8)    null<1>:d    r24.0     0x5     0x60A801E:ud
+send (8)    null<1>:d    r27.0     0x5     0x60A801E:ud
+
+
+
+//End of Thread message
+// r127 receives a copy of r0 and is sent as the single-register message that ends the thread.
+mov (8) r127<1>:ud r0.0<8;8,1>:ud 
+ send (1) null<1>:d r127 0x27 0x02000010	// extended descriptor 0x27, descriptor 0x02000010 (message length 1)
+
+
+.end_code 
+.end_kernel
diff --git a/src/shaders/post_processing/gen7/PL_DI_PA.g4a b/src/shaders/post_processing/gen7/PL_DI_PA.g4a
new file mode 100644
index 0000000..6f56e0d
--- /dev/null
+++ b/src/shaders/post_processing/gen7/PL_DI_PA.g4a
@@ -0,0 +1,399 @@
+/*
+ *  Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+//   57    // Total instruction count
+//    1    // Total kernel count
+
+.kernel PL_DI_PA
+.code
+
+
+
+// FileName:	DI.asm 
+// Author:		Vivek Kumar
+// Description:	Tasks for DI only case (16x4 block)
+
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16)
+.default_register_type  :ub
+
+.reg_count_total        128
+.reg_count_payload      7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+                                    //  e.g.            byte0   byte1  byte2
+                                    // YUYV               0       1      3
+                                    // YVYU               0       3      1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+                                    //  e.g.              byte0           byte1           byte2
+                                    // YUYV                 0               1               3
+                                    // YVYU                 0               3               1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+//                = 000 0001 (min message len 1 ) 0,0000 (resp len 0   -add later)
+//                  0000,0000,0000
+//                  0001(Spawn a root thread),0001 (Root thread spawn thread)
+//                = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+//                = 000 0110 (min message len 6 ) 0,0000 (resp len 0   -add later)
+//                  1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+//                  0000,0000 (Binding table index, added later)
+//                = 0x0C098700
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+        // Message Descriptors
+                //                = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+                //                  1 (header present 1) 0 1010 (media block write) 000000
+                //                  00000000 (binding table index - set later)
+                //                = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+                             // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT  r43    // Dummy Message Register for EOT
+
+
+.declare    mubMSGPAYLOAD  Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub	// generic message payload area at r30, byte view
+.declare    muwMSGPAYLOAD  Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw	// same area, word view
+.declare    mudMSGPAYLOAD  Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud	// same area, dword view
+.declare    mfMSGPAYLOAD   Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f	// same area, float view
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+
+.declare fROBUF		  Base=r9.0		ElementSize=4		SrcRegion=<8;8,1>		  DstRegion=<1>		Type=f	// rotation temp space at r9, float view
+
+.declare udROBUF		Base=r9.0		ElementSize=4		SrcRegion=<8;8,1>		  DstRegion=<1>		Type=ud	// same registers, dword view
+
+.declare uwROBUF		Base=r9.0		ElementSize=2		SrcRegion=<16;16,1>		DstRegion=<1>		Type=uw	// same registers, word view
+
+.declare ubROBUF		Base=r9.0		ElementSize=1		SrcRegion=<16;16,1>		DstRegion=<1>		Type=ub	// same registers, byte view
+
+.declare ub4ROBUF 	Base=r9.0		ElementSize=1		SrcRegion=<32;8,4>		DstRegion=<4>		Type=ub	// same registers, byte view with a 4-byte element stride
+
+
+// End of common.inc
+
+
+// FileName:    DNDI.inc
+// Author:      Vivek Kumar
+// Description: Include file for DN, DI and DNDI
+// Inputs:      DI_ENABLE, DN_ENABLE, DN_PLANAR, DN_PACKED
+
+
+
+
+// End of common.inc
+
+
+//Interface:
+//Static Parameters:
+//r1
+
+
+//====================== Binding table (Explicit To DNDI)=========================================
+
+
+.declare mudMSGHDR_DNDI     Base=r18      ElementSize=4    Type=ud	// DNDI request header at r18, ud view
+.declare mdMSGHDR_DNDI      Base=r18      ElementSize=4    Type=d	// same registers, d view
+.declare mwMSGHDR_DNDI      Base=r18      ElementSize=2    Type=w	// same registers, w view (used to write the block origin words)
+
+
+.declare mudMSGHDR_STMM     Base=r20      ElementSize=4    Type=ud	// STMM write message: header in r20, payload in r21
+
+
+.declare mudMSGHDR_HIST     Base=r22      ElementSize=4    Type=ud	// not referenced by the instructions of this kernel
+
+
+.declare mudMSGHDR_ENC_STATS Base=r24 ElementSize=4   Type=ud	// encoder statistics write message: header in r24, payload in r25
+.declare muwMSGHDR_ENC_STATS Base=r24 ElementSize=2   Type=uw	// same registers, uw view
+.declare mubMSGHDR_ENC_STATS Base=r24 ElementSize=1   Type=ub	// same registers, ub view
+
+
+.declare mudMSGHDR_DN_OUT   Base=r31.0      ElementSize=4  Type=ud	// not referenced by the instructions of this kernel
+.declare mdMSGHDR_DN_OUT    Base=r31.0      ElementSize=4  Type=d
+.declare mubMSGHDR_DN_OUT   Base=r31.0      ElementSize=1  Type=ub
+
+
+.declare mudMSGHDR_UVCOPY   Base=r36      ElementSize=4  Type=ud	// not referenced by the instructions of this kernel
+.declare mdMSGHDR_UVCOPY    Base=r36      ElementSize=4  Type=d
+.declare mudMSGHDR_UCOPY    Base=r36       ElementSize=4  Type=ud	// same base register as mudMSGHDR_UVCOPY
+.declare mudMSGHDR_VCOPY    Base=r38       ElementSize=4  Type=ud
+
+
+.declare mudMSGHDR_DI_OUT1  Base=r18.0      ElementSize=4     Type=ud	// shares r18 with mudMSGHDR_DNDI; the save code addresses r18 by number
+.declare mubMSGHDR_DI_OUT1  Base=r18.0      ElementSize=1     Type=ub
+
+
+.declare mudMSGHDR_DI_OUT2  Base=r23.0      ElementSize=4     Type=ud	// second DI output message; the save code addresses r23 by number
+.declare mubMSGHDR_DI_OUT2  Base=r23.0      ElementSize=1     Type=ub
+
+//r45
+//Use r45 as message header, so no need to "mov" the data.
+
+.declare mudDN_Y_OUT        Base=r45.0 ElementSize=4 SrcRegion=<8;8,1>   DstRegion=<1> Type=ud	// not referenced by the instructions of this kernel
+
+// Message response (denoised & de-interlaced pixels & statistics); uses buffer 5
+.declare udDNDI_RESP        Base=r46.0 ElementSize=4 SrcRegion=<8;8,1>   DstRegion=<1> Type=ud	// response area starting at r46, ud view
+.declare uwDNDI_RESP        Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw	// same registers, uw view
+.declare ubDNDI_RESP        Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub	// same registers, ub view
+
+// Message response (UV copy); uses buffer 5
+.declare udDNDI_UV_RESP     Base=r58.0 ElementSize=4 SrcRegion=<8;8,1>  DstRegion=<1> Type=ud	// starts at r58, 12 registers after udDNDI_RESP
+.declare ubDNDI_UV_RESP     Base=r58.0 ElementSize=1 SrcRegion=<16;16,1>    DstRegion=<1> Type=ub
+
+// Temp GRFs: for 42X to 422 conversion
+.declare uwDNDI_UVCOPY_TEMP Base=r10.0 ElementSize=2 SrcRegion=<16;16,1>    DstRegion=<1> Type=uw       //8 GRFs
+.declare ubDNDI_UVCOPY_TEMP Base=r10.0 ElementSize=1 SrcRegion=<16;16,1>    DstRegion=<1> Type=ub       //8 GRFs
+//---------------------------------------------------------------------------
+// Message descriptors
+//---------------------------------------------------------------------------
+// Extended message descriptor
+    // Message descriptor   for sampler read
+    //                    = 000 0010 (message len 2) 00000 (resp len - set later, 12 or 5 or 11)
+    //                      1 (header present 1) 0 11 (SIMD32/64 mode)
+    //                      1000 (message type) 0000 (DI state index)
+    //                      00000000 (binding table index - set later)
+    //                    = 0x040b8000
+
+
+// Attention: The Message Length is The Number of GRFs with Data Only, without the Header
+
+
+//---------------------------------------------------------------------------
+// VDI Return Data format
+//---------------------------------------------------------------------------
+// Defines for DI enabled
+
+
+// Defines for DI disabled
+
+
+
+// FileName:	DNDI_Command.asm 
+// Author:		Vivek Kumar
+// Description:	Sends a message to the VDI to process one DN (16x8) or DNDI (16x4) block
+
+// Prepare the DNDI send command: header in r18 (mudMSGHDR_DNDI), block origin taken from r7.0/r7.1
+mov (8)		mudMSGHDR_DNDI(0)<1>			r0.0<8;8,1>:ud					// message header: start from a copy of r0
+mov (1)		mwMSGHDR_DNDI(1,4)<1>			r7.0<0;1,0>:w		{ NoDDClr }		// horizontal origin	// Do we need to add offset here? -vK
+mov (1)		mwMSGHDR_DNDI(1,12)<1>			r7.1<0;1,0>:w		{ NoDDChk }		// vertical origin		// Can these 2 be combined? - vK
+
+send (8)	udDNDI_RESP(0)<1>	r18	0x2	0x4AE8003:ud	// message starts at r18; the response is written starting at udDNDI_RESP(0), i.e. r46
+
+// On Gen6, with VDI walker, use the XY pair returned rather than the one programmed above.
+// VDI_RETURNED_XY is ordered XY when the walker is enabled, and is the same as programmed when the walker is disabled.
+mov (2) 	r7.0<1>:w     uwDNDI_RESP(9,14)<2;2,1>	// horizontal/vertical origin from W.14 and W.15 of response register 9 -> r7.0, r7.1
+
+
+
+// FileName:	DI_STMM_Save.asm 
+// Author:		Vivek Kumar
+// Description:	Saves DI STMM Data to statistics surface in case of DI enabled (for 16x4 block)
+
+// Write STMM to memory: header in r20, one payload register in r21
+mov (8)     mudMSGHDR_STMM(0)<1>	r0.0<8;8,1>:ud               							// message header (r20): start from a copy of r0
+mov (8)     mudMSGHDR_STMM(1)<1>   	udDNDI_RESP(8,0)         					// payload (r21) <- response register 8 (r54), the STMM data
+
+shr (1)     mudMSGHDR_STMM(0,0)<1>	r7.0<0;1,0>:w            1:w     { NoDDClr } 		// X origin / 2
+mov (1)     mudMSGHDR_STMM(0,1)<1>	r7.1<0;1,0>:w                    { NoDDClr, NoDDChk }  	// Y origin
+mov (1)     mudMSGHDR_STMM(0,2)<1>	0x30007:ud           { NoDDChk } 		// block width and height (8x4)
+
+send (8)	null<1>:d	r20	0x5		0x40A8021:ud      	// send header r20 + payload r21; null destination, so no response is read back
+
+
+
+// FileName:	DNDI_Enc_Stats_Save.asm
+// Author:		Vivek Kumar
+// Description:	Saves Encoder Statistics data to statistics surface in case of DI enabled (for 16x4 block)
+
+// Write encoder statistics to memory: header in r24, one payload register in r25
+//Currently enable this only on Gen6 validation
+mov (8)		mudMSGHDR_ENC_STATS(1)<1>	0x0:ud						// zero the payload register (r25)
+mov (8)		mudMSGHDR_ENC_STATS(0)<1>	r0.0<8;8,1>:ud				// message header (r24): start from a copy of r0
+
+shr (1)		mudMSGHDR_ENC_STATS(0,0)<1>		r7.0<0;1,0>:w            	1:w  	{ NoDDClr }			// X origin / 2 (original note: "enable the flag after testing on si")
+mul (1)		acc0.1<1>:ud					r7.1<0;1,0>:w				3:w																							// Y origin * 3, kept in acc0.1
+shr (1)		mudMSGHDR_ENC_STATS(0,1)<1>		acc0.1<0;1,0>:ud			2:w		{ NoDDClr, NoDDChk }		// Y origin * 3/4 (original note: "enable the flag after testing on si")
+mov (1)		mudMSGHDR_ENC_STATS(0,2)<1>		0x20007:ud				{ NoDDChk }			// block width and height (8x3) (original note: "enable the flag after testing on si")
+add (2)		mudMSGHDR_ENC_STATS(0,0)<1>		mudMSGHDR_ENC_STATS(0,0)<2;2,1>       	r1.12<2;2,1>:uw					// add the two uw values at r1.12 to the X,Y origin (original comment: "Add pitch to X,Y origin")
+
+
+	//Data block for Encoder Statistics (3 rows of 8 bytes, matching the 8x3 block size above)
+	//----------------------------------------------------
+	//|  0  |   1  |   2   |  3  |  4  |  5  |  6  |  7  | Bytes
+	//----------------------------------------------------
+	//| BNE | MCNT | FCNT | TCNT |  X  |  X  |  X  |  X  |
+	//----------------------------------------------------
+	//|   DcTpT    |     SVCM    |   DcBpT   |   DcTpB   |
+	//----------------------------------------------------
+	//|   SHCM     |     STAD    |   DcTcB   |   DcBpB   |
+	//----------------------------------------------------
+	mov (1)		mudMSGHDR_ENC_STATS(1,0)<1>		udDNDI_RESP(9,1)<0;1,0>    		{ NoDDClr }			// response dword (9,1) -> payload dword 0
+	mov (2)		mudMSGHDR_ENC_STATS(1,3)<2>		udDNDI_RESP(9,3)<2;2,1>    		{ NoDDClr, NoDDChk }		// response dwords (9,3),(9,4) -> payload dwords 3 and 5 (destination stride 2)
+	mov (2)		mudMSGHDR_ENC_STATS(1,2)<2>		udDNDI_RESP(9,5)<2;2,1>    		{ NoDDChk }			// response dwords (9,5),(9,6) -> payload dwords 2 and 4 (destination stride 2)
+
+
+send (8)   null<1>:d    r24    0x5    0x40A8021:ud	// send header r24 + payload r25; null destination, so no response is read back
+
+
+
+// FileName:    DI_Save_PA_16x4.asm
+// Author:      Vivek Kumar
+// Description: Save two 16x4 blocks of DI output in Packed format
+
+
+add (4) a0.4<1>:uw   r2.28<4;4,1>:ub   608:w               // a0.4..a0.7 = bytes r2.28..r2.31 + 608; 608 = 19*32, i.e. the register right after the r18 header (original note: "it starts at m20"). a0.4/a0.5/a0.6 are the Y/U/V write pointers used below
+
+mov (8) r27.0<1>:ud     r0.0<8;8,1>:ud	// build the shared write header in r27, starting from a copy of r0
+shl (1) r27.0<1>:d      r7.0<0;1,0>:w            1:w  { NoDDClr }          // horizontal block origin is doubled
+mov (1) r27.1<1>:d      r7.1<0;1,0>:w                 { NoDDClr, NoDDChk }       // vertical block origin
+mov (1) r27.2<1>:ud     0x3001F:ud          { NoDDChk }          // Block width and height: 32x4 (0x1F = width-1, 0x3 = height-1, the same encoding as the 0x30007 "8x4" header earlier in this kernel); the original comment said 32x8
+
+//prepare the message headers: r18 and r23 both receive a copy of r27 (r27 itself is overwritten by the last "1st field" row when the pointers carry a small offset)
+mov (8) r18.0<1>:ud       r27<8;8,1>:ud
+mov (8) r23.0<1>:ud       r27<8;8,1>:ud
+
+// Pack 2nd field Y: four rows of 16 bytes from response bytes 0..63, written at a 2-byte destination stride
+    mov (16)    r[a0.4, 0]<2>      ubDNDI_RESP(0,0)               { NoDDClr }
+    mov (16)    r[a0.4, 32]<2>      ubDNDI_RESP(0,16)               { NoDDClr }
+    mov (16)    r[a0.4, 64]<2>      ubDNDI_RESP(0,32)               { NoDDClr }
+    mov (16)    r[a0.4, 96]<2>      ubDNDI_RESP(0,48)               { NoDDClr }
+// Pack 2nd field U: odd bytes of response register 2 onward, written at a 4-byte destination stride
+    mov (8)     r[a0.5, 0]<4>      ubDNDI_RESP(2,1)<16;8,2>   { NoDDClr, NoDDChk }  //U pixels
+    mov (8)     r[a0.5, 32]<4>      ubDNDI_RESP(2,17)<16;8,2>   { NoDDClr, NoDDChk }  //U pixels
+    mov (8)     r[a0.5, 64]<4>      ubDNDI_RESP(2,33)<16;8,2>   { NoDDClr, NoDDChk }  //U pixels
+    mov (8)     r[a0.5, 96]<4>      ubDNDI_RESP(2,49)<16;8,2>   { NoDDClr, NoDDChk }  //U pixels
+// Pack 2nd field V: even bytes of response register 2 onward, written at a 4-byte destination stride
+    mov (8)     r[a0.6, 0]<4>      ubDNDI_RESP(2,0)<16;8,2>     { NoDDChk }     //V pixels
+    mov (8)     r[a0.6, 32]<4>      ubDNDI_RESP(2,16)<16;8,2>     { NoDDChk }     //V pixels
+    mov (8)     r[a0.6, 64]<4>      ubDNDI_RESP(2,32)<16;8,2>     { NoDDChk }     //V pixels
+    mov (8)     r[a0.6, 96]<4>      ubDNDI_RESP(2,48)<16;8,2>     { NoDDChk }     //V pixels
+
+// Pack 1st field Y: same layout, source is response register 4 onward, destination rows start 160 bytes (5 registers) further on
+    mov (16)    r[a0.4, 160]<2>    ubDNDI_RESP(4,0)               { NoDDClr }
+    mov (16)    r[a0.4, 192]<2>    ubDNDI_RESP(4,16)               { NoDDClr }
+    mov (16)    r[a0.4, 224]<2>    ubDNDI_RESP(4,32)               { NoDDClr }
+    mov (16)    r[a0.4, 256]<2>    ubDNDI_RESP(4,48)               { NoDDClr }
+// Pack 1st field U: odd bytes of response register 6 onward
+    mov (8)     r[a0.5, 160]<4>    ubDNDI_RESP(6,1)<16;8,2>   { NoDDClr, NoDDChk }  //U pixels
+    mov (8)     r[a0.5, 192]<4>    ubDNDI_RESP(6,17)<16;8,2>   { NoDDClr, NoDDChk }  //U pixels
+    mov (8)     r[a0.5, 224]<4>    ubDNDI_RESP(6,33)<16;8,2>   { NoDDClr, NoDDChk }  //U pixels
+    mov (8)     r[a0.5, 256]<4>    ubDNDI_RESP(6,49)<16;8,2>   { NoDDClr, NoDDChk }  //U pixels
+// Pack 1st field V: even bytes of response register 6 onward
+    mov (8)     r[a0.6, 160]<4>    ubDNDI_RESP(6,0)<16;8,2>     { NoDDChk }     //V pixels
+    mov (8)     r[a0.6, 192]<4>    ubDNDI_RESP(6,16)<16;8,2>     { NoDDChk }     //V pixels
+    mov (8)     r[a0.6, 224]<4>    ubDNDI_RESP(6,32)<16;8,2>     { NoDDChk }     //V pixels
+    mov (8)     r[a0.6, 256]<4>    ubDNDI_RESP(6,48)<16;8,2>     { NoDDChk }     //V pixels
+
+//save the previous frame (message starting at header r18, i.e. the rows packed at offsets 0..96 above)
+send (8)    null<1>:d    r18.0     0x5     0xA0A801B:ud
+
+//save the current frame (message starting at header r23, i.e. the rows packed at offsets 160..256 above)
+send (8)    null<1>:d    r23.0     0x5     0xA0A801E:ud
+
+
+
+//End of Thread message
+
+mov (8) r127<1>:ud r0.0<8;8,1>:ud 	// r127 = copy of r0, used as the payload of the final send
+ send (1) null<1>:d r127 0x27 0x02000010	// final send of the kernel; NOTE(review): 0x27 presumably carries the end-of-thread bit plus the target unit - confirm against the PRM
+
+
+.end_code 
+.end_kernel
diff --git a/src/shaders/post_processing/gen7/Save_AVS_NV12.g4a b/src/shaders/post_processing/gen7/Save_AVS_NV12.g4a
new file mode 100644
index 0000000..dafe9b1
--- /dev/null
+++ b/src/shaders/post_processing/gen7/Save_AVS_NV12.g4a
@@ -0,0 +1,621 @@
+/*
+ *  Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+//  131    // Total instruction count
+//    1    // Total kernel count
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16)
+.default_register_type  :ub
+
+.reg_count_total        128
+.reg_count_payload      7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+                                    //  e.g.            byte0   byte1  byte2
+                                    // YUYV               0       1      3
+                                    // YVYU               0       3      1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+                                    //  e.g.              byte0           byte1           byte2
+                                    // YUYV                 0               1               3
+                                    // YVYU                 0               3               1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+//                = 000 0001 (min message len 1 ) 0,0000 (resp len 0   -add later)
+//                  0000,0000,0000
+//                  0001(Spawn a root thread),0001 (Root thread spawn thread)
+//                = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+//                = 000 0110 (min message len 6 ) 0,0000 (resp len 0   -add later)
+//                  1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+//                  0000,0000 (Binding table index, added later)
+//                = 0x0C098700
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+        // Message Descriptors
+                //                = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+                //                  1 (header present 1) 0 1010 (media block write) 000000
+                //                  00000000 (binding table index - set later)
+                //                = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+                             // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT  r43    // Dummy Message Register for EOT
+
+
+.declare    mubMSGPAYLOAD  Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare    muwMSGPAYLOAD  Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare    mudMSGPAYLOAD  Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare    mfMSGPAYLOAD   Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+
+.declare fROBUF		  Base=r9.0		ElementSize=4		SrcRegion=<8;8,1>		  DstRegion=<1>		Type=f
+
+.declare udROBUF		Base=r9.0		ElementSize=4		SrcRegion=<8;8,1>		  DstRegion=<1>		Type=ud
+
+.declare uwROBUF		Base=r9.0		ElementSize=2		SrcRegion=<16;16,1>		DstRegion=<1>		Type=uw
+
+.declare ubROBUF		Base=r9.0		ElementSize=1		SrcRegion=<16;16,1>		DstRegion=<1>		Type=ub
+
+.declare ub4ROBUF 	Base=r9.0		ElementSize=1		SrcRegion=<32;8,4>		DstRegion=<4>		Type=ub
+
+
+// End of common.inc
+
+
+// Module name: Save_AVS_NV12.asm
+//
+// Save NV12 420 frame data block of size 16x16
+//
+// To save a 16x16 block (16x16 bytes of Y and 16x8 bytes of interleaved UV), we need 2 send instructions, of size 16x16 and 16x8 respectively.
+//  ---------------
+//  |    16x16    |
+//  |    YUYV     |
+//  ---------------
+//  | 16x8  UV	  |
+//  ---------------
+
+//-----------------------------------------------------------------
+//The layout of data is as follows:
+//mMSGHDR0			: Y data header (16x16)
+//mubMSGPAYLOAD0	: Y data payload (8 GRFs)
+//mMSGHDR1			: UV data header (16x8)
+//mubMSGPAYLOAD1	: UV data payload (4 GRFs)
+//------------------------------------------------------------------
+
+
+
+// Module name: Save.inc
+
+
+
+
+// Description: Includes all definitions explicit to Fast Composite.
+
+
+
+
+// End of common.inc
+
+
+//========== GRF partition ==========
+     // r0 header            :   r0          (1 GRF)
+     // Static parameters    :   r1 - r6     (6 GRFS)
+     // Inline parameters    :   r7 - r8     (2 GRFs)
+     // MSGSRC               :   r27         (1 GRF)
+//===================================
+
+//Interface:
+//========== Static Parameters (Explicit To Fast Composite) ==========
+//r1
+//CSC Set 0
+
+
+.declare udCSC_CURBE    Base=r1.0      ElementSize=4       Type=ud
+
+//Constant alpha
+
+
+//r2
+
+
+// Gen7 AVS WA
+
+
+// WiDi Definitions
+
+
+//Colorfill
+
+
+                                      // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise.	    
+
+.declare ubCOLOR_PIXEL_VAL      Base=r2.20      ElementSize=1       SrcRegion=<0;1,0>       DstRegion=<1>       Type=ub
+
+//r3
+//Normalised Ratio of Horizontal step size with main video for all layers
+
+
+    //Normalised Ratio of Horizontal step size with main video for all layers becomes
+    //Normalised Horizontal step size for all layers in VP_Setup.asm
+
+
+//r4
+//Normalised Vertical step size for all layers
+
+
+//r5
+//Normalised Vertical Frame Origin for all layers
+
+
+//r6
+//Normalised Horizontal Frame Origin for all layers
+
+
+//========== Inline Parameters (Explicit To Fast Composite) ==========
+
+
+//Main video Step X
+
+
+//====================== Binding table (Explicit To Fast Composite)=========================================
+
+
+//Used by Interlaced Scaling Kernels
+
+
+//========== Sampler State Table Index (Explicit To Fast Composite)==========
+//Sampler Index for AVS/IEF messages
+
+
+//Sampler Index for SIMD16 sampler messages
+
+
+//=============================================================================
+
+.declare fBUFFER_0      Base=r64.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+.declare fBUFFER_1      Base=r80.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+.declare fBUFFER_2      Base=r96.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+.declare fBUFFER_3      Base=r112.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+.declare fBUFFER_4      Base=r28.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+.declare fBUFFER_5      Base=r46.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+
+.declare udBUFFER_0     Base=r64.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_1     Base=r80.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_2     Base=r96.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_3     Base=r112.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_4     Base=r28.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_5     Base=r46.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+
+.declare uwBUFFER_0     Base=r64.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_1     Base=r80.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_2     Base=r96.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_3     Base=r112.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_4     Base=r28.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_5     Base=r46.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+
+.declare ubBUFFER_0     Base=r64.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_1     Base=r80.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_2     Base=r96.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_3     Base=r112.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_4     Base=r28.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_5     Base=r46.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+
+.declare ub4BUFFER_0    Base=r64.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_1    Base=r80.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_2    Base=r96.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_3    Base=r112.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_4    Base=r28.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_5    Base=r46.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+
+//Pointer to mask reg
+
+
+//r18
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+.declare udCSC_COEFF_0  Base=r18.0    ElementSize=4 Type=ud       // 1 GRF
+
+//r19
+
+
+.declare udCSC_COEFF_1  Base=r19.0    ElementSize=4 Type=ud       // 1 GRF
+
+
+//r20
+
+.declare uwALPHA_MASK_REG_TEMP  Base=r20.0    ElementSize=2 SrcRegion=<16;16,1> Type=uw        // 1 GRF
+
+//r21
+
+.declare uwALPHA_MASK_REG       Base=r21.0         ElementSize=2 SrcRegion=<16;16,1> Type=uw        // 1 GRF
+
+//r22
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as
+//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT
+
+//r23
+
+
+//Lumakey
+
+
+//r24
+
+
+//r25
+
+
+//r26
+
+
+//defines to generate LABELS during compile time.
+
+
+//Msg payload buffers; up to 4 full-size messages can be written
+
+
+.declare    mudMSGPAYLOAD0  Base=r29.0 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare    mudMSGPAYLOAD1  Base=r38.0 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare    mudMSGPAYLOAD2  Base=r47.0 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare    mudMSGPAYLOAD3  Base=r56.0 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+
+.declare    muwMSGPAYLOAD0  Base=r29.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare    muwMSGPAYLOAD1  Base=r38.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare    muwMSGPAYLOAD2  Base=r47.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare    muwMSGPAYLOAD3  Base=r56.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+
+.declare    mubMSGPAYLOAD0  Base=r29.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare    mubMSGPAYLOAD1  Base=r38.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare    mubMSGPAYLOAD2  Base=r47.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare    mubMSGPAYLOAD3  Base=r56.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare    mubMSGPAYLOAD4  Base=r32.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare    mubMSGPAYLOAD5  Base=r41.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare    mubMSGPAYLOAD6  Base=r50.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare    mubMSGPAYLOAD7  Base=r59.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+
+
+	// the r17 register (nTEMP0) is originally defined from "Common.inc"
+	// instead of re-defining a nTEMP0 here, we use "SAVE_RGB" suffix for its naming
+
+	.declare uwTemp0 Base=r17.0 ElementSize=2 Type=uw
+
+
+//_SAVE_INC_
+
+
+	// At the save module we have all 8 address sub-registers available.
+	// So we will use PING-PONG type of scheme to save the data using
+	// pointers pBUF_CHNL_TOP_8x4 and pBUF_CHNL_BOT_8x4. This will help
+	// reduce dependency. - rT
+
+	//wBUFF_CHNL_PTR points to either buffer 0 or buffer 4.
+	//Add appropriate offsets to get pointers for all buffers (1,2,3 or 5).
+	//Offsets are zero for buffer 0 and buffer 4.
+	add   (4) a0.0:uw   r22.0<4;4,1>:w          0:uw
+	add   (4) a0.4:uw   r22.0<4;4,1>:w          512:uw
+
+	//Set up header for Y,U and V data
+	mov  (8) r28<1>:ud      r27<8;8,1>:ud
+	mov  (8) r37<1>:ud      r27<8;8,1>:ud
+
+	mov  (2) r28.0<1>:d     r7.0<2;2,1>:w                   		{ NoDDClr }  		//ORI Y (LUMA) 	= ORI
+	mov  (1) r37.0<1>:d     r7.0<0;1,0>:w                   		{ NoDDClr }  		//H ORI (CHROMA)	= H ORI
+	shr  (1) r37.1<1>:d     r7.1<0;1,0>:w            1:w  		{ NoDDClr, NoDDChk }  	//V ORI (CHROMA)	= V ORI/2
+
+  	mov  (1) r28.2<1>:ud    0xF000F:ud	{ NoDDChk }  // Y Block width and height (16x16)
+	mov  (1) r37.2<1>:ud    0x7000F:ud  	{ NoDDChk }  // UV Block width and height(16x8)
+
+// Unscramble, and pack data directly to MRFs
+
+//	Data 16x16 block is divided as -
+//		---------
+//		|   0   |
+//		---------
+//		|   1  	|
+//		---------
+//		|   2  	|
+//		---------
+//		|   3  	|
+//		---------
+//		All sub-blocks are of size 16x4
+//		0: ubBUFFER_0
+//		1: ubBUFFER_1, ubBUFFER_0+16
+//		2: ubBUFFER_2
+//		3: ubBUFFER_3, ubBUFFER_2+16
+
+	//Y Rounding 16x4 top part
+	add.sat (16) r[a0.1,0]<1>:uw		r[a0.1,0]<16;16,1>:uw			0x0080:uw		
+	add.sat (16) r[a0.1,32]<1>:uw		r[a0.1,32]<16;16,1>:uw		0x0080:uw		
+	add.sat (16) r[a0.1,64]<1>:uw		r[a0.1,64]<16;16,1>:uw		0x0080:uw		
+	add.sat (16) r[a0.1,96]<1>:uw		r[a0.1,96]<16;16,1>:uw		0x0080:uw		
+
+	// U Averaging and Rounding, 8x2 top part
+	shr   (8)	uwBUFFER_5(0,0)<2>			r[a0.2,0]<16;8,2>:uw			1:w
+	shr	  (8)	uwBUFFER_5(1,0)<2>			r[a0.2,32]<16;8,2>:uw			1:w
+	shr	  (8)	uwBUFFER_5(2,0)<2>			r[a0.2,64]<16;8,2>:uw			1:w
+	shr   (8)	uwBUFFER_5(3,0)<2>			r[a0.2,96]<16;8,2>:uw			1:w
+
+	add	  (8)	uwBUFFER_5(0,0)<2>			uwBUFFER_5(0,0)<16;8,2>				uwBUFFER_5(1,0)<16;8,2>
+	add.sat (8) r[a0.2,0]<2>:uw		uwBUFFER_5(0,0)<16;8,2>				0x0080:uw
+
+	add	  (8)	uwBUFFER_5(2,0)<2>			uwBUFFER_5(2,0)<16;8,2>				uwBUFFER_5(3,0)<16;8,2>
+	add.sat (8) r[a0.2,64]<2>:uw		uwBUFFER_5(2,0)<16;8,2>				0x0080:uw
+
+	// V Averaging and Rounding, 8x2 top part
+	shr   (8)	uwBUFFER_5(4,0)<2>			r[a0.0,0]<16;8,2>:uw			1:w
+	shr	  (8)	uwBUFFER_5(5,0)<2>			r[a0.0,32]<16;8,2>:uw			1:w
+	shr	  (8)	uwBUFFER_5(6,0)<2>			r[a0.0,64]<16;8,2>:uw			1:w
+	shr   (8)	uwBUFFER_5(7,0)<2>			r[a0.0,96]<16;8,2>:uw			1:w
+
+	add	  (8)	uwBUFFER_5(4,0)<2>			uwBUFFER_5(4,0)<16;8,2>				uwBUFFER_5(5,0)<16;8,2>
+	add.sat (8) r[a0.0,0]<2>:uw		uwBUFFER_5(4,0)<16;8,2>				0x0080:uw
+
+	add	  (8)	uwBUFFER_5(6,0)<2>			uwBUFFER_5(6,0)<16;8,2>				uwBUFFER_5(7,0)<16;8,2>
+	add.sat (8) r[a0.0,64]<2>:uw		uwBUFFER_5(6,0)<16;8,2>				0x0080:uw
+
+	add   (4) a0.0:uw   			r22.0<4;4,1>:w		1024:uw	//Update Buffer 2 pointers
+
+	//Y Rounding, 16x4 bottom part
+	add.sat (16) r[a0.5,0]<1>:uw		r[a0.5,0]<16;16,1>:uw			0x0080:uw		
+	add.sat (16) r[a0.5,32]<1>:uw		r[a0.5,32]<16;16,1>:uw		0x0080:uw		
+	add.sat (16) r[a0.5,64]<1>:uw		r[a0.5,64]<16;16,1>:uw		0x0080:uw		
+	add.sat (16) r[a0.5,96]<1>:uw		r[a0.5,96]<16;16,1>:uw		0x0080:uw		
+
+	// U Averaging and Rounding, 8x2 bottom part
+	shr   (8)	uwBUFFER_5(0,0)<2>			r[a0.6,0]<16;8,2>:uw			1:w
+	shr	  (8)	uwBUFFER_5(1,0)<2>			r[a0.6,32]<16;8,2>:uw			1:w
+	shr	  (8)	uwBUFFER_5(2,0)<2>			r[a0.6,64]<16;8,2>:uw			1:w
+	shr   (8)	uwBUFFER_5(3,0)<2>			r[a0.6,96]<16;8,2>:uw			1:w
+
+	add	  (8)	uwBUFFER_5(0,0)<2>			uwBUFFER_5(0,0)<16;8,2>				uwBUFFER_5(1,0)<16;8,2>
+	add.sat (8) r[a0.6,0]<2>:uw		uwBUFFER_5(0,0)<16;8,2>				0x0080:uw
+
+	add	  (8)	uwBUFFER_5(2,0)<2>			uwBUFFER_5(2,0)<16;8,2>				uwBUFFER_5(3,0)<16;8,2>
+	add.sat (8) r[a0.6,64]<2>:uw		uwBUFFER_5(2,0)<16;8,2>				0x0080:uw
+
+	// V Averaging and Rounding, 8x2 bottom part
+	shr   (8)	uwBUFFER_5(4,0)<2>			r[a0.4,0]<16;8,2>:uw			1:w
+	shr	  (8)	uwBUFFER_5(5,0)<2>			r[a0.4,32]<16;8,2>:uw			1:w
+	shr	  (8)	uwBUFFER_5(6,0)<2>			r[a0.4,64]<16;8,2>:uw			1:w
+	shr   (8)	uwBUFFER_5(7,0)<2>			r[a0.4,96]<16;8,2>:uw			1:w
+
+	add	  (8)	uwBUFFER_5(4,0)<2>			uwBUFFER_5(4,0)<16;8,2>				uwBUFFER_5(5,0)<16;8,2>
+	add.sat (8) r[a0.4,0]<2>:uw		uwBUFFER_5(4,0)<16;8,2>				0x0080:uw
+
+	add	  (8)	uwBUFFER_5(6,0)<2>			uwBUFFER_5(6,0)<16;8,2>				uwBUFFER_5(7,0)<16;8,2>
+	add.sat (8) r[a0.4,64]<2>:uw		uwBUFFER_5(6,0)<16;8,2>				0x0080:uw
+
+	add   (4) a0.4:uw   		r22.0<4;4,1>:w          1536:uw	//Update Buffer 3 pointers
+	//Y Rounding 16x4 top part
+	add.sat (16) r[a0.1,0]<1>:uw		r[a0.1,0]<16;16,1>:uw			0x0080:uw		
+	add.sat (16) r[a0.1,32]<1>:uw		r[a0.1,32]<16;16,1>:uw		0x0080:uw		
+	add.sat (16) r[a0.1,64]<1>:uw		r[a0.1,64]<16;16,1>:uw		0x0080:uw		
+	add.sat (16) r[a0.1,96]<1>:uw		r[a0.1,96]<16;16,1>:uw		0x0080:uw		
+
+	// U Averaging and Rounding, 8x2 top part
+	shr   (8)	uwBUFFER_5(0,0)<2>			r[a0.2,0]<16;8,2>:uw			1:w
+	shr	  (8)	uwBUFFER_5(1,0)<2>			r[a0.2,32]<16;8,2>:uw			1:w
+	shr	  (8)	uwBUFFER_5(2,0)<2>			r[a0.2,64]<16;8,2>:uw			1:w
+	shr   (8)	uwBUFFER_5(3,0)<2>			r[a0.2,96]<16;8,2>:uw			1:w
+
+	add	  (8)	uwBUFFER_5(0,0)<2>			uwBUFFER_5(0,0)<16;8,2>				uwBUFFER_5(1,0)<16;8,2>
+	add.sat (8) r[a0.2,0]<2>:uw		uwBUFFER_5(0,0)<16;8,2>				0x0080:uw
+
+	add	  (8)	uwBUFFER_5(2,0)<2>			uwBUFFER_5(2,0)<16;8,2>				uwBUFFER_5(3,0)<16;8,2>
+	add.sat (8) r[a0.2,64]<2>:uw		uwBUFFER_5(2,0)<16;8,2>				0x0080:uw
+
+	// V Averaging and Rounding, 8x2 top part
+	shr   (8)	uwBUFFER_5(4,0)<2>			r[a0.0,0]<16;8,2>:uw			1:w
+	shr	  (8)	uwBUFFER_5(5,0)<2>			r[a0.0,32]<16;8,2>:uw			1:w
+	shr	  (8)	uwBUFFER_5(6,0)<2>			r[a0.0,64]<16;8,2>:uw			1:w
+	shr   (8)	uwBUFFER_5(7,0)<2>			r[a0.0,96]<16;8,2>:uw			1:w
+
+	add	  (8)	uwBUFFER_5(4,0)<2>			uwBUFFER_5(4,0)<16;8,2>				uwBUFFER_5(5,0)<16;8,2>
+	add.sat (8) r[a0.0,0]<2>:uw		uwBUFFER_5(4,0)<16;8,2>				0x0080:uw
+
+	add	  (8)	uwBUFFER_5(6,0)<2>			uwBUFFER_5(6,0)<16;8,2>				uwBUFFER_5(7,0)<16;8,2>
+	add.sat (8) r[a0.0,64]<2>:uw		uwBUFFER_5(6,0)<16;8,2>				0x0080:uw
+
+	add   (4) a0.0:uw   			r22.0<4;4,1>:w		1024:uw	//Update Buffer 2 pointers
+
+	//Y Rounding, 16x4 bottom part
+	add.sat (16) r[a0.5,0]<1>:uw		r[a0.5,0]<16;16,1>:uw			0x0080:uw		
+	add.sat (16) r[a0.5,32]<1>:uw		r[a0.5,32]<16;16,1>:uw		0x0080:uw		
+	add.sat (16) r[a0.5,64]<1>:uw		r[a0.5,64]<16;16,1>:uw		0x0080:uw		
+	add.sat (16) r[a0.5,96]<1>:uw		r[a0.5,96]<16;16,1>:uw		0x0080:uw		
+
+	// U Averaging and Rounding, 8x2 bottom part
+	shr   (8)	uwBUFFER_5(0,0)<2>			r[a0.6,0]<16;8,2>:uw			1:w
+	shr	  (8)	uwBUFFER_5(1,0)<2>			r[a0.6,32]<16;8,2>:uw			1:w
+	shr	  (8)	uwBUFFER_5(2,0)<2>			r[a0.6,64]<16;8,2>:uw			1:w
+	shr   (8)	uwBUFFER_5(3,0)<2>			r[a0.6,96]<16;8,2>:uw			1:w
+
+	add	  (8)	uwBUFFER_5(0,0)<2>			uwBUFFER_5(0,0)<16;8,2>				uwBUFFER_5(1,0)<16;8,2>
+	add.sat (8) r[a0.6,0]<2>:uw		uwBUFFER_5(0,0)<16;8,2>				0x0080:uw
+
+	add	  (8)	uwBUFFER_5(2,0)<2>			uwBUFFER_5(2,0)<16;8,2>				uwBUFFER_5(3,0)<16;8,2>
+	add.sat (8) r[a0.6,64]<2>:uw		uwBUFFER_5(2,0)<16;8,2>				0x0080:uw
+
+	// V Averaging and Rounding, 8x2 bottom part
+	shr   (8)	uwBUFFER_5(4,0)<2>			r[a0.4,0]<16;8,2>:uw			1:w
+	shr	  (8)	uwBUFFER_5(5,0)<2>			r[a0.4,32]<16;8,2>:uw			1:w
+	shr	  (8)	uwBUFFER_5(6,0)<2>			r[a0.4,64]<16;8,2>:uw			1:w
+	shr   (8)	uwBUFFER_5(7,0)<2>			r[a0.4,96]<16;8,2>:uw			1:w
+
+	add	  (8)	uwBUFFER_5(4,0)<2>			uwBUFFER_5(4,0)<16;8,2>				uwBUFFER_5(5,0)<16;8,2>
+	add.sat (8) r[a0.4,0]<2>:uw		uwBUFFER_5(4,0)<16;8,2>				0x0080:uw
+
+	add	  (8)	uwBUFFER_5(6,0)<2>			uwBUFFER_5(6,0)<16;8,2>				uwBUFFER_5(7,0)<16;8,2>
+	add.sat (8) r[a0.4,64]<2>:uw		uwBUFFER_5(6,0)<16;8,2>				0x0080:uw
+
+	add   (4) a0.4:uw   		r22.0<4;4,1>:w          1536:uw	//Update Buffer 3 pointers
+	// restore pBUF_CHNL_TOP_8x4 and pBUF_CHNL_BOT_8x4 registers
+	add   (4) a0.0:uw   r22.0<4;4,1>:w          0:uw
+	add   (4) a0.4:uw   r22.0<4;4,1>:w          512:uw
+
+//Buffer 0
+//Move Y to msg payload
+	mov  (16)  mubMSGPAYLOAD0(0,0)<1>			r[a0.1, 1]<32;16,2>:ub		{ NoDDClr }
+	mov  (16)  mubMSGPAYLOAD0(0,16)<1>			r[a0.1, 33]<32;16,2>:ub		{ NoDDChk }
+	mov  (16)  mubMSGPAYLOAD0(1,0)<1>			r[a0.1, 65]<32;16,2>:ub		{ NoDDClr }
+	mov  (16)  mubMSGPAYLOAD0(1,16)<1>			r[a0.1, 97]<32;16,2>:ub		{ NoDDChk }
+
+//Move U to msg payload
+	mov  (8)  mubMSGPAYLOAD1(0,0)<2>			r[a0.2, 1]<32;8,4>:ub		{ NoDDClr }
+	mov  (8)  mubMSGPAYLOAD1(0,16)<2>			r[a0.2, 65]<32;8,4>:ub		{ NoDDClr, NoDDChk }
+
+//Move V to msg payload
+	mov  (8)  mubMSGPAYLOAD1(0,1)<2>			r[a0.0, 1]<32;8,4>:ub		{ NoDDClr, NoDDChk }
+	mov  (8)  mubMSGPAYLOAD1(0,17)<2>			r[a0.0, 65]<32;8,4>:ub		{ NoDDChk }
+
+	add   (4) a0.0:uw   			r22.0<4;4,1>:w		1024:uw	//Update Buffer 2 pointers
+
+//Buffer 1
+	mov  (16)  mubMSGPAYLOAD0(2,0)<1>			r[a0.5, 1]<32;16,2>:ub		{ NoDDClr }
+	mov  (16)  mubMSGPAYLOAD0(2,16)<1>			r[a0.5, 33]<32;16,2>:ub		{ NoDDChk }
+	mov  (16)  mubMSGPAYLOAD0(3,0)<1>			r[a0.5, 65]<32;16,2>:ub		{ NoDDClr }
+	mov  (16)  mubMSGPAYLOAD0(3,16)<1>			r[a0.5, 97]<32;16,2>:ub		{ NoDDChk }
+
+	mov  (8)  mubMSGPAYLOAD1(1,0)<2>			r[a0.6, 1]<32;8,4>:ub		{ NoDDClr }
+	mov  (8)  mubMSGPAYLOAD1(1,16)<2>			r[a0.6, 65]<32;8,4>:ub		{ NoDDClr, NoDDChk }
+
+	mov  (8)  mubMSGPAYLOAD1(1,1)<2>			r[a0.4, 1]<32;8,4>:ub		{ NoDDClr, NoDDChk }
+	mov  (8)  mubMSGPAYLOAD1(1,17)<2>			r[a0.4, 65]<32;8,4>:ub		{ NoDDChk }
+
+	add   (4) a0.4:uw   		r22.0<4;4,1>:w          1536:uw	//Update Buffer 3 pointers
+
+//Buffer 2
+	mov  (16)  mubMSGPAYLOAD0(4,0)<1>			r[a0.1, 1]<32;16,2>:ub		{ NoDDClr }
+	mov  (16)  mubMSGPAYLOAD0(4,16)<1>			r[a0.1, 33]<32;16,2>:ub		{ NoDDChk }
+	mov  (16)  mubMSGPAYLOAD0(5,0)<1>			r[a0.1, 65]<32;16,2>:ub		{ NoDDClr }
+	mov  (16)  mubMSGPAYLOAD0(5,16)<1>			r[a0.1, 97]<32;16,2>:ub		{ NoDDChk }
+
+	mov  (8)  mubMSGPAYLOAD1(2,0)<2>			r[a0.2, 1]<32;8,4>:ub		{ NoDDClr }
+	mov  (8)  mubMSGPAYLOAD1(2,16)<2>			r[a0.2, 65]<32;8,4>:ub		{ NoDDClr, NoDDChk }
+
+	mov  (8)  mubMSGPAYLOAD1(2,1)<2>			r[a0.0, 1]<32;8,4>:ub		{ NoDDClr, NoDDChk }
+	mov  (8)  mubMSGPAYLOAD1(2,17)<2>			r[a0.0, 65]<32;8,4>:ub		{ NoDDChk }
+
+//Buffer 3
+	mov  (16)  mubMSGPAYLOAD0(6,0)<1>			r[a0.5, 1]<32;16,2>:ub		{ NoDDClr }
+	mov  (16)  mubMSGPAYLOAD0(6,16)<1>			r[a0.5, 33]<32;16,2>:ub		{ NoDDChk }
+	mov  (16)  mubMSGPAYLOAD0(7,0)<1>			r[a0.5, 65]<32;16,2>:ub		{ NoDDClr }
+	mov  (16)  mubMSGPAYLOAD0(7,16)<1>			r[a0.5, 97]<32;16,2>:ub		{ NoDDChk }
+
+	mov  (8)  mubMSGPAYLOAD1(3,0)<2>			r[a0.6, 1]<32;8,4>:ub		{ NoDDClr }
+	mov  (8)  mubMSGPAYLOAD1(3,16)<2>			r[a0.6, 65]<32;8,4>:ub		{ NoDDClr, NoDDChk }
+
+	mov  (8)  mubMSGPAYLOAD1(3,1)<2>			r[a0.4, 1]<32;8,4>:ub		{ NoDDClr, NoDDChk }
+	mov  (8)  mubMSGPAYLOAD1(3,17)<2>			r[a0.4, 65]<32;8,4>:ub		{ NoDDChk }
+//===========================================================================
+
+send (1)    null<1>:d    r28   	0x5			0x120A8018:ud
+send (1)    null<1>:d    r37   	0x5			0xA0A8019:ud
diff --git a/src/shaders/post_processing/gen7/Save_AVS_PA.g4a b/src/shaders/post_processing/gen7/Save_AVS_PA.g4a
new file mode 100644
index 0000000..42f87c3
--- /dev/null
+++ b/src/shaders/post_processing/gen7/Save_AVS_PA.g4a
@@ -0,0 +1,625 @@
+/*
+ *  Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+//  174    // Total instruction count
+//    1    // Total kernel count
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16)
+.default_register_type  :ub
+
+.reg_count_total        128
+.reg_count_payload      7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+                                    //  e.g.            byte0   byte1  byte2
+                                    // YUYV               0       1      3
+                                    // YVYU               0       3      1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+                                    //  e.g.              byte0           byte1           byte2
+                                    // YUYV                 0               1               3
+                                    // YVYU                 0               3               1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+//                = 000 0001 (min message len 1 ) 0,0000 (resp len 0   -add later)
+//                  0000,0000,0000
+//                  0001(Spawn a root thread),0001 (Root thread spawn thread)
+//                = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+//                = 000 0110 (min message len 6 ) 0,0000 (resp len 0   -add later)
+//                  1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+//                  0000,0000 (Binding table index, added later)
+//                = 0x02000011
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+        // Message Descriptors
+                //                = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+                //                  1 (header present 1) 0 1010 (media block write) 000000
+                //                  00000000 (binding table index - set later)
+                //                = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+                             // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT  r43    // Dummy Message Register for EOT
+
+
+.declare    mubMSGPAYLOAD  Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub	// byte view of the registers starting at r30
+.declare    muwMSGPAYLOAD  Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw	// same base (r30), unsigned-word view
+.declare    mudMSGPAYLOAD  Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud	// same base (r30), unsigned-dword view
+.declare    mfMSGPAYLOAD   Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f	// same base (r30), float view
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation: five typed views (f, ud, uw, ub, and a byte view with a stride of 4) that all start at r9.0
+
+.declare fROBUF		  Base=r9.0		ElementSize=4		SrcRegion=<8;8,1>		  DstRegion=<1>		Type=f
+
+.declare udROBUF		Base=r9.0		ElementSize=4		SrcRegion=<8;8,1>		  DstRegion=<1>		Type=ud
+
+.declare uwROBUF		Base=r9.0		ElementSize=2		SrcRegion=<16;16,1>		DstRegion=<1>		Type=uw
+
+.declare ubROBUF		Base=r9.0		ElementSize=1		SrcRegion=<16;16,1>		DstRegion=<1>		Type=ub
+
+.declare ub4ROBUF 	Base=r9.0		ElementSize=1		SrcRegion=<32;8,4>		DstRegion=<4>		Type=ub
+
+
+// End of common.inc
+
+
+// Module name: Save_AVS_PA.asm
+//
+// Save PA 422 frame data block of size 16x16
+//
+// To save a 16x16 block (32x16 bytes of YUYV) we would need 2 send instructions of size 16x16 each.
+//  -------------------------------
+//  |    16x16    |    16x16      |
+//  |    YUYV     |    YUYV       |
+//  -------------------------------
+// These 2 sends are replaced by eight 32x2 sends to improve performance.
+
+
+
+// Module name: Save.inc
+
+
+
+
+// Description: Includes all definitions explicit to Fast Composite.
+
+
+
+
+// End of common.inc
+
+
+//========== GRF partition ==========
+     // r0 header            :   r0          (1 GRF)
+     // Static parameters    :   r1 - r6     (6 GRFS)
+     // Inline parameters    :   r7 - r8     (2 GRFs)
+     // MSGSRC               :   r27         (1 GRF)
+//===================================
+
+//Interface:
+//========== Static Parameters (Explicit To Fast Composite) ==========
+//r1
+//CSC Set 0
+
+
+.declare udCSC_CURBE    Base=r1.0      ElementSize=4       Type=ud
+
+//Constant alpha
+
+
+//r2
+
+
+// Gen7 AVS WA
+
+
+// WiDi Definitions
+
+
+//Colorfill
+
+
+                                      // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise.	    
+
+.declare ubCOLOR_PIXEL_VAL      Base=r2.20      ElementSize=1       SrcRegion=<0;1,0>       DstRegion=<1>       Type=ub
+
+//r3
+//Normalised Ratio of Horizontal step size with main video for all layers
+
+
+    //Normalised Ratio of Horizontal step size with main video for all layers becomes
+    //Normalised Horizontal step size for all layers in VP_Setup.asm
+
+
+//r4
+//Normalised Vertical step size for all layers
+
+
+//r5
+//Normalised Vertical Frame Origin for all layers
+
+
+//r6
+//Normalised Horizontal Frame Origin for all layers
+
+
+//========== Inline Parameters (Explicit To Fast Composite) ==========
+
+
+//Main video Step X
+
+
+//====================== Binding table (Explicit To Fast Composite)=========================================
+
+
+//Used by Interlaced Scaling Kernels
+
+
+//========== Sampler State Table Index (Explicit To Fast Composite)==========
+//Sampler Index for AVS/IEF messages
+
+
+//Sampler Index for SIMD16 sampler messages
+
+
+//=============================================================================
+// Working buffers: BUFFER_0..3 start at r64/r80/r96/r112 (16 GRFs apart); BUFFER_4 starts at r28 and BUFFER_5 at r46. Float (f) view:
+.declare fBUFFER_0      Base=r64.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+.declare fBUFFER_1      Base=r80.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+.declare fBUFFER_2      Base=r96.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+.declare fBUFFER_3      Base=r112.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+.declare fBUFFER_4      Base=r28.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+.declare fBUFFER_5      Base=r46.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+// Same six base registers, unsigned-dword (ud) view:
+.declare udBUFFER_0     Base=r64.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_1     Base=r80.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_2     Base=r96.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_3     Base=r112.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_4     Base=r28.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_5     Base=r46.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+// Same six base registers, unsigned-word (uw) view:
+.declare uwBUFFER_0     Base=r64.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_1     Base=r80.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_2     Base=r96.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_3     Base=r112.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_4     Base=r28.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_5     Base=r46.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+// Same six base registers, unsigned-byte (ub) view:
+.declare ubBUFFER_0     Base=r64.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_1     Base=r80.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_2     Base=r96.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_3     Base=r112.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_4     Base=r28.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_5     Base=r46.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+// Same six base registers, byte view with a default stride of 4 (SrcRegion=<32;8,4>, DstRegion=<4>):
+.declare ub4BUFFER_0    Base=r64.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_1    Base=r80.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_2    Base=r96.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_3    Base=r112.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_4    Base=r28.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_5    Base=r46.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+
+//Pointer to mask reg
+
+
+//r18
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+.declare udCSC_COEFF_0  Base=r18.0    ElementSize=4 Type=ud       // 1 GRF
+
+//r19
+
+
+.declare udCSC_COEFF_1  Base=r19.0    ElementSize=4 Type=ud       // 1 GRF
+
+
+//r20
+
+.declare uwALPHA_MASK_REG_TEMP  Base=r20.0    ElementSize=2 SrcRegion=<16;16,1> Type=uw        // 1 GRF
+
+//r21
+
+.declare uwALPHA_MASK_REG       Base=r21.0         ElementSize=2 SrcRegion=<16;16,1> Type=uw        // 1 GRF
+
+//r22
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as
+//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT
+
+//r23
+
+
+//Lumakey
+
+
+//r24
+
+
+//r25
+
+
+//r26
+
+
+//defines to generate LABELS during compile time.
+
+
+//Msg payload buffers; up to 4 full-size messages can be written
+// MSGPAYLOAD0..3 start at r29/r38/r47/r56 (9 GRFs apart). The send instructions in this kernel name r28 and r37, i.e. the GRF just before MSGPAYLOAD0 and MSGPAYLOAD1.
+
+.declare    mudMSGPAYLOAD0  Base=r29.0 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare    mudMSGPAYLOAD1  Base=r38.0 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare    mudMSGPAYLOAD2  Base=r47.0 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare    mudMSGPAYLOAD3  Base=r56.0 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+
+.declare    muwMSGPAYLOAD0  Base=r29.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare    muwMSGPAYLOAD1  Base=r38.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare    muwMSGPAYLOAD2  Base=r47.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare    muwMSGPAYLOAD3  Base=r56.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+
+.declare    mubMSGPAYLOAD0  Base=r29.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare    mubMSGPAYLOAD1  Base=r38.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare    mubMSGPAYLOAD2  Base=r47.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare    mubMSGPAYLOAD3  Base=r56.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare    mubMSGPAYLOAD4  Base=r32.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub	// r32 = MSGPAYLOAD0 base + 3 GRFs
+.declare    mubMSGPAYLOAD5  Base=r41.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub	// r41 = MSGPAYLOAD1 base + 3 GRFs
+.declare    mubMSGPAYLOAD6  Base=r50.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub	// r50 = MSGPAYLOAD2 base + 3 GRFs
+.declare    mubMSGPAYLOAD7  Base=r59.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub	// r59 = MSGPAYLOAD3 base + 3 GRFs
+
+
+	// the r17 register (nTEMP0) is originally defined from "Common.inc"
+	// instead of re-defining a nTEMP0 here, we use "SAVE_RGB" suffix for its naming
+
+	.declare uwTemp0 Base=r17.0 ElementSize=2 Type=uw
+
+
+//_SAVE_INC_
+
+
+	//wBUFF_CHNL_PTR points to buffer 0.
+	//Add appropriate offsets to get pointers for all buffers (1,2,3).
+	//Offset is zero for buffer 0.
+	add   (4)   a0.0:uw   r22.0<4;4,1>:w          0:uw 	// a0.0-a0.3 = the four words r22.0-r22.3 (source channel pointers)
+
+	//Set DEST pointers (a0.4-a0.7) according to output packing i.e. YUYV, YVYU, UYVY, VYUY
+	add	(4)		a0.4<1>:w			r2.28<4;4,1>:ub    928:uw	// bytes r2.28-r2.31 + 928; 928 = 29*32, presumably the byte address of r29 (mubMSGPAYLOAD0) with 32-byte GRFs -- TODO confirm
+
+    shl (1) r27.0<1>:d      r7.0<0;1,0>:w            1:w  			 { NoDDClr }            // r27.0 = 2 * r7.0: horizontal block origin doubled (the block is 32 bytes wide for 16 pixels)
+    mov (1) r27.1<1>:d      r7.1<0;1,0>:w                 			 { NoDDClr, NoDDChk }   // r27.1 = r7.1: block origin (1st quadrant); later code adds row offsets to this field
+    mov (1) r27.2<1>:ud     0x1001F:ud  	 { NoDDChk }            // Block width and height (32x2): 0x1F = 32-1 in the low word, 0x1 = 2-1 in the high word
+
+// Rounding: saturating add of 0x0080 to the 16-bit samples, in place; the packing code further down copies only byte 1 of each word
+	// left: bytes 0-15 of the four rows at +0/+32/+64/+96; a0.0 and a0.2 channels touch every other word (4 of 8), a0.1 channel all 8 words
+	add.sat (4) 	r[a0.0, 0]<2>:uw		r[a0.0,  0]<8;4,2>:uw		0x0080:uw			
+	add.sat (4) 	r[a0.0,32]<2>:uw		r[a0.0, 32]<8;4,2>:uw		0x0080:uw			
+	add.sat (4) 	r[a0.0,64]<2>:uw		r[a0.0, 64]<8;4,2>:uw		0x0080:uw			
+	add.sat (4) 	r[a0.0,96]<2>:uw		r[a0.0, 96]<8;4,2>:uw		0x0080:uw			
+
+	add.sat (8) 	r[a0.1, 0]<1>:uw		r[a0.1,  0]<8;8,1>:uw		0x0080:uw			
+	add.sat (8) 	r[a0.1,32]<1>:uw		r[a0.1, 32]<8;8,1>:uw		0x0080:uw			
+	add.sat (8) 	r[a0.1,64]<1>:uw		r[a0.1, 64]<8;8,1>:uw		0x0080:uw			
+	add.sat (8) 	r[a0.1,96]<1>:uw		r[a0.1, 96]<8;8,1>:uw		0x0080:uw			
+
+	add.sat (4) 	r[a0.2, 0]<2>:uw		r[a0.2,  0]<8;4,2>:uw		0x0080:uw			
+	add.sat (4) 	r[a0.2,32]<2>:uw		r[a0.2, 32]<8;4,2>:uw		0x0080:uw			
+	add.sat (4) 	r[a0.2,64]<2>:uw		r[a0.2, 64]<8;4,2>:uw		0x0080:uw			
+	add.sat (4) 	r[a0.2,96]<2>:uw		r[a0.2, 96]<8;4,2>:uw		0x0080:uw			
+
+	// right: same pattern for bytes 16-31 of each row (offsets +16/+48/+80/+112)
+	add.sat (4) 	r[a0.0,16]<2>:uw			r[a0.0, 16]<8;4,2>:uw			0x0080:uw			
+	add.sat (4) 	r[a0.0,48]<2>:uw		r[a0.0, 48]<8;4,2>:uw		0x0080:uw			
+	add.sat (4) 	r[a0.0,80]<2>:uw		r[a0.0, 80]<8;4,2>:uw		0x0080:uw			
+	add.sat (4) 	r[a0.0,112]<2>:uw		r[a0.0, 112]<8;4,2>:uw		0x0080:uw			
+
+	add.sat (8) 	r[a0.1, 16]<1>:uw			r[a0.1, 16]<8;8,1>:uw			0x0080:uw			
+	add.sat (8) 	r[a0.1,48]<1>:uw		r[a0.1, 48]<8;8,1>:uw		0x0080:uw			
+	add.sat (8) 	r[a0.1,80]<1>:uw		r[a0.1, 80]<8;8,1>:uw		0x0080:uw			
+	add.sat (8) 	r[a0.1,112]<1>:uw		r[a0.1, 112]<8;8,1>:uw		0x0080:uw			
+
+	add.sat (4) 	r[a0.2, 16]<2>:uw			r[a0.2,  16]<8;4,2>:uw		0x0080:uw			
+	add.sat (4) 	r[a0.2,48]<2>:uw		r[a0.2, 48]<8;4,2>:uw		0x0080:uw			
+	add.sat (4) 	r[a0.2,80]<2>:uw		r[a0.2, 80]<8;4,2>:uw		0x0080:uw			
+	add.sat (4) 	r[a0.2,112]<2>:uw		r[a0.2, 112]<8;4,2>:uw		0x0080:uw			
+
+    add (4)    a0.0:uw    r22.0<4;4,1>:w    512:uw	// a0.0-a0.3 = r22.0-r22.3 + 512 bytes: source pointers now address buffer 1
+	// left: saturating +0x0080 on bytes 0-15 of the four rows at +0/+32/+64/+96 (a0.0/a0.2: every other word; a0.1: all 8 words)
+	add.sat (4) 	r[a0.0, 0]<2>:uw		r[a0.0,  0]<8;4,2>:uw		0x0080:uw			
+	add.sat (4) 	r[a0.0,32]<2>:uw		r[a0.0, 32]<8;4,2>:uw		0x0080:uw			
+	add.sat (4) 	r[a0.0,64]<2>:uw		r[a0.0, 64]<8;4,2>:uw		0x0080:uw			
+	add.sat (4) 	r[a0.0,96]<2>:uw		r[a0.0, 96]<8;4,2>:uw		0x0080:uw			
+
+	add.sat (8) 	r[a0.1, 0]<1>:uw		r[a0.1,  0]<8;8,1>:uw		0x0080:uw			
+	add.sat (8) 	r[a0.1,32]<1>:uw		r[a0.1, 32]<8;8,1>:uw		0x0080:uw			
+	add.sat (8) 	r[a0.1,64]<1>:uw		r[a0.1, 64]<8;8,1>:uw		0x0080:uw			
+	add.sat (8) 	r[a0.1,96]<1>:uw		r[a0.1, 96]<8;8,1>:uw		0x0080:uw			
+
+	add.sat (4) 	r[a0.2, 0]<2>:uw		r[a0.2,  0]<8;4,2>:uw		0x0080:uw			
+	add.sat (4) 	r[a0.2,32]<2>:uw		r[a0.2, 32]<8;4,2>:uw		0x0080:uw			
+	add.sat (4) 	r[a0.2,64]<2>:uw		r[a0.2, 64]<8;4,2>:uw		0x0080:uw			
+	add.sat (4) 	r[a0.2,96]<2>:uw		r[a0.2, 96]<8;4,2>:uw		0x0080:uw			
+
+	// right: same pattern for bytes 16-31 of each row (offsets +16/+48/+80/+112)
+	add.sat (4) 	r[a0.0,16]<2>:uw			r[a0.0, 16]<8;4,2>:uw			0x0080:uw			
+	add.sat (4) 	r[a0.0,48]<2>:uw		r[a0.0, 48]<8;4,2>:uw		0x0080:uw			
+	add.sat (4) 	r[a0.0,80]<2>:uw		r[a0.0, 80]<8;4,2>:uw		0x0080:uw			
+	add.sat (4) 	r[a0.0,112]<2>:uw		r[a0.0, 112]<8;4,2>:uw		0x0080:uw			
+
+	add.sat (8) 	r[a0.1, 16]<1>:uw			r[a0.1, 16]<8;8,1>:uw			0x0080:uw			
+	add.sat (8) 	r[a0.1,48]<1>:uw		r[a0.1, 48]<8;8,1>:uw		0x0080:uw			
+	add.sat (8) 	r[a0.1,80]<1>:uw		r[a0.1, 80]<8;8,1>:uw		0x0080:uw			
+	add.sat (8) 	r[a0.1,112]<1>:uw		r[a0.1, 112]<8;8,1>:uw		0x0080:uw			
+
+	add.sat (4) 	r[a0.2, 16]<2>:uw			r[a0.2,  16]<8;4,2>:uw		0x0080:uw			
+	add.sat (4) 	r[a0.2,48]<2>:uw		r[a0.2, 48]<8;4,2>:uw		0x0080:uw			
+	add.sat (4) 	r[a0.2,80]<2>:uw		r[a0.2, 80]<8;4,2>:uw		0x0080:uw			
+	add.sat (4) 	r[a0.2,112]<2>:uw		r[a0.2, 112]<8;4,2>:uw		0x0080:uw			
+
+    add (4)    a0.0:uw    r22.0<4;4,1>:w    1024:uw	// a0.0-a0.3 = r22.0-r22.3 + 1024 bytes: source pointers now address buffer 2
+	// left: saturating +0x0080 on bytes 0-15 of the four rows at +0/+32/+64/+96 (a0.0/a0.2: every other word; a0.1: all 8 words)
+	add.sat (4) 	r[a0.0, 0]<2>:uw		r[a0.0,  0]<8;4,2>:uw		0x0080:uw			
+	add.sat (4) 	r[a0.0,32]<2>:uw		r[a0.0, 32]<8;4,2>:uw		0x0080:uw			
+	add.sat (4) 	r[a0.0,64]<2>:uw		r[a0.0, 64]<8;4,2>:uw		0x0080:uw			
+	add.sat (4) 	r[a0.0,96]<2>:uw		r[a0.0, 96]<8;4,2>:uw		0x0080:uw			
+
+	add.sat (8) 	r[a0.1, 0]<1>:uw		r[a0.1,  0]<8;8,1>:uw		0x0080:uw			
+	add.sat (8) 	r[a0.1,32]<1>:uw		r[a0.1, 32]<8;8,1>:uw		0x0080:uw			
+	add.sat (8) 	r[a0.1,64]<1>:uw		r[a0.1, 64]<8;8,1>:uw		0x0080:uw			
+	add.sat (8) 	r[a0.1,96]<1>:uw		r[a0.1, 96]<8;8,1>:uw		0x0080:uw			
+
+	add.sat (4) 	r[a0.2, 0]<2>:uw		r[a0.2,  0]<8;4,2>:uw		0x0080:uw			
+	add.sat (4) 	r[a0.2,32]<2>:uw		r[a0.2, 32]<8;4,2>:uw		0x0080:uw			
+	add.sat (4) 	r[a0.2,64]<2>:uw		r[a0.2, 64]<8;4,2>:uw		0x0080:uw			
+	add.sat (4) 	r[a0.2,96]<2>:uw		r[a0.2, 96]<8;4,2>:uw		0x0080:uw			
+
+	// right: same pattern for bytes 16-31 of each row (offsets +16/+48/+80/+112)
+	add.sat (4) 	r[a0.0,16]<2>:uw			r[a0.0, 16]<8;4,2>:uw			0x0080:uw			
+	add.sat (4) 	r[a0.0,48]<2>:uw		r[a0.0, 48]<8;4,2>:uw		0x0080:uw			
+	add.sat (4) 	r[a0.0,80]<2>:uw		r[a0.0, 80]<8;4,2>:uw		0x0080:uw			
+	add.sat (4) 	r[a0.0,112]<2>:uw		r[a0.0, 112]<8;4,2>:uw		0x0080:uw			
+
+	add.sat (8) 	r[a0.1, 16]<1>:uw			r[a0.1, 16]<8;8,1>:uw			0x0080:uw			
+	add.sat (8) 	r[a0.1,48]<1>:uw		r[a0.1, 48]<8;8,1>:uw		0x0080:uw			
+	add.sat (8) 	r[a0.1,80]<1>:uw		r[a0.1, 80]<8;8,1>:uw		0x0080:uw			
+	add.sat (8) 	r[a0.1,112]<1>:uw		r[a0.1, 112]<8;8,1>:uw		0x0080:uw			
+
+	add.sat (4) 	r[a0.2, 16]<2>:uw			r[a0.2,  16]<8;4,2>:uw		0x0080:uw			
+	add.sat (4) 	r[a0.2,48]<2>:uw		r[a0.2, 48]<8;4,2>:uw		0x0080:uw			
+	add.sat (4) 	r[a0.2,80]<2>:uw		r[a0.2, 80]<8;4,2>:uw		0x0080:uw			
+	add.sat (4) 	r[a0.2,112]<2>:uw		r[a0.2, 112]<8;4,2>:uw		0x0080:uw			
+
+    add (4)    a0.0:uw    r22.0<4;4,1>:w    1536:uw	// a0.0-a0.3 = r22.0-r22.3 + 1536 bytes: source pointers now address buffer 3
+	// left: saturating +0x0080 on bytes 0-15 of the four rows at +0/+32/+64/+96 (a0.0/a0.2: every other word; a0.1: all 8 words)
+	add.sat (4) 	r[a0.0, 0]<2>:uw		r[a0.0,  0]<8;4,2>:uw		0x0080:uw			
+	add.sat (4) 	r[a0.0,32]<2>:uw		r[a0.0, 32]<8;4,2>:uw		0x0080:uw			
+	add.sat (4) 	r[a0.0,64]<2>:uw		r[a0.0, 64]<8;4,2>:uw		0x0080:uw			
+	add.sat (4) 	r[a0.0,96]<2>:uw		r[a0.0, 96]<8;4,2>:uw		0x0080:uw			
+
+	add.sat (8) 	r[a0.1, 0]<1>:uw		r[a0.1,  0]<8;8,1>:uw		0x0080:uw			
+	add.sat (8) 	r[a0.1,32]<1>:uw		r[a0.1, 32]<8;8,1>:uw		0x0080:uw			
+	add.sat (8) 	r[a0.1,64]<1>:uw		r[a0.1, 64]<8;8,1>:uw		0x0080:uw			
+	add.sat (8) 	r[a0.1,96]<1>:uw		r[a0.1, 96]<8;8,1>:uw		0x0080:uw			
+
+	add.sat (4) 	r[a0.2, 0]<2>:uw		r[a0.2,  0]<8;4,2>:uw		0x0080:uw			
+	add.sat (4) 	r[a0.2,32]<2>:uw		r[a0.2, 32]<8;4,2>:uw		0x0080:uw			
+	add.sat (4) 	r[a0.2,64]<2>:uw		r[a0.2, 64]<8;4,2>:uw		0x0080:uw			
+	add.sat (4) 	r[a0.2,96]<2>:uw		r[a0.2, 96]<8;4,2>:uw		0x0080:uw			
+
+	// right: same pattern for bytes 16-31 of each row (offsets +16/+48/+80/+112)
+	add.sat (4) 	r[a0.0,16]<2>:uw			r[a0.0, 16]<8;4,2>:uw			0x0080:uw			
+	add.sat (4) 	r[a0.0,48]<2>:uw		r[a0.0, 48]<8;4,2>:uw		0x0080:uw			
+	add.sat (4) 	r[a0.0,80]<2>:uw		r[a0.0, 80]<8;4,2>:uw		0x0080:uw			
+	add.sat (4) 	r[a0.0,112]<2>:uw		r[a0.0, 112]<8;4,2>:uw		0x0080:uw			
+
+	add.sat (8) 	r[a0.1, 16]<1>:uw			r[a0.1, 16]<8;8,1>:uw			0x0080:uw			
+	add.sat (8) 	r[a0.1,48]<1>:uw		r[a0.1, 48]<8;8,1>:uw		0x0080:uw			
+	add.sat (8) 	r[a0.1,80]<1>:uw		r[a0.1, 80]<8;8,1>:uw		0x0080:uw			
+	add.sat (8) 	r[a0.1,112]<1>:uw		r[a0.1, 112]<8;8,1>:uw		0x0080:uw			
+
+	add.sat (4) 	r[a0.2, 16]<2>:uw			r[a0.2,  16]<8;4,2>:uw		0x0080:uw			
+	add.sat (4) 	r[a0.2,48]<2>:uw		r[a0.2, 48]<8;4,2>:uw		0x0080:uw			
+	add.sat (4) 	r[a0.2,80]<2>:uw		r[a0.2, 80]<8;4,2>:uw		0x0080:uw			
+	add.sat (4) 	r[a0.2,112]<2>:uw		r[a0.2, 112]<8;4,2>:uw		0x0080:uw			
+
+    add (4)    a0.0:uw    r22.0<4;4,1>:w    2048:uw	// NOTE(review): a0.0-a0.3 are overwritten two lines below before being used, so this +2048 value is never read
+	// restore pointers: a0.0-a0.3 back to r22.0-r22.3 + 0 (buffer 0) for the packing pass
+	add   (4)   a0.0:uw   r22.0<4;4,1>:w          0:uw
+
+	mov (8) r28<1>:ud		r27<8;8,1>:ud	// message register r28 = copy of the r27 template (origin, 32x2 block size)
+	mov (8) r37<1>:ud		r27<8;8,1>:ud	// message register r37 = same template
+	add (1) r37.1<1>:d     r27.1<0;1,0>:d       2:d   // r37.1 = r27.1 + 2: second message starts 2 rows below the first
+
+// Pack source rows 0-1 (offsets 1/33): byte 1 of each 16-bit sample is written through the DEST pointers. a0.1 channel -> every 2nd byte via a0.4; a0.0 and a0.2 channels (every other sample) -> every 4th byte via a0.6 and a0.5.
+	mov  (8)    r[a0.6,  0]<4>:ub    r[a0.0,   1]<32;8,4>:ub         { NoDDClr }
+    mov  (8)    r[a0.6, 32]<4>:ub    r[a0.0,33]<32;8,4>:ub         { NoDDClr }    
+    mov (16)    r[a0.4,  0]<2>:ub    r[a0.1,   1]<32;16,2>:ub        { NoDDClr, NoDDChk }
+    mov (16)    r[a0.4, 32]<2>:ub    r[a0.1,33]<32;16,2>:ub        { NoDDClr, NoDDChk }
+    mov  (8)    r[a0.5,  0]<4>:ub    r[a0.2,   1]<32;8,4>:ub         { NoDDChk }
+    mov  (8)    r[a0.5, 32]<4>:ub    r[a0.2,33]<32;8,4>:ub         { NoDDChk }
+// Source rows 2-3 (offsets 65/97) go to DEST + 288/+320 bytes, i.e. 9 GRFs further on if GRFs are 32 bytes (presumably the payload following r37 -- TODO confirm)
+    mov  (8)    r[a0.6, 288]<4>:ub    r[a0.0,65]<32;8,4>:ub         { NoDDClr }
+    mov  (8)    r[a0.6, 320]<4>:ub    r[a0.0,97]<32;8,4>:ub         { NoDDClr }
+    mov (16)    r[a0.4,288]<2>:ub    r[a0.1,65]<32;16,2>:ub        { NoDDClr, NoDDChk }
+    mov (16)    r[a0.4,320]<2>:ub    r[a0.1,97]<32;16,2>:ub        { NoDDClr, NoDDChk }
+    mov  (8)    r[a0.5,288]<4>:ub    r[a0.2,65]<32;8,4>:ub         { NoDDChk }
+    mov  (8)    r[a0.5,320]<4>:ub    r[a0.2,97]<32;8,4>:ub         { NoDDChk }
+// Two writes, no response (null destination). NOTE(review): 0x60A8018 looks like the media-block-write template 0x020A8000 documented in common.inc with message length 3 and binding table index 0x18 -- confirm against the PRM.
+    send (1)    null<1>:d    r28   	0x5			0x60A8018:ud
+	send (1)    null<1>:d    r37   	0x5			0x60A8018:ud
+
+    // advance source pointers: a0.0-a0.3 = r22.0-r22.3 + 512 bytes (buffer 1)
+	add   (4)   a0.0:uw   r22.0<4;4,1>:w          512:uw
+
+	add (1) r28.1<1>:d     r27.1<0;1,0>:d       4:d   // r28.1 = r27.1 + 4: first message of this buffer starts 4 rows down
+	add (1) r37.1<1>:d     r27.1<0;1,0>:d       6:d   // r37.1 = r27.1 + 6: second message starts 6 rows down
+
+// Pack source rows 0-1 (offsets 1/33): byte 1 of each 16-bit sample goes through DEST pointers a0.4 (a0.1 channel, every 2nd byte) and a0.6/a0.5 (a0.0/a0.2 channels, every 4th byte)
+	mov  (8)    r[a0.6,  0]<4>:ub    r[a0.0,   1]<32;8,4>:ub         { NoDDClr }
+    mov  (8)    r[a0.6, 32]<4>:ub    r[a0.0,33]<32;8,4>:ub         { NoDDClr }    
+    mov (16)    r[a0.4,  0]<2>:ub    r[a0.1,   1]<32;16,2>:ub        { NoDDClr, NoDDChk }
+    mov (16)    r[a0.4, 32]<2>:ub    r[a0.1,33]<32;16,2>:ub        { NoDDClr, NoDDChk }   
+    mov  (8)    r[a0.5,  0]<4>:ub    r[a0.2,   1]<32;8,4>:ub         { NoDDChk }
+    mov  (8)    r[a0.5, 32]<4>:ub    r[a0.2,33]<32;8,4>:ub         { NoDDChk }
+// Source rows 2-3 (offsets 65/97) go to DEST + 288/+320 bytes
+    mov  (8)    r[a0.6, 288]<4>:ub    r[a0.0,65]<32;8,4>:ub         { NoDDClr }
+    mov  (8)    r[a0.6, 320]<4>:ub    r[a0.0,97]<32;8,4>:ub         { NoDDClr }
+    mov (16)    r[a0.4,288]<2>:ub    r[a0.1,65]<32;16,2>:ub        { NoDDClr, NoDDChk }
+    mov (16)    r[a0.4,320]<2>:ub    r[a0.1,97]<32;16,2>:ub        { NoDDClr, NoDDChk }
+    mov  (8)    r[a0.5,288]<4>:ub    r[a0.2,65]<32;8,4>:ub         { NoDDChk }
+    mov  (8)    r[a0.5,320]<4>:ub    r[a0.2,97]<32;8,4>:ub         { NoDDChk }
+// Two writes with message registers r28 and r37, no response (null destination)
+    send (1)    null<1>:d    r28   	0x5			0x60A8018:ud
+	send (1)    null<1>:d    r37   	0x5			0x60A8018:ud
+
+    // advance source pointers: a0.0-a0.3 = r22.0-r22.3 + 1024 bytes (buffer 2)
+	add   (4)   a0.0:uw   r22.0<4;4,1>:w          1024:uw
+
+	add (1) r28.1<1>:d     r27.1<0;1,0>:d       8:d   // r28.1 = r27.1 + 8: first message of this buffer starts 8 rows down
+	add (1) r37.1<1>:d     r27.1<0;1,0>:d       10:d   // r37.1 = r27.1 + 10: second message starts 10 rows down
+
+// Pack source rows 0-1 (offsets 1/33): byte 1 of each 16-bit sample goes through DEST pointers a0.4 (a0.1 channel, every 2nd byte) and a0.6/a0.5 (a0.0/a0.2 channels, every 4th byte)
+	mov  (8)    r[a0.6,  0]<4>:ub    r[a0.0,   1]<32;8,4>:ub         { NoDDClr }
+    mov  (8)    r[a0.6, 32]<4>:ub    r[a0.0,33]<32;8,4>:ub         { NoDDClr }    
+    mov (16)    r[a0.4,  0]<2>:ub    r[a0.1,   1]<32;16,2>:ub        { NoDDClr, NoDDChk }
+    mov (16)    r[a0.4, 32]<2>:ub    r[a0.1,33]<32;16,2>:ub        { NoDDClr, NoDDChk }
+    mov  (8)    r[a0.5,  0]<4>:ub    r[a0.2,   1]<32;8,4>:ub         { NoDDChk }
+    mov  (8)    r[a0.5, 32]<4>:ub    r[a0.2,33]<32;8,4>:ub         { NoDDChk }
+// Source rows 2-3 (offsets 65/97) go to DEST + 288/+320 bytes
+    mov  (8)    r[a0.6, 288]<4>:ub    r[a0.0,65]<32;8,4>:ub         { NoDDClr }
+    mov  (8)    r[a0.6, 320]<4>:ub    r[a0.0,97]<32;8,4>:ub         { NoDDClr }	
+    mov (16)    r[a0.4,288]<2>:ub    r[a0.1,65]<32;16,2>:ub        { NoDDClr, NoDDChk }
+    mov (16)    r[a0.4,320]<2>:ub    r[a0.1,97]<32;16,2>:ub        { NoDDClr, NoDDChk }
+    mov  (8)    r[a0.5,288]<4>:ub    r[a0.2,65]<32;8,4>:ub         { NoDDChk }
+    mov  (8)    r[a0.5,320]<4>:ub    r[a0.2,97]<32;8,4>:ub         { NoDDChk }
+// Two writes with message registers r28 and r37, no response (null destination)
+    send (1)    null<1>:d    r28   	0x5			0x60A8018:ud
+	send (1)    null<1>:d    r37   	0x5			0x60A8018:ud
+
+   // advance source pointers: a0.0-a0.3 = r22.0-r22.3 + 1536 bytes (buffer 3)
+	add   (4)   a0.0:uw   r22.0<4;4,1>:w          1536:uw
+
+	add (1) r28.1<1>:d     r27.1<0;1,0>:d       12:d   // r28.1 = r27.1 + 12: first message of this buffer starts 12 rows down
+	add (1) r37.1<1>:d     r27.1<0;1,0>:d       14:d   // r37.1 = r27.1 + 14: second message starts 14 rows down
+// Pack source rows 0-1 (offsets 1/33): byte 1 of each 16-bit sample goes through DEST pointers a0.4 (a0.1 channel, every 2nd byte) and a0.6/a0.5 (a0.0/a0.2 channels, every 4th byte)
+	mov  (8)    r[a0.6,  0]<4>:ub    r[a0.0,   1]<32;8,4>:ub         { NoDDClr }
+    mov  (8)    r[a0.6, 32]<4>:ub    r[a0.0,33]<32;8,4>:ub         { NoDDClr }    
+    mov (16)    r[a0.4,  0]<2>:ub    r[a0.1,   1]<32;16,2>:ub        { NoDDClr, NoDDChk }
+    mov (16)    r[a0.4, 32]<2>:ub    r[a0.1,33]<32;16,2>:ub        { NoDDClr, NoDDChk }
+    mov  (8)    r[a0.5,  0]<4>:ub    r[a0.2,   1]<32;8,4>:ub         { NoDDChk }
+    mov  (8)    r[a0.5, 32]<4>:ub    r[a0.2,33]<32;8,4>:ub         { NoDDChk }
+// Source rows 2-3 (offsets 65/97) go to DEST + 288/+320 bytes
+    mov  (8)    r[a0.6, 288]<4>:ub    r[a0.0,65]<32;8,4>:ub         { NoDDClr }
+    mov  (8)    r[a0.6, 320]<4>:ub    r[a0.0,97]<32;8,4>:ub         { NoDDClr }
+    mov (16)    r[a0.4,288]<2>:ub    r[a0.1,65]<32;16,2>:ub        { NoDDClr, NoDDChk }
+    mov (16)    r[a0.4,320]<2>:ub    r[a0.1,97]<32;16,2>:ub        { NoDDClr, NoDDChk }
+    mov  (8)    r[a0.5,288]<4>:ub    r[a0.2,65]<32;8,4>:ub         { NoDDChk }
+    mov  (8)    r[a0.5,320]<4>:ub    r[a0.2,97]<32;8,4>:ub         { NoDDChk }
+// Last two writes of the block, message registers r28 and r37, no response (null destination)
+    send (1)    null<1>:d    r28   	0x5			0x60A8018:ud
+	send (1)    null<1>:d    r37   	0x5			0x60A8018:ud
+
diff --git a/src/shaders/post_processing/gen7/Save_AVS_PL3.g4a b/src/shaders/post_processing/gen7/Save_AVS_PL3.g4a
new file mode 100644
index 0000000..cecb5be
--- /dev/null
+++ b/src/shaders/post_processing/gen7/Save_AVS_PL3.g4a
@@ -0,0 +1,564 @@
+/*
+ *  Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+//   84    // Total instruction count
+//    1    // Total kernel count
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+// Assembler defaults, presumably applied when an instruction omits its execution size or an operand omits its type.
+.default_execution_size (16)
+.default_register_type  :ub
+// Register budget: 128 GRFs in total, 7 of them payload (presumably r0 plus static r1-r6 -- TODO confirm).
+.reg_count_total        128
+.reg_count_payload      7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+                                    //  e.g.            byte0   byte1  byte2
+                                    // YUYV               0       1      3
+                                    // YVYU               0       3      1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+                                    //  e.g.              byte0           byte1           byte2
+                                    // YUYV                 0               1               3
+                                    // YVYU                 0               3               1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+//                = 000 0001 (min message len 1 ) 0,0000 (resp len 0   -add later)
+//                  0000,0000,0000
+//                  0001(Spawn a root thread),0001 (Root thread spawn thread)
+//                = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+//                = 000 0110 (min message len 6 ) 0,0000 (resp len 0   -add later)
+//                  1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+//                  0000,0000 (Binding table index, added later)
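+//                = 0x0C098700 per the fields listed above (the 0x02000011 printed here originally repeats the thread-spawner value; TODO confirm against the real define)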
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+        // Message Descriptors
+                //                = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+                //                  1 (header present 1) 0 1010 (media block write) 000000
+                //                  00000000 (binding table index - set later)
+                //                = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+                             // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT  r43    // Dummy Message Register for EOT
+
+// Generic payload area at r30, exposed as ub/uw/ud/f views. The Save_AVS_PL3 instructions use the numbered mubMSGPAYLOAD0-2 instead.
+.declare    mubMSGPAYLOAD  Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare    muwMSGPAYLOAD  Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare    mudMSGPAYLOAD  Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare    mfMSGPAYLOAD   Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation: five typed views that all start at r9.0
+// (float view first; none of these names is referenced by the Save_AVS_PL3 instructions)
+.declare fROBUF		  Base=r9.0		ElementSize=4		SrcRegion=<8;8,1>		  DstRegion=<1>		Type=f
+// same storage as unsigned dwords
+.declare udROBUF		Base=r9.0		ElementSize=4		SrcRegion=<8;8,1>		  DstRegion=<1>		Type=ud
+// same storage as unsigned words
+.declare uwROBUF		Base=r9.0		ElementSize=2		SrcRegion=<16;16,1>		DstRegion=<1>		Type=uw
+// same storage as contiguous bytes
+.declare ubROBUF		Base=r9.0		ElementSize=1		SrcRegion=<16;16,1>		DstRegion=<1>		Type=ub
+// byte view with stride 4 (source <32;8,4>, destination <4>): one byte out of every dword
+.declare ub4ROBUF 	Base=r9.0		ElementSize=1		SrcRegion=<32;8,4>		DstRegion=<4>		Type=ub
+
+
+// End of common.inc
+
+
+// Module name: Save_AVS_PL3.asm
+//
+// Save PL3 420 frame data block of size 16x16
+//
+// To save 16x16 block (16x16 byte of Y and 8x8 byte of U and V each) we need 3 send instructions with one of size 16x16 and two of size 8x8.
+//  -----------------
+//  |    16x16 Y    |
+//  |               |
+//  -----------------
+//  | 8x8 U |
+//  ---------
+//  | 8x8 V |
+//  ---------
+
+//-----------------------------------------------------------------
+//The layout of data is as follows:
+//mMSGHDR0			: Y data header (16x16)
+//mubMSGPAYLOAD0	: Y data payload (8 GRFs)
+//mMSGHDR1			: U data header (8x8)
+//mubMSGPAYLOAD1	: U data payload (2 GRFs)
+//mMSGHDR2			: V data header (8x8)
+//mubMSGPAYLOAD2	: V data payload (2 GRFs)
+//------------------------------------------------------------------
+
+
+
+// Module name: Save.inc
+
+
+
+
+// Description: Includes all definitions explicit to Fast Composite.
+
+
+
+
+// End of common.inc
+
+
+//========== GRF partition ==========
+     // r0 header            :   r0          (1 GRF)
+     // Static parameters    :   r1 - r6     (6 GRFS)
+     // Inline parameters    :   r7 - r8     (2 GRFs)
+     // MSGSRC               :   r27         (1 GRF)
+//===================================
+
+//Interface:
+//========== Static Parameters (Explicit To Fast Composite) ==========
+//r1
+//CSC Set 0
+
+// Dword view starting at r1.0; not referenced by the Save_AVS_PL3 instructions.
+.declare udCSC_CURBE    Base=r1.0      ElementSize=4       Type=ud
+
+//Constant alpha
+
+
+//r2
+
+
+// Gen7 AVS WA
+
+
+// WiDi Definitions
+
+
+//Colorfill
+
+
+                                      // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise.
+// Byte alias at r2.20; SrcRegion <0;1,0> re-reads that one byte for every channel. Not referenced by the Save_AVS_PL3 instructions.
+.declare ubCOLOR_PIXEL_VAL      Base=r2.20      ElementSize=1       SrcRegion=<0;1,0>       DstRegion=<1>       Type=ub
+
+//r3
+//Normalised Ratio of Horizontal step size with main video for all layers
+
+
+    //Normalised Ratio of Horizontal step size with main video for all layers becomes
+    //Normalised Horizontal step size for all layers in VP_Setup.asm
+
+
+//r4
+//Normalised Vertical step size for all layers
+
+
+//r5
+//Normalised Vertical Frame Origin for all layers
+
+
+//r6
+//Normalised Horizontal Frame Origin for all layers
+
+
+//========== Inline Parameters (Explicit To Fast Composite) ==========
+
+
+//Main video Step X
+
+
+//====================== Binding table (Explicit To Fast Composite)=========================================
+
+
+//Used by Interlaced Scaling Kernels
+
+
+//========== Sampler State Table Index (Explicit To Fast Composite)==========
+//Sampler Index for AVS/IEF messages
+
+
+//Sampler Index for SIMD16 sampler messages
+
+
+//=============================================================================
+// Working buffers: BUFFER_0..3 start 16 registers apart (r64, r80, r96, r112); BUFFER_4/5 start at r28 and r46, which Save_AVS_PL3 also uses as message headers.
+.declare fBUFFER_0      Base=r64.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+.declare fBUFFER_1      Base=r80.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+.declare fBUFFER_2      Base=r96.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+.declare fBUFFER_3      Base=r112.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+.declare fBUFFER_4      Base=r28.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+.declare fBUFFER_5      Base=r46.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+// Same six buffers viewed as unsigned dwords.
+.declare udBUFFER_0     Base=r64.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_1     Base=r80.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_2     Base=r96.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_3     Base=r112.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_4     Base=r28.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_5     Base=r46.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+// Same six buffers viewed as unsigned words.
+.declare uwBUFFER_0     Base=r64.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_1     Base=r80.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_2     Base=r96.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_3     Base=r112.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_4     Base=r28.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_5     Base=r46.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+// Same six buffers viewed as contiguous bytes.
+.declare ubBUFFER_0     Base=r64.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_1     Base=r80.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_2     Base=r96.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_3     Base=r112.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_4     Base=r28.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_5     Base=r46.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+// Byte view with stride 4 (source <32;8,4>, destination <4>): one byte out of every dword.
+.declare ub4BUFFER_0    Base=r64.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_1    Base=r80.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_2    Base=r96.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_3    Base=r112.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_4    Base=r28.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_5    Base=r46.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+
+//Pointer to mask reg
+
+
+//r18
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+.declare udCSC_COEFF_0  Base=r18.0    ElementSize=4 Type=ud       // 1 GRF
+
+//r19
+
+// Second CSC coefficient register, dword view; like udCSC_COEFF_0 it is not referenced by the Save_AVS_PL3 instructions.
+.declare udCSC_COEFF_1  Base=r19.0    ElementSize=4 Type=ud       // 1 GRF
+
+
+//r20
+// Word view of r20 (temporary alpha mask by name); unused by the Save_AVS_PL3 instructions.
+.declare uwALPHA_MASK_REG_TEMP  Base=r20.0    ElementSize=2 SrcRegion=<16;16,1> Type=uw        // 1 GRF
+
+//r21
+// Word view of r21 (alpha mask by name); unused by the Save_AVS_PL3 instructions.
+.declare uwALPHA_MASK_REG       Base=r21.0         ElementSize=2 SrcRegion=<16;16,1> Type=uw        // 1 GRF
+
+//r22
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as
+//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT
+
+//r23
+
+
+//Lumakey
+
+
+//r24
+
+
+//r25
+
+
+//r26
+
+
+//defines to generate LABELS during compile time.
+
+
+//Msg payload buffers; up to 4 full-size messages can be written
+// Payload N starts one register after a header slot: r29/r38/r47/r56 follow r28/r37/r46/r55.
+// Dword views:
+.declare    mudMSGPAYLOAD0  Base=r29.0 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare    mudMSGPAYLOAD1  Base=r38.0 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare    mudMSGPAYLOAD2  Base=r47.0 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare    mudMSGPAYLOAD3  Base=r56.0 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+// Word views of the same four payloads:
+.declare    muwMSGPAYLOAD0  Base=r29.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare    muwMSGPAYLOAD1  Base=r38.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare    muwMSGPAYLOAD2  Base=r47.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare    muwMSGPAYLOAD3  Base=r56.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+// Byte views; 0-2 are the ones Save_AVS_PL3 writes (Y, U, V). 4-7 start 3 registers into payloads 0-3.
+.declare    mubMSGPAYLOAD0  Base=r29.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare    mubMSGPAYLOAD1  Base=r38.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare    mubMSGPAYLOAD2  Base=r47.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare    mubMSGPAYLOAD3  Base=r56.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare    mubMSGPAYLOAD4  Base=r32.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare    mubMSGPAYLOAD5  Base=r41.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare    mubMSGPAYLOAD6  Base=r50.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare    mubMSGPAYLOAD7  Base=r59.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+
+
+	// The r17 register (nTEMP0) is originally defined in "Common.inc"; it is aliased here as uwTemp0 instead of re-defining nTEMP0.
+	// NOTE(review): the original comment spoke of a "SAVE_RGB" suffix, which the declared name does not carry.
+	// Word view of r17; not referenced by the Save_AVS_PL3 instructions.
+	.declare uwTemp0 Base=r17.0 ElementSize=2 Type=uw
+
+
+//_SAVE_INC_
+
+
+	// At the save module we have all 8 address sub-registers available.
+	// So we will use PING-PONG type of scheme to save the data using
+	// pointers pBUF_CHNL_TOP_8x4 and pBUF_CHNL_BOT_8x4. This will help
+	// reduce dependency. - rT
+	// In this kernel a0.0-a0.3 form one pointer set and a0.4-a0.7 the other; a0.1/a0.5 address Y, a0.2/a0.6 U, a0.0/a0.4 V.
+	//wBUFF_CHNL_PTR points to either buffer 0 or buffer 4.
+	//Add appropriate offsets to get pointers for all buffers (1,2,3 or 5).
+	//Offsets are zero for buffer 0 and buffer 4.
+	add   (4) a0.0:uw   r22.0<4;4,1>:w          0:uw
+	add   (4) a0.4:uw   r22.0<4;4,1>:w          512:uw
+	// (+512 bytes matches the 16-register spacing between BUFFER_0 at r64 and BUFFER_1 at r80.)
+	//Set up header for Y,U and V data: start each header (r28, r37, r46) as a copy of MSGSRC r27
+	mov  (8) r28<1>:ud      r27<8;8,1>:ud
+	mov  (8) r37<1>:ud      r27<8;8,1>:ud
+	mov  (8) r46<1>:ud      r27<8;8,1>:ud
+	// Header dwords 0-1 = block origin: two words from r7.0, halved for the subsampled U and V planes.
+	mov  (2) r28.0<1>:d     r7.0<2;2,1>:w                   		{ NoDDClr }  //ORI Y (LUMA) = ORI
+	shr  (2) r37.0<1>:d     r7.0<2;2,1>:w            1:w  		{ NoDDClr }  //H/V ORI U 	= H/V ORI/2
+	shr  (2) r46.0<1>:d     r7.0<2;2,1>:w            1:w  		{ NoDDClr }  //H/V ORI V 	= H/V ORI/2
+	// NOTE(review): each NoDDClr write above is paired with the NoDDChk write to the same header below, with writes to the other two headers in between -- confirm this is allowed.
+  	mov  (1) r28.2<1>:ud    0xF000F:ud	{ NoDDChk }  // Y Block width and height (16x16), both 16-bit halves = 15
+	mov  (1)r37.2<1>:ud     0x70007:ud  	{ NoDDChk }  // U Block width and height (8x8), both 16-bit halves = 7
+	mov  (1)r46.2<1>:ud     0x70007:ud  	{ NoDDChk }  // V Block width and height (8x8), both 16-bit halves = 7
+
+// Unscramble, and pack data directly to MRFs
+
+//	Data 16x16 block is divided as -
+//		---------
+//		|   0   |
+//		---------
+//		|   1  	|
+//		---------
+//		|   2  	|
+//		---------
+//		|   3  	|
+//		---------
+//		All sub-blocks are of size 16x4
+//		0: ubBUFFER_0
+//		1: ubBUFFER_1, ubBUFFER_0+16
+//		2: ubBUFFER_2
+//		3: ubBUFFER_3, ubBUFFER_2+16
+
+	// Y rounding, first buffer: saturating +0x0080 on each 16-bit sample so the high byte read later is rounded; 4 rows of 16 words at byte offsets 0/32/64/96
+	add.sat (16) r[a0.1,0]<1>:uw		r[a0.1,0]<16;16,1>:uw		0x0080:uw		
+	add.sat (16) r[a0.1,32]<1>:uw		r[a0.1,32]<16;16,1>:uw	0x0080:uw		
+	add.sat (16) r[a0.1,64]<1>:uw		r[a0.1,64]<16;16,1>:uw	0x0080:uw		
+	add.sat (16) r[a0.1,96]<1>:uw		r[a0.1,96]<16;16,1>:uw	0x0080:uw		
+
+	// U rounding: only every second word (stride 2) of the rows at offsets 0 and 64, i.e. the samples the 8x8 chroma block keeps
+	add.sat (8) r[a0.2,0]<2>:uw		r[a0.2,0]<16;8,2>:uw		0x0080:uw		
+	add.sat (8) r[a0.2,64]<2>:uw		r[a0.2,64]<16;8,2>:uw		0x0080:uw		
+
+	// V rounding: same pattern as U, through a0.0
+	add.sat (8) r[a0.0,0]<2>:uw		r[a0.0,0]<16;8,2>:uw		0x0080:uw		
+	add.sat (8) r[a0.0,64]<2>:uw		r[a0.0,64]<16;8,2>:uw		0x0080:uw		
+
+	add   (4) a0.0:uw   			r22.0<4;4,1>:w		1024:uw	//Update Buffer 2 pointers
+
+	// Y rounding, second buffer (a0.4-a0.7 set, base + 512)
+	add.sat (16) r[a0.5,0]<1>:uw		r[a0.5,0]<16;16,1>:uw		0x0080:uw		
+	add.sat (16) r[a0.5,32]<1>:uw		r[a0.5,32]<16;16,1>:uw	0x0080:uw		
+	add.sat (16) r[a0.5,64]<1>:uw		r[a0.5,64]<16;16,1>:uw	0x0080:uw		
+	add.sat (16) r[a0.5,96]<1>:uw		r[a0.5,96]<16;16,1>:uw	0x0080:uw		
+
+	// U rounding
+	add.sat (8) r[a0.6,0]<2>:uw		r[a0.6,0]<16;8,2>:uw		0x0080:uw		
+	add.sat (8) r[a0.6,64]<2>:uw		r[a0.6,64]<16;8,2>:uw		0x0080:uw		
+
+	// V rounding
+	add.sat (8) r[a0.4,0]<2>:uw		r[a0.4,0]<16;8,2>:uw		0x0080:uw		
+	add.sat (8) r[a0.4,64]<2>:uw		r[a0.4,64]<16;8,2>:uw		0x0080:uw		
+
+	add   (4) a0.4:uw   		r22.0<4;4,1>:w          1536:uw	//Update Buffer 3 pointers
+
+	// Y rounding, third buffer (a0.0-a0.3 set, now base + 1024)
+	add.sat (16) r[a0.1,0]<1>:uw		r[a0.1,0]<16;16,1>:uw		0x0080:uw		
+	add.sat (16) r[a0.1,32]<1>:uw		r[a0.1,32]<16;16,1>:uw	0x0080:uw		
+	add.sat (16) r[a0.1,64]<1>:uw		r[a0.1,64]<16;16,1>:uw	0x0080:uw		
+	add.sat (16) r[a0.1,96]<1>:uw		r[a0.1,96]<16;16,1>:uw	0x0080:uw		
+
+	// U rounding
+	add.sat (8) r[a0.2,0]<2>:uw		r[a0.2,0]<16;8,2>:uw		0x0080:uw		
+	add.sat (8) r[a0.2,64]<2>:uw		r[a0.2,64]<16;8,2>:uw		0x0080:uw		
+
+	// V rounding
+	add.sat (8) r[a0.0,0]<2>:uw		r[a0.0,0]<16;8,2>:uw		0x0080:uw		
+	add.sat (8) r[a0.0,64]<2>:uw		r[a0.0,64]<16;8,2>:uw		0x0080:uw		
+
+
+	// Y rounding, fourth buffer (a0.4-a0.7 set, now base + 1536)
+	add.sat (16) r[a0.5,0]<1>:uw		r[a0.5,0]<16;16,1>:uw		0x0080:uw		
+	add.sat (16) r[a0.5,32]<1>:uw		r[a0.5,32]<16;16,1>:uw	0x0080:uw		
+	add.sat (16) r[a0.5,64]<1>:uw		r[a0.5,64]<16;16,1>:uw	0x0080:uw		
+	add.sat (16) r[a0.5,96]<1>:uw		r[a0.5,96]<16;16,1>:uw	0x0080:uw		
+
+	// U rounding
+	add.sat (8) r[a0.6,0]<2>:uw		r[a0.6,0]<16;8,2>:uw		0x0080:uw		
+	add.sat (8) r[a0.6,64]<2>:uw		r[a0.6,64]<16;8,2>:uw		0x0080:uw		
+
+	// V rounding
+	add.sat (8) r[a0.4,0]<2>:uw		r[a0.4,0]<16;8,2>:uw		0x0080:uw		
+	add.sat (8) r[a0.4,64]<2>:uw		r[a0.4,64]<16;8,2>:uw		0x0080:uw		
+
+	// restore the TOP and BOT pointers to their initial offsets (0 and 512) before packing
+	add   (4) a0.0:uw   r22.0<4;4,1>:w          0:uw
+	add   (4) a0.4:uw   r22.0<4;4,1>:w          512:uw
+
+//Buffer 0 (pointer set a0.0-a0.3 at offset 0)
+//Move Y to msg payload: byte offset 1 with stride 2 picks the high byte of each rounded word; one 16-byte row per mov, two rows per payload register
+	mov  (16)  mubMSGPAYLOAD0(0,0)<1>			r[a0.1, 1]<32;16,2>:ub		{ NoDDClr }
+	mov  (16)  mubMSGPAYLOAD0(0,16)<1>			r[a0.1, 33]<32;16,2>:ub		{ NoDDChk }
+	mov  (16)  mubMSGPAYLOAD0(1,0)<1>			r[a0.1, 65]<32;16,2>:ub		{ NoDDClr }
+	mov  (16)  mubMSGPAYLOAD0(1,16)<1>			r[a0.1, 97]<32;16,2>:ub		{ NoDDChk }
+
+//Move U to msg payload: stride 4 from byte 1 keeps the high byte of every second word; rows at offsets 1 and 65 give two 8-byte chroma rows
+	mov  (8)  mubMSGPAYLOAD1(0,0)<1>			r[a0.2, 1]<32;8,4>:ub		{ NoDDClr }
+	mov  (8)  mubMSGPAYLOAD1(0,8)<1>			r[a0.2, 65]<32;8,4>:ub		{ NoDDClr, NoDDChk }
+
+//Move V to msg payload: same selection as U, through a0.0
+	mov  (8)  mubMSGPAYLOAD2(0,0)<1>			r[a0.0, 1]<32;8,4>:ub		{ NoDDClr }
+	mov  (8)  mubMSGPAYLOAD2(0,8)<1>			r[a0.0, 65]<32;8,4>:ub		{ NoDDClr, NoDDChk }
+
+	add   (4) a0.0:uw   			r22.0<4;4,1>:w		1024:uw	//Update Buffer 2 pointers
+
+//Buffer 1 (pointer set a0.4-a0.7 at offset 512): Y rows 4-7 go to payload registers 2-3
+	mov  (16)  mubMSGPAYLOAD0(2,0)<1>			r[a0.5, 1]<32;16,2>:ub		{ NoDDClr }
+	mov  (16)  mubMSGPAYLOAD0(2,16)<1>			r[a0.5, 33]<32;16,2>:ub		{ NoDDChk }
+	mov  (16)  mubMSGPAYLOAD0(3,0)<1>			r[a0.5, 65]<32;16,2>:ub		{ NoDDClr }
+	mov  (16)  mubMSGPAYLOAD0(3,16)<1>			r[a0.5, 97]<32;16,2>:ub		{ NoDDChk }
+// U: second half (bytes 16-31) of the first U payload register. NOTE(review): this continues a NoDDClr chain opened in the Buffer 0 section, with other instructions in between -- confirm.
+	mov  (8)  mubMSGPAYLOAD1(0,16)<1>			r[a0.6, 1]<32;8,4>:ub		{ NoDDClr, NoDDChk }
+	mov  (8)  mubMSGPAYLOAD1(0,24)<1>			r[a0.6, 65]<32;8,4>:ub		{ NoDDChk }
+// V: second half of the first V payload register
+	mov  (8)  mubMSGPAYLOAD2(0,16)<1>			r[a0.4, 1]<32;8,4>:ub		{ NoDDClr, NoDDChk }
+	mov  (8)  mubMSGPAYLOAD2(0,24)<1>			r[a0.4, 65]<32;8,4>:ub		{ NoDDChk }
+
+	add   (4) a0.4:uw   		r22.0<4;4,1>:w          1536:uw	//Update Buffer 3 pointers
+
+//Buffer 2 (a0.0-a0.3 at offset 1024): Y rows 8-11 go to payload registers 4-5
+	mov  (16)  mubMSGPAYLOAD0(4,0)<1>			r[a0.1, 1]<32;16,2>:ub		{ NoDDClr }
+	mov  (16)  mubMSGPAYLOAD0(4,16)<1>			r[a0.1, 33]<32;16,2>:ub		{ NoDDChk }
+	mov  (16)  mubMSGPAYLOAD0(5,0)<1>			r[a0.1, 65]<32;16,2>:ub		{ NoDDClr }
+	mov  (16)  mubMSGPAYLOAD0(5,16)<1>			r[a0.1, 97]<32;16,2>:ub		{ NoDDChk }
+// U: first half of the second U payload register
+	mov  (8)  mubMSGPAYLOAD1(1,0)<1>			r[a0.2, 1]<32;8,4>:ub		{ NoDDClr }
+	mov  (8)  mubMSGPAYLOAD1(1,8)<1>			r[a0.2, 65]<32;8,4>:ub		{ NoDDClr, NoDDChk }
+// V: first half of the second V payload register
+	mov  (8)  mubMSGPAYLOAD2(1,0)<1>			r[a0.0, 1]<32;8,4>:ub		{ NoDDClr }
+	mov  (8)  mubMSGPAYLOAD2(1,8)<1>			r[a0.0, 65]<32;8,4>:ub		{ NoDDClr, NoDDChk }
+
+//Buffer 3 (a0.4-a0.7 at offset 1536): Y rows 12-15 go to payload registers 6-7
+	mov  (16)  mubMSGPAYLOAD0(6,0)<1>			r[a0.5, 1]<32;16,2>:ub		{ NoDDClr }
+	mov  (16)  mubMSGPAYLOAD0(6,16)<1>			r[a0.5, 33]<32;16,2>:ub		{ NoDDChk }
+	mov  (16)  mubMSGPAYLOAD0(7,0)<1>			r[a0.5, 65]<32;16,2>:ub		{ NoDDClr }
+	mov  (16)  mubMSGPAYLOAD0(7,16)<1>			r[a0.5, 97]<32;16,2>:ub		{ NoDDChk }
+// U: second half of the second U payload register
+	mov  (8)  mubMSGPAYLOAD1(1,16)<1>			r[a0.6, 1]<32;8,4>:ub		{ NoDDClr, NoDDChk }
+	mov  (8)  mubMSGPAYLOAD1(1,24)<1>			r[a0.6, 65]<32;8,4>:ub		{ NoDDChk }
+// V: second half of the second V payload register
+	mov  (8)  mubMSGPAYLOAD2(1,16)<1>			r[a0.4, 1]<32;8,4>:ub		{ NoDDClr, NoDDChk }
+	mov  (8)  mubMSGPAYLOAD2(1,24)<1>			r[a0.4, 65]<32;8,4>:ub		{ NoDDChk }
+
+//===========================================================================
+// Three media block writes, no response (null destination). Per the descriptor layout in the common.inc comments: top bits = message length, low byte = binding table index.
+send (1)    null<1>:d    r28   	0x5			0x120A8018:ud	// Y: length 9 (header r28 + 8 payload registers), binding index 0x18
+send (1)    null<1>:d    r37   	0x5			0x60A8019:ud	// U: length 3 (header r37 + 2 payload registers), binding index 0x19
+send (1)    null<1>:d    r46   	0x5			0x60A801A:ud	// V: length 3 (header r46 + 2 payload registers), binding index 0x1A
diff --git a/src/shaders/post_processing/gen7/Save_AVS_RGB.g4a b/src/shaders/post_processing/gen7/Save_AVS_RGB.g4a
new file mode 100644
index 0000000..92bddf8
--- /dev/null
+++ b/src/shaders/post_processing/gen7/Save_AVS_RGB.g4a
@@ -0,0 +1,668 @@
+/*
+ *  Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+//  198    // Total instruction count
+//    1    // Total kernel count
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16)
+.default_register_type  :ub
+
+.reg_count_total        128
+.reg_count_payload      7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+                                    //  e.g.            byte0   byte1  byte2
+                                    // YUYV               0       1      3
+                                    // YVYU               0       3      1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+                                    //  e.g.              byte0           byte1           byte2
+                                    // YUYV                 0               1               3
+                                    // YVYU                 0               3               1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+//                = 000 0001 (min message len 1 ) 0,0000 (resp len 0   -add later)
+//                  0000,0000,0000
+//                  0001(Spawn a root thread),0001 (Root thread spawn thread)
+//                = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+//                = 000 0110 (min message len 6 ) 0,0000 (resp len 0   -add later)
+//                  1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+//                  0000,0000 (Binding table index, added later)
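+//                = 0x0C098700 per the fields listed above (the 0x02000011 printed here originally repeats the thread-spawner value; TODO confirm against the real define)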
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+        // Message Descriptors
+                //                = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+                //                  1 (header present 1) 0 1010 (media block write) 000000
+                //                  00000000 (binding table index - set later)
+                //                = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+                             // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT  r43    // Dummy Message Register for EOT
+
+
+.declare    mubMSGPAYLOAD  Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare    muwMSGPAYLOAD  Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare    mudMSGPAYLOAD  Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare    mfMSGPAYLOAD   Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+
+.declare fROBUF		  Base=r9.0		ElementSize=4		SrcRegion=<8;8,1>		  DstRegion=<1>		Type=f
+
+.declare udROBUF		Base=r9.0		ElementSize=4		SrcRegion=<8;8,1>		  DstRegion=<1>		Type=ud
+
+.declare uwROBUF		Base=r9.0		ElementSize=2		SrcRegion=<16;16,1>		DstRegion=<1>		Type=uw
+
+.declare ubROBUF		Base=r9.0		ElementSize=1		SrcRegion=<16;16,1>		DstRegion=<1>		Type=ub
+
+.declare ub4ROBUF 	Base=r9.0		ElementSize=1		SrcRegion=<32;8,4>		DstRegion=<4>		Type=ub
+
+
+// End of common.inc
+
+
+// Module name: Save_AVS_RGB.asm
+//
+// Save packed ARGB 444 frame data block of size 16x16
+//
+// To save 16x16 block (64x16 byte layout for ARGB8888) we need 4 send instructions with 16x16 in each
+//  -----------------
+//  | 0 | 1 | 2 | 3 |
+//  -----------------
+// the 4 16x16 block send has been replaced by 16 32x2 sends to get better performance
+
+
+
+// Module name: Save.inc
+
+
+
+
+// Description: Includes all definitions explicit to Fast Composite.
+
+
+
+
+// End of common.inc
+
+
+//========== GRF partition ==========
+     // r0 header            :   r0          (1 GRF)
+     // Static parameters    :   r1 - r6     (6 GRFS)
+     // Inline parameters    :   r7 - r8     (2 GRFs)
+     // MSGSRC               :   r27         (1 GRF)
+//===================================
+
+//Interface:
+//========== Static Parameters (Explicit To Fast Composite) ==========
+//r1
+//CSC Set 0
+
+
+.declare udCSC_CURBE    Base=r1.0      ElementSize=4       Type=ud
+
+//Constant alpha
+
+
+//r2
+
+
+// Gen7 AVS WA
+
+
+// WiDi Definitions
+
+
+//Colorfill
+
+
+                                      // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise.	    
+
+.declare ubCOLOR_PIXEL_VAL      Base=r2.20      ElementSize=1       SrcRegion=<0;1,0>       DstRegion=<1>       Type=ub
+
+//r3
+//Normalised Ratio of Horizontal step size with main video for all layers
+
+
+    //Normalised Ratio of Horizontal step size with main video for all layers becomes
+    //Normalised Horizontal step size for all layers in VP_Setup.asm
+
+
+//r4
+//Normalised Vertical step size for all layers
+
+
+//r5
+//Normalised Vertical Frame Origin for all layers
+
+
+//r6
+//Normalised Horizontal Frame Origin for all layers
+
+
+//========== Inline Parameters (Explicit To Fast Composite) ==========
+
+
+//Main video Step X
+
+
+//====================== Binding table (Explicit To Fast Composite)=========================================
+
+
+//Used by Interlaced Scaling Kernels
+
+
+//========== Sampler State Table Index (Explicit To Fast Composite)==========
+//Sampler Index for AVS/IEF messages
+
+
+//Sampler Index for SIMD16 sampler messages
+
+
+//=============================================================================
+
+.declare fBUFFER_0      Base=r64.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+.declare fBUFFER_1      Base=r80.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+.declare fBUFFER_2      Base=r96.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+.declare fBUFFER_3      Base=r112.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+.declare fBUFFER_4      Base=r28.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+.declare fBUFFER_5      Base=r46.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+
+.declare udBUFFER_0     Base=r64.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_1     Base=r80.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_2     Base=r96.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_3     Base=r112.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_4     Base=r28.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_5     Base=r46.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+
+.declare uwBUFFER_0     Base=r64.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_1     Base=r80.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_2     Base=r96.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_3     Base=r112.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_4     Base=r28.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_5     Base=r46.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+
+.declare ubBUFFER_0     Base=r64.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_1     Base=r80.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_2     Base=r96.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_3     Base=r112.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_4     Base=r28.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_5     Base=r46.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+
+.declare ub4BUFFER_0    Base=r64.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_1    Base=r80.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_2    Base=r96.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_3    Base=r112.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_4    Base=r28.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_5    Base=r46.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+
+//Pointer to mask reg
+
+
+//r18
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+.declare udCSC_COEFF_0  Base=r18.0    ElementSize=4 Type=ud       // 1 GRF
+
+//r19
+
+
+.declare udCSC_COEFF_1  Base=r19.0    ElementSize=4 Type=ud       // 1 GRF
+
+
+//r20
+
+.declare uwALPHA_MASK_REG_TEMP  Base=r20.0    ElementSize=2 SrcRegion=<16;16,1> Type=uw        // 1 GRF
+
+//r21
+
+.declare uwALPHA_MASK_REG       Base=r21.0         ElementSize=2 SrcRegion=<16;16,1> Type=uw        // 1 GRF
+
+//r22
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as
+//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT
+
+//r23
+
+
+//Lumakey
+
+
+//r24
+
+
+//r25
+
+
+//r26
+
+
+//defines to generate LABELS during compile time.
+
+
+//Msg payload buffers; up to 4 full-size messages can be written
+
+
+.declare    mudMSGPAYLOAD0  Base=r29.0 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare    mudMSGPAYLOAD1  Base=r38.0 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare    mudMSGPAYLOAD2  Base=r47.0 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare    mudMSGPAYLOAD3  Base=r56.0 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+
+.declare    muwMSGPAYLOAD0  Base=r29.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare    muwMSGPAYLOAD1  Base=r38.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare    muwMSGPAYLOAD2  Base=r47.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare    muwMSGPAYLOAD3  Base=r56.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+
+.declare    mubMSGPAYLOAD0  Base=r29.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare    mubMSGPAYLOAD1  Base=r38.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare    mubMSGPAYLOAD2  Base=r47.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare    mubMSGPAYLOAD3  Base=r56.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare    mubMSGPAYLOAD4  Base=r32.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare    mubMSGPAYLOAD5  Base=r41.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare    mubMSGPAYLOAD6  Base=r50.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare    mubMSGPAYLOAD7  Base=r59.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+
+
+	// the r17 register (nTEMP0) is originally defined from "Common.inc"
+	// instead of re-defining a nTEMP0 here, we use "SAVE_RGB" suffix for its naming
+
+	.declare uwTemp0 Base=r17.0 ElementSize=2 Type=uw
+
+
+//_SAVE_INC_
+
+
+// At the save module we have all 8 address sub-registers available.
+// So we will use PING-PONG type of scheme to save the data using
+// pointers pBUF_CHNL_TOP_8x4 and pBUF_CHNL_BOT_8x4. This will help
+// reduce dependency. - rT
+
+// channel switching based on bit 0 of uWRGB_BGR_CH_SWITCH
+
+			// if channel swap?
+			and.nz.f0.0	null<1>:w	r2.3<0;1,0>:uw 0x01:w
+
+//wBUFF_CHNL_PTR points to either buffer 0 or buffer 4.
+//Add appropriate offsets to get pointers for all buffers (1,2,3 or 5).
+//Offsets are zero for buffer 0 and buffer 4.
+	add   (4)   a0.0:uw   r22.0<4;4,1>:w          0:uw
+
+	// pointer swap
+	(f0.0)	mov (1)	uwTemp0<1> a0.0:uw
+	(f0.0)	mov (1)	a0.0:uw a0.2:uw
+	(f0.0)	mov (1)	a0.2:uw uwTemp0<0;1,0>
+
+    shl (1) r27.0<1>:d      r7.0<0;1,0>:w            2:w  			{ NoDDClr }       // H. block origin need to be quadrupled
+    mov (1) r27.1<1>:d      r7.1<0;1,0>:w                 			{ NoDDClr, NoDDChk }    // Block origin (1st quadrant)
+    mov (1) r27.2<1>:ud     0x1001F:ud  	{ NoDDChk }       // Block width and height (32x2)
+
+    add (4)   a0.4:uw   a0.0<4;4,1>:w          r22.8<0;2,1>:w
+
+    mov (8) r28<1>:ud      r27<8;8,1>:ud
+    mov (8) r37<1>:ud      r27<8;8,1>:ud
+    mov (8) r46<1>:ud      r27<8;8,1>:ud
+    mov (8) r55<1>:ud      r27<8;8,1>:ud
+
+    mov (8) r31<1>:ud		r27<8;8,1>:ud
+	mov (8) r40<1>:ud		r27<8;8,1>:ud
+	mov (8) r49<1>:ud		r27<8;8,1>:ud
+	mov (8) r58<1>:ud		r27<8;8,1>:ud
+
+
+//for BUFFER 0
+	add (1) r37.1<1>:d     r27.1<0;1,0>:d        2:d   
+	add (1) r46.0<1>:d     r27.0<0;1,0>:d       32:d   
+	add (1) r55.0<1>:d     r27.0<0;1,0>:d       32:d   
+	add (1) r55.1<1>:d     r27.1<0;1,0>:d        2:d  
+
+// for BUFFER 1
+	add (1) r31.1<1>:d     r27.1<0;1,0>:d        4:d   
+	add (1) r40.1<1>:d     r27.1<0;1,0>:d        6:d   
+	add (1) r49.0<1>:d     r27.0<0;1,0>:d       32:d   
+	add (1) r49.1<1>:d     r27.1<0;1,0>:d        4:d  
+	add (1) r58.0<1>:d     r27.0<0;1,0>:d       32:d   
+	add (1) r58.1<1>:d     r27.1<0;1,0>:d        6:d  
+    // write Buf_0 to 1st quarter of four horizontal output blocks
+
+// Please note the scattered order of NODDCLR, NODDCHK flags. Since the sub-registers
+// of destination reg are not updated at one place and hence even flags are scattered. -rT
+
+    mov (8)    mubMSGPAYLOAD0(0,   0)<4>  r[a0.2,   1]<16;8,2>	    { NoDDClr }
+    mov (8)    mubMSGPAYLOAD0(0,   1)<4>  r[a0.1,   1]<16;8,2>		{ NoDDClr, NoDDChk }
+    mov (8)    mubMSGPAYLOAD0(0,   2)<4>  r[a0.0,   1]<16;8,2>		{ NoDDClr, NoDDChk }
+    mov (8)    mubMSGPAYLOAD0(0,   3)<4>  r2.31:ub					{ NoDDChk }
+
+    mov (8)    mubMSGPAYLOAD0(1,   0)<4>  r[a0.2,   33]<16;8,2>		{ NoDDClr }
+    mov (8)    mubMSGPAYLOAD0(1,   1)<4>  r[a0.1,   33]<16;8,2>		{ NoDDClr, NoDDChk }
+    mov (8)    mubMSGPAYLOAD0(1,   2)<4>  r[a0.0,   33]<16;8,2>		{ NoDDClr, NoDDChk }
+    mov (8)    mubMSGPAYLOAD0(1,   3)<4>  r2.31:ub					{ NoDDChk }
+
+    mov (8)    mubMSGPAYLOAD1(0,   0)<4>  r[a0.6,   1]<16;8,2>		{ NoDDClr }
+    mov (8)    mubMSGPAYLOAD1(0,   1)<4>  r[a0.5,   1]<16;8,2>		{ NoDDClr, NoDDChk }
+    mov (8)    mubMSGPAYLOAD1(0,   2)<4>  r[a0.4,   1]<16;8,2>		{ NoDDClr, NoDDChk }
+    mov (8)    mubMSGPAYLOAD1(0,   3)<4>  r2.31:ub					{ NoDDChk }
+
+    mov (8)    mubMSGPAYLOAD1(1,   0)<4>  r[a0.6,   33]<16;8,2>		{ NoDDClr }
+    mov (8)    mubMSGPAYLOAD1(1,   1)<4>  r[a0.5,   33]<16;8,2>		{ NoDDClr, NoDDChk }
+    mov (8)    mubMSGPAYLOAD1(1,   2)<4>  r[a0.4,   33]<16;8,2>		{ NoDDClr, NoDDChk }
+    mov (8)    mubMSGPAYLOAD1(1,   3)<4>  r2.31:ub					{ NoDDChk }
+
+    mov (8)    mubMSGPAYLOAD2(0,   0)<4>  r[a0.2,   17]<16;8,2>	    { NoDDClr }
+    mov (8)    mubMSGPAYLOAD2(0,   1)<4>  r[a0.1,   17]<16;8,2>		{ NoDDClr, NoDDChk }
+    mov (8)    mubMSGPAYLOAD2(0,   2)<4>  r[a0.0,   17]<16;8,2>		{ NoDDClr, NoDDChk }
+    mov (8)    mubMSGPAYLOAD2(0,   3)<4>  r2.31:ub					{ NoDDChk }
+
+    mov (8)    mubMSGPAYLOAD2(1,   0)<4>  r[a0.2,   49]<16;8,2>	    { NoDDClr }
+    mov (8)    mubMSGPAYLOAD2(1,   1)<4>  r[a0.1,   49]<16;8,2>		{ NoDDClr, NoDDChk }
+    mov (8)    mubMSGPAYLOAD2(1,   2)<4>  r[a0.0,   49]<16;8,2>		{ NoDDClr, NoDDChk }
+    mov (8)    mubMSGPAYLOAD2(1,   3)<4>  r2.31:ub					{ NoDDChk }
+
+    mov (8)    mubMSGPAYLOAD3(0,   0)<4>  r[a0.6,   17]<16;8,2>		{ NoDDClr }
+    mov (8)    mubMSGPAYLOAD3(0,   1)<4>  r[a0.5,   17]<16;8,2>		{ NoDDClr, NoDDChk }
+    mov (8)    mubMSGPAYLOAD3(0,   2)<4>  r[a0.4,   17]<16;8,2>		{ NoDDClr, NoDDChk }
+    mov (8)    mubMSGPAYLOAD3(0,   3)<4>  r2.31:ub					{ NoDDChk }
+
+    mov (8)    mubMSGPAYLOAD3(1,   0)<4>  r[a0.6,   49]<16;8,2>		{ NoDDClr }
+    mov (8)    mubMSGPAYLOAD3(1,   1)<4>  r[a0.5,   49]<16;8,2>		{ NoDDClr, NoDDChk }
+    mov (8)    mubMSGPAYLOAD3(1,   2)<4>  r[a0.4,   49]<16;8,2>		{ NoDDClr, NoDDChk }
+    mov (8)    mubMSGPAYLOAD3(1,   3)<4>  r2.31:ub					{ NoDDChk }
+
+    // write Buf_1 to 2nd quarter of four horizontal output blocks
+    add (4)   a0.0:uw   r22.0<4;4,1>:w          512:uw
+
+	// pointer swap
+	(f0.0)	mov (1)	uwTemp0<1> a0.0:uw
+	(f0.0)	mov (1)	a0.0:uw a0.2:uw
+	(f0.0)	mov (1)	a0.2:uw uwTemp0<0;1,0>
+
+    add (4)   a0.4:uw   a0.0<4;4,1>:w         r22.8<0;2,1>:w
+
+	mov (8)    mubMSGPAYLOAD4(0,   0)<4>  r[a0.2,   1]<16;8,2>	    { NoDDClr }
+    mov (8)    mubMSGPAYLOAD4(0,   1)<4>  r[a0.1,   1]<16;8,2>		{ NoDDClr, NoDDChk }
+    mov (8)    mubMSGPAYLOAD4(0,   2)<4>  r[a0.0,   1]<16;8,2>		{ NoDDClr, NoDDChk }
+    mov (8)    mubMSGPAYLOAD4(0,   3)<4>  r2.31:ub					{ NoDDChk }
+
+    mov (8)    mubMSGPAYLOAD4(1,   0)<4>  r[a0.2,   33]<16;8,2>		{ NoDDClr }
+    mov (8)    mubMSGPAYLOAD4(1,   1)<4>  r[a0.1,   33]<16;8,2>		{ NoDDClr, NoDDChk }
+    mov (8)    mubMSGPAYLOAD4(1,   2)<4>  r[a0.0,   33]<16;8,2>		{ NoDDClr, NoDDChk }
+    mov (8)    mubMSGPAYLOAD4(1,   3)<4>  r2.31:ub					{ NoDDChk }
+
+    mov (8)    mubMSGPAYLOAD5(0,   0)<4>  r[a0.6,   1]<16;8,2>		{ NoDDClr }
+    mov (8)    mubMSGPAYLOAD5(0,   1)<4>  r[a0.5,   1]<16;8,2>		{ NoDDClr, NoDDChk }
+    mov (8)    mubMSGPAYLOAD5(0,   2)<4>  r[a0.4,   1]<16;8,2>		{ NoDDClr, NoDDChk }
+    mov (8)    mubMSGPAYLOAD5(0,   3)<4>  r2.31:ub					{ NoDDChk }
+
+    mov (8)    mubMSGPAYLOAD5(1,   0)<4>  r[a0.6,   33]<16;8,2>		{ NoDDClr }
+    mov (8)    mubMSGPAYLOAD5(1,   1)<4>  r[a0.5,   33]<16;8,2>		{ NoDDClr, NoDDChk }
+    mov (8)    mubMSGPAYLOAD5(1,   2)<4>  r[a0.4,   33]<16;8,2>		{ NoDDClr, NoDDChk }
+    mov (8)    mubMSGPAYLOAD5(1,   3)<4>  r2.31:ub					{ NoDDChk }
+
+    mov (8)    mubMSGPAYLOAD6(0,   0)<4>  r[a0.2,   17]<16;8,2>	    { NoDDClr }
+    mov (8)    mubMSGPAYLOAD6(0,   1)<4>  r[a0.1,   17]<16;8,2>		{ NoDDClr, NoDDChk }
+    mov (8)    mubMSGPAYLOAD6(0,   2)<4>  r[a0.0,   17]<16;8,2>		{ NoDDClr, NoDDChk }
+    mov (8)    mubMSGPAYLOAD6(0,   3)<4>  r2.31:ub					{ NoDDChk }
+
+    mov (8)    mubMSGPAYLOAD6(1,   0)<4>  r[a0.2,   49]<16;8,2>	    { NoDDClr }
+    mov (8)    mubMSGPAYLOAD6(1,   1)<4>  r[a0.1,   49]<16;8,2>		{ NoDDClr, NoDDChk }
+    mov (8)    mubMSGPAYLOAD6(1,   2)<4>  r[a0.0,   49]<16;8,2>		{ NoDDClr, NoDDChk }
+    mov (8)    mubMSGPAYLOAD6(1,   3)<4>  r2.31:ub					{ NoDDChk }
+
+    mov (8)    mubMSGPAYLOAD7(0,   0)<4>  r[a0.6,   17]<16;8,2>		{ NoDDClr }
+    mov (8)    mubMSGPAYLOAD7(0,   1)<4>  r[a0.5,   17]<16;8,2>		{ NoDDClr, NoDDChk }
+    mov (8)    mubMSGPAYLOAD7(0,   2)<4>  r[a0.4,   17]<16;8,2>		{ NoDDClr, NoDDChk }
+    mov (8)    mubMSGPAYLOAD7(0,   3)<4>  r2.31:ub					{ NoDDChk }
+
+    mov (8)    mubMSGPAYLOAD7(1,   0)<4>  r[a0.6,   49]<16;8,2>		{ NoDDClr }
+    mov (8)    mubMSGPAYLOAD7(1,   1)<4>  r[a0.5,   49]<16;8,2>		{ NoDDClr, NoDDChk }
+    mov (8)    mubMSGPAYLOAD7(1,   2)<4>  r[a0.4,   49]<16;8,2>		{ NoDDClr, NoDDChk }
+    mov (8)    mubMSGPAYLOAD7(1,   3)<4>  r2.31:ub					{ NoDDChk }
+
+    // send buffer 0 and buffer 1	
+	send (1)    null<1>:d    r28   	0x5			0x60A8018:ud
+    send (1)    null<1>:d    r37	0x5			0x60A8018:ud
+    send (1)    null<1>:d    r46   	0x5			0x60A8018:ud
+    send (1)    null<1>:d    r55	0x5			0x60A8018:ud
+
+	send (1)    null<1>:d    r31   	0x5			0x60A8018:ud
+    send (1)    null<1>:d    r40	0x5			0x60A8018:ud
+    send (1)    null<1>:d    r49   	0x5			0x60A8018:ud
+    send (1)    null<1>:d    r58	0x5			0x60A8018:ud
+
+//==========
+//prepare headers 
+//for BUFFER 2
+	add (1) r28.1<1>:d     r27.1<0;1,0>:d        8:d   
+	add (1) r37.1<1>:d     r27.1<0;1,0>:d       10:d   
+	add (1) r46.0<1>:d     r27.0<0;1,0>:d       32:d   
+	add (1) r46.1<1>:d     r27.1<0;1,0>:d        8:d   
+	add (1) r55.0<1>:d     r27.0<0;1,0>:d       32:d   
+	add (1) r55.1<1>:d     r27.1<0;1,0>:d       10:d  
+// for BUFFER 3
+	add (1) r31.1<1>:d     r27.1<0;1,0>:d       12:d   
+	add (1) r40.1<1>:d     r27.1<0;1,0>:d       14:d   
+	add (1) r49.0<1>:d     r27.0<0;1,0>:d       32:d   
+	add (1) r49.1<1>:d     r27.1<0;1,0>:d       12:d  
+	add (1) r58.0<1>:d     r27.0<0;1,0>:d       32:d   
+	add (1) r58.1<1>:d     r27.1<0;1,0>:d       14:d  
+
+//===========
+
+    // write Buf_2 to 3rd quarter of four horizontal output blocks
+    add (4)   a0.0:uw   r22.0<4;4,1>:w          1024:uw
+
+	// pointer swap
+	(f0.0)	mov (1)	uwTemp0<1> a0.0:uw
+	(f0.0)	mov (1)	a0.0:uw a0.2:uw
+	(f0.0)	mov (1)	a0.2:uw uwTemp0<0;1,0>
+
+    add (4)   a0.4:uw   a0.0<4;4,1>:w         r22.8<0;2,1>:w
+
+	mov (8)    mubMSGPAYLOAD0(0,   0)<4>  r[a0.2,   1]<16;8,2>	    { NoDDClr }
+    mov (8)    mubMSGPAYLOAD0(0,   1)<4>  r[a0.1,   1]<16;8,2>		{ NoDDClr, NoDDChk }
+    mov (8)    mubMSGPAYLOAD0(0,   2)<4>  r[a0.0,   1]<16;8,2>		{ NoDDClr, NoDDChk }
+    mov (8)    mubMSGPAYLOAD0(0,   3)<4>  r2.31:ub					{ NoDDChk }
+
+    mov (8)    mubMSGPAYLOAD0(1,   0)<4>  r[a0.2,   33]<16;8,2>		{ NoDDClr }
+    mov (8)    mubMSGPAYLOAD0(1,   1)<4>  r[a0.1,   33]<16;8,2>		{ NoDDClr, NoDDChk }
+    mov (8)    mubMSGPAYLOAD0(1,   2)<4>  r[a0.0,   33]<16;8,2>		{ NoDDClr, NoDDChk }
+    mov (8)    mubMSGPAYLOAD0(1,   3)<4>  r2.31:ub					{ NoDDChk }
+
+    mov (8)    mubMSGPAYLOAD1(0,   0)<4>  r[a0.6,   1]<16;8,2>		{ NoDDClr }
+    mov (8)    mubMSGPAYLOAD1(0,   1)<4>  r[a0.5,   1]<16;8,2>		{ NoDDClr, NoDDChk }
+    mov (8)    mubMSGPAYLOAD1(0,   2)<4>  r[a0.4,   1]<16;8,2>		{ NoDDClr, NoDDChk }
+    mov (8)    mubMSGPAYLOAD1(0,   3)<4>  r2.31:ub					{ NoDDChk }
+
+    mov (8)    mubMSGPAYLOAD1(1,   0)<4>  r[a0.6,   33]<16;8,2>		{ NoDDClr }
+    mov (8)    mubMSGPAYLOAD1(1,   1)<4>  r[a0.5,   33]<16;8,2>		{ NoDDClr, NoDDChk }
+    mov (8)    mubMSGPAYLOAD1(1,   2)<4>  r[a0.4,   33]<16;8,2>		{ NoDDClr, NoDDChk }
+    mov (8)    mubMSGPAYLOAD1(1,   3)<4>  r2.31:ub					{ NoDDChk }
+
+    mov (8)    mubMSGPAYLOAD2(0,   0)<4>  r[a0.2,   17]<16;8,2>	    { NoDDClr }
+    mov (8)    mubMSGPAYLOAD2(0,   1)<4>  r[a0.1,   17]<16;8,2>		{ NoDDClr, NoDDChk }
+    mov (8)    mubMSGPAYLOAD2(0,   2)<4>  r[a0.0,   17]<16;8,2>		{ NoDDClr, NoDDChk }
+    mov (8)    mubMSGPAYLOAD2(0,   3)<4>  r2.31:ub					{ NoDDChk }
+
+    mov (8)    mubMSGPAYLOAD2(1,   0)<4>  r[a0.2,   49]<16;8,2>	    { NoDDClr }
+    mov (8)    mubMSGPAYLOAD2(1,   1)<4>  r[a0.1,   49]<16;8,2>		{ NoDDClr, NoDDChk }
+    mov (8)    mubMSGPAYLOAD2(1,   2)<4>  r[a0.0,   49]<16;8,2>		{ NoDDClr, NoDDChk }
+    mov (8)    mubMSGPAYLOAD2(1,   3)<4>  r2.31:ub					{ NoDDChk }
+
+    mov (8)    mubMSGPAYLOAD3(0,   0)<4>  r[a0.6,   17]<16;8,2>		{ NoDDClr }
+    mov (8)    mubMSGPAYLOAD3(0,   1)<4>  r[a0.5,   17]<16;8,2>		{ NoDDClr, NoDDChk }
+    mov (8)    mubMSGPAYLOAD3(0,   2)<4>  r[a0.4,   17]<16;8,2>		{ NoDDClr, NoDDChk }
+    mov (8)    mubMSGPAYLOAD3(0,   3)<4>  r2.31:ub					{ NoDDChk }
+
+    mov (8)    mubMSGPAYLOAD3(1,   0)<4>  r[a0.6,   49]<16;8,2>		{ NoDDClr }
+    mov (8)    mubMSGPAYLOAD3(1,   1)<4>  r[a0.5,   49]<16;8,2>		{ NoDDClr, NoDDChk }
+    mov (8)    mubMSGPAYLOAD3(1,   2)<4>  r[a0.4,   49]<16;8,2>		{ NoDDClr, NoDDChk }
+    mov (8)    mubMSGPAYLOAD3(1,   3)<4>  r2.31:ub					{ NoDDChk }
+
+    // write Buf_3 to 4th quarter of four horizontal output blocks
+    add (4)   a0.0:uw   r22.0<4;4,1>:w          1536:uw
+
+	// pointer swap
+	(f0.0)	mov (1)	uwTemp0<1> a0.0:uw
+	(f0.0)	mov (1)	a0.0:uw a0.2:uw
+	(f0.0)	mov (1)	a0.2:uw uwTemp0<0;1,0>
+
+    add (4)   a0.4:uw   a0.0<4;4,1>:w         r22.8<0;2,1>:w
+
+	mov (8)    mubMSGPAYLOAD4(0,   0)<4>  r[a0.2,   1]<16;8,2>	    { NoDDClr }
+    mov (8)    mubMSGPAYLOAD4(0,   1)<4>  r[a0.1,   1]<16;8,2>		{ NoDDClr, NoDDChk }
+    mov (8)    mubMSGPAYLOAD4(0,   2)<4>  r[a0.0,   1]<16;8,2>		{ NoDDClr, NoDDChk }
+    mov (8)    mubMSGPAYLOAD4(0,   3)<4>  r2.31:ub					{ NoDDChk }
+
+    mov (8)    mubMSGPAYLOAD4(1,   0)<4>  r[a0.2,   33]<16;8,2>		{ NoDDClr }
+    mov (8)    mubMSGPAYLOAD4(1,   1)<4>  r[a0.1,   33]<16;8,2>		{ NoDDClr, NoDDChk }
+    mov (8)    mubMSGPAYLOAD4(1,   2)<4>  r[a0.0,   33]<16;8,2>		{ NoDDClr, NoDDChk }
+    mov (8)    mubMSGPAYLOAD4(1,   3)<4>  r2.31:ub					{ NoDDChk }
+
+    mov (8)    mubMSGPAYLOAD5(0,   0)<4>  r[a0.6,   1]<16;8,2>		{ NoDDClr }
+    mov (8)    mubMSGPAYLOAD5(0,   1)<4>  r[a0.5,   1]<16;8,2>		{ NoDDClr, NoDDChk }
+    mov (8)    mubMSGPAYLOAD5(0,   2)<4>  r[a0.4,   1]<16;8,2>		{ NoDDClr, NoDDChk }
+    mov (8)    mubMSGPAYLOAD5(0,   3)<4>  r2.31:ub					{ NoDDChk }
+
+    mov (8)    mubMSGPAYLOAD5(1,   0)<4>  r[a0.6,   33]<16;8,2>		{ NoDDClr }
+    mov (8)    mubMSGPAYLOAD5(1,   1)<4>  r[a0.5,   33]<16;8,2>		{ NoDDClr, NoDDChk }
+    mov (8)    mubMSGPAYLOAD5(1,   2)<4>  r[a0.4,   33]<16;8,2>		{ NoDDClr, NoDDChk }
+    mov (8)    mubMSGPAYLOAD5(1,   3)<4>  r2.31:ub					{ NoDDChk }
+
+    mov (8)    mubMSGPAYLOAD6(0,   0)<4>  r[a0.2,   17]<16;8,2>	    { NoDDClr }
+    mov (8)    mubMSGPAYLOAD6(0,   1)<4>  r[a0.1,   17]<16;8,2>		{ NoDDClr, NoDDChk }
+    mov (8)    mubMSGPAYLOAD6(0,   2)<4>  r[a0.0,   17]<16;8,2>		{ NoDDClr, NoDDChk }
+    mov (8)    mubMSGPAYLOAD6(0,   3)<4>  r2.31:ub					{ NoDDChk }
+
+    mov (8)    mubMSGPAYLOAD6(1,   0)<4>  r[a0.2,   49]<16;8,2>	    { NoDDClr }
+    mov (8)    mubMSGPAYLOAD6(1,   1)<4>  r[a0.1,   49]<16;8,2>		{ NoDDClr, NoDDChk }
+    mov (8)    mubMSGPAYLOAD6(1,   2)<4>  r[a0.0,   49]<16;8,2>		{ NoDDClr, NoDDChk }
+    mov (8)    mubMSGPAYLOAD6(1,   3)<4>  r2.31:ub					{ NoDDChk }
+
+    mov (8)    mubMSGPAYLOAD7(0,   0)<4>  r[a0.6,   17]<16;8,2>		{ NoDDClr }
+    mov (8)    mubMSGPAYLOAD7(0,   1)<4>  r[a0.5,   17]<16;8,2>		{ NoDDClr, NoDDChk }
+    mov (8)    mubMSGPAYLOAD7(0,   2)<4>  r[a0.4,   17]<16;8,2>		{ NoDDClr, NoDDChk }
+    mov (8)    mubMSGPAYLOAD7(0,   3)<4>  r2.31:ub					{ NoDDChk }
+
+    mov (8)    mubMSGPAYLOAD7(1,   0)<4>  r[a0.6,   49]<16;8,2>		{ NoDDClr }
+    mov (8)    mubMSGPAYLOAD7(1,   1)<4>  r[a0.5,   49]<16;8,2>		{ NoDDClr, NoDDChk }
+    mov (8)    mubMSGPAYLOAD7(1,   2)<4>  r[a0.4,   49]<16;8,2>		{ NoDDClr, NoDDChk }
+    mov (8)    mubMSGPAYLOAD7(1,   3)<4>  r2.31:ub					{ NoDDChk }
+
+    // send buffer 2 and buffer 3	
+	send (1)    null<1>:d    r28   	0x5			0x60A8018:ud
+    send (1)    null<1>:d    r37	0x5			0x60A8018:ud
+    send (1)    null<1>:d    r46   	0x5			0x60A8018:ud
+    send (1)    null<1>:d    r55	0x5			0x60A8018:ud
+
+	send (1)    null<1>:d    r31   	0x5			0x60A8018:ud
+    send (1)    null<1>:d    r40	0x5			0x60A8018:ud
+    send (1)    null<1>:d    r49   	0x5			0x60A8018:ud
+    send (1)    null<1>:d    r58	0x5			0x60A8018:ud
diff --git a/src/shaders/post_processing/gen7/Set_AVS_Buf_0123_BGRA.g4a b/src/shaders/post_processing/gen7/Set_AVS_Buf_0123_BGRA.g4a
new file mode 100644
index 0000000..4c4144f
--- /dev/null
+++ b/src/shaders/post_processing/gen7/Set_AVS_Buf_0123_BGRA.g4a
@@ -0,0 +1,362 @@
+/*
+ *  Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+//    7    // Total instruction count
+//    1    // Total kernel count
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16)
+.default_register_type  :ub
+
+.reg_count_total        128
+.reg_count_payload      7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+                                    //  e.g.            byte0   byte1  byte2
+                                    // YUYV               0       1      3
+                                    // YVYU               0       3      1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+                                    //  e.g.              byte0           byte1           byte2
+                                    // YUYV                 0               1               3
+                                    // YVYU                 0               3               1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+//                = 000 0001 (min message len 1 ) 0,0000 (resp len 0   -add later)
+//                  0000,0000,0000
+//                  0001(Spawn a root thread),0001 (Root thread spawn thread)
+//                = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+//                = 000 0110 (min message len 6 ) 0,0000 (resp len 0   -add later)
+//                  1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+//                  0000,0000 (Binding table index, added later)
+//                = 0x02000011
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+        // Message Descriptors
+                //                = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+                //                  1 (header present 1) 0 1010 (media block write) 000000
+                //                  00000000 (binding table index - set later)
+                //                = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+                             // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT  r43    // Dummy Message Register for EOT
+
+
+.declare    mubMSGPAYLOAD  Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare    muwMSGPAYLOAD  Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare    mudMSGPAYLOAD  Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare    mfMSGPAYLOAD   Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+
+.declare fROBUF		  Base=r9.0		ElementSize=4		SrcRegion=<8;8,1>		  DstRegion=<1>		Type=f
+
+.declare udROBUF		Base=r9.0		ElementSize=4		SrcRegion=<8;8,1>		  DstRegion=<1>		Type=ud
+
+.declare uwROBUF		Base=r9.0		ElementSize=2		SrcRegion=<16;16,1>		DstRegion=<1>		Type=uw
+
+.declare ubROBUF		Base=r9.0		ElementSize=1		SrcRegion=<16;16,1>		DstRegion=<1>		Type=ub
+
+.declare ub4ROBUF 	Base=r9.0		ElementSize=1		SrcRegion=<32;8,4>		DstRegion=<4>		Type=ub
+
+
+// End of common.inc
+
+
+//Module Name: Set_AVS_Buf_0123_BGRA.asm
+
+
+
+//Module Name: Set_Buf_0123_BGRA
+
+
+
+
+// Description: Includes all definitions explicit to Fast Composite.
+
+
+
+
+// End of common.inc
+
+
+//========== GRF partition ==========
+     // r0 header            :   r0          (1 GRF)
+     // Static parameters    :   r1 - r6     (6 GRFS)
+     // Inline parameters    :   r7 - r8     (2 GRFs)
+     // MSGSRC               :   r27         (1 GRF)
+//===================================
+
+//Interface:
+//========== Static Parameters (Explicit To Fast Composite) ==========
+//r1
+//CSC Set 0
+
+
+.declare udCSC_CURBE    Base=r1.0      ElementSize=4       Type=ud
+
+//Constant alpha
+
+
+//r2
+
+
+// Gen7 AVS WA
+
+
+// WiDi Definitions
+
+
+//Colorfill
+
+
+                                      // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise.	    
+
+.declare ubCOLOR_PIXEL_VAL      Base=r2.20      ElementSize=1       SrcRegion=<0;1,0>       DstRegion=<1>       Type=ub
+
+//r3
+//Normalised Ratio of Horizontal step size with main video for all layers
+
+
+    //Normalised Ratio of Horizontal step size with main video for all layers becomes
+    //Normalised Horizontal step size for all layers in VP_Setup.asm
+
+
+//r4
+//Normalised Vertical step size for all layers
+
+
+//r5
+//Normalised Vertical Frame Origin for all layers
+
+
+//r6
+//Normalised Horizontal Frame Origin for all layers
+
+
+//========== Inline Parameters (Explicit To Fast Composite) ==========
+
+
+//Main video Step X
+
+
+//====================== Binding table (Explicit To Fast Composite)=========================================
+
+
+//Used by Interlaced Scaling Kernels
+
+
+//========== Sampler State Table Index (Explicit To Fast Composite)==========
+//Sampler Index for AVS/IEF messages
+
+
+//Sampler Index for SIMD16 sampler messages
+
+
+//=============================================================================
+// BUFFER_0..5 aliases (bases r64, r80, r96, r112, r28, r46), one group per element type; this group: float.
+.declare fBUFFER_0      Base=r64.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+.declare fBUFFER_1      Base=r80.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+.declare fBUFFER_2      Base=r96.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+.declare fBUFFER_3      Base=r112.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+.declare fBUFFER_4      Base=r28.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+.declare fBUFFER_5      Base=r46.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+// Same six bases viewed as unsigned dwords (4-byte elements).
+.declare udBUFFER_0     Base=r64.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_1     Base=r80.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_2     Base=r96.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_3     Base=r112.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_4     Base=r28.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_5     Base=r46.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+// Same six bases viewed as unsigned words (2-byte elements).
+.declare uwBUFFER_0     Base=r64.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_1     Base=r80.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_2     Base=r96.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_3     Base=r112.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_4     Base=r28.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_5     Base=r46.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+// Same six bases viewed as contiguous unsigned bytes.
+.declare ubBUFFER_0     Base=r64.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_1     Base=r80.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_2     Base=r96.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_3     Base=r112.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_4     Base=r28.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_5     Base=r46.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+// Same six bases viewed as bytes with a default stride of 4 (source <32;8,4>, destination <4>).
+.declare ub4BUFFER_0    Base=r64.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_1    Base=r80.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_2    Base=r96.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_3    Base=r112.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_4    Base=r28.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_5    Base=r46.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+
+//Pointer to mask reg
+
+
+//r18
+
+
+//Always keep Channel Pointers and Offsets in the same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+.declare udCSC_COEFF_0  Base=r18.0    ElementSize=4 Type=ud       // 1 GRF
+
+//r19
+
+
+.declare udCSC_COEFF_1  Base=r19.0    ElementSize=4 Type=ud       // 1 GRF
+
+
+//r20
+
+.declare uwALPHA_MASK_REG_TEMP  Base=r20.0    ElementSize=2 SrcRegion=<16;16,1> Type=uw        // 1 GRF
+
+//r21
+
+.declare uwALPHA_MASK_REG       Base=r21.0         ElementSize=2 SrcRegion=<16;16,1> Type=uw        // 1 GRF
+
+//r22
+
+
+//Always keep Channel Pointers and Offsets in the same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as
+//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT
+
+//r23
+
+
+//Lumakey
+
+
+//r24
+
+
+//r25
+
+
+//r26
+
+
+//defines to generate LABELS during compile time.
+
+
+    //AVS LAYOUT:(UUYYVVAA) - build the four channel pointers for buffers 0-3 and store them in r22.0-r22.3 (words).
+        //Assign buffer channel order for Buffer 0123 in the order AUYV: a0.3->A, a0.2->U, a0.1->Y, a0.0->V
+        // GRF offsets relative to r64 (base of BUFFER_0): V = 8, Y = 4, U = 0, A = 12, i.e. r72, r68, r64, r76.
+        mov (4) acc0.0<1>:w                 0x6AE2:v        // presumably the packed signed-nibble immediate, low nibble first: {2, -2, -6, 6} - TODO confirm against the ISA doc
+        add (4) acc0.0<1>:w                 acc0<4;4,1>:w       70:uw       // + 70 -> GRF numbers {72, 68, 64, 76}, matching the offsets listed above
+        shl (4) r22.0<1>:w       acc0<4;4,1>:w       5:uw       // << 5 (GRF number * 32) -> byte addresses of those GRFs
+
+    //OPT: wAVS_SU_SHUFFLE_PTR_0 and udAVS_SU_SHUFFLE_OFF_0 are sub-regs of same GRF (here r18.0-r18.3:w and r18.4:ud). -rT
+    // NOTE(review): r18 is also declared earlier in this file as udCSC_COEFF_0 - presumably the register is reused; confirm.
+    //SU LAYOUT:(VYUAVYUA) - build the four shuffle pointers and store them in r18.0-r18.3 (words).
+        //GRF offsets relative to r64: V = 4, Y = 2, U = 0, A = 6, i.e. r68, r66, r64, r70.
+        mov (4) acc0.0<1>:w                 0x6024:v        // presumably packed nibbles, low nibble first: {4, 2, 0, 6} - TODO confirm
+        add (4) acc0.0<1>:w                 acc0<4;4,1>:w       64:uw       // + 64 -> GRF numbers {68, 66, 64, 70}
+        shl (4) r18.0<1>:w  acc0<4;4,1>:w       5:uw                    { NoDDClr }     //Convert to BYTE address (GRF number * 32).
+
+        //OFFSET: a single dword write puts 0x0100 (256 bytes = 8 GRFs of 32 bytes) into both words r18.8 and r18.9.
+        mov (1)   r18.4<1>:ud      0x1000100:ud    { NoDDChk }
+
+
diff --git a/src/shaders/post_processing/gen7/Set_AVS_Buf_0123_PL2.g4a b/src/shaders/post_processing/gen7/Set_AVS_Buf_0123_PL2.g4a
new file mode 100644
index 0000000..1d38ae2
--- /dev/null
+++ b/src/shaders/post_processing/gen7/Set_AVS_Buf_0123_PL2.g4a
@@ -0,0 +1,361 @@
+/*
+ *  Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+//    7    // Total instruction count
+//    1    // Total kernel count
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16)                 // default only: every instruction in this file states its own size, (4) or (1)
+.default_register_type  :ub                  // default only: every operand in this file carries an explicit type
+
+.reg_count_total        128                  // highest base declared in this file is r112 (BUFFER_3)
+.reg_count_payload      7                    // NOTE(review): presumably r0 plus static parameters r1-r6 from the GRF partition comment - confirm
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+                                    //  e.g.            byte0   byte1  byte2
+                                    // YUYV               0       1      3
+                                    // YVYU               0       3      1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+                                    //  e.g.              byte0           byte1           byte2
+                                    // YUYV                 0               1               3
+                                    // YVYU                 0               3               1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+//                = 000 0001 (min message len 1 ) 0,0000 (resp len 0   -add later)
+//                  0000,0000,0000
+//                  0001(Spawn a root thread),0001 (Root thread spawn thread)
+//                = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+//                = 000 0110 (min message len 6 ) 0,0000 (resp len 0   -add later)
+//                  1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+//                  0000,0000 (Binding table index, added later)
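+//                = 0x02000011   (NOTE(review): same value as the thread-spawn descriptor above and inconsistent with "message len 6"; likely a copy-paste leftover - confirm)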
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+        // Message Descriptors
+                //                = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+                //                  1 (header present 1) 0 1010 (media block write) 000000
+                //                  00000000 (binding table index - set later)
+                //                = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+                             // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT  r43    // Dummy Message Register for EOT
+
+
+.declare    mubMSGPAYLOAD  Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub   // byte view of the storage starting at r30
+.declare    muwMSGPAYLOAD  Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw   // word view of the same storage
+.declare    mudMSGPAYLOAD  Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud   // dword view of the same storage
+.declare    mfMSGPAYLOAD   Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f   // float view of the same storage
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+
+.declare fROBUF		  Base=r9.0		ElementSize=4		SrcRegion=<8;8,1>		  DstRegion=<1>		Type=f		// float view of the temp space at r9
+
+.declare udROBUF		Base=r9.0		ElementSize=4		SrcRegion=<8;8,1>		  DstRegion=<1>		Type=ud		// dword view of the same storage
+
+.declare uwROBUF		Base=r9.0		ElementSize=2		SrcRegion=<16;16,1>		DstRegion=<1>		Type=uw		// word view of the same storage
+
+.declare ubROBUF		Base=r9.0		ElementSize=1		SrcRegion=<16;16,1>		DstRegion=<1>		Type=ub		// byte view of the same storage
+
+.declare ub4ROBUF 	Base=r9.0		ElementSize=1		SrcRegion=<32;8,4>		DstRegion=<4>		Type=ub		// byte view with stride 4 on both source and destination (every 4th byte)
+
+
+// End of common.inc
+
+
+//Module Name: Set_AVS_Buf_0123_PL2.asm
+
+
+
+//Module Name: Set_Buf_0123_PL2
+
+
+
+
+// Description: Includes all definitions explicit to Fast Composite.
+
+
+
+
+// End of common.inc
+
+
+//========== GRF partition ==========
+     // r0 header            :   r0          (1 GRF)
+     // Static parameters    :   r1 - r6     (6 GRFS)
+     // Inline parameters    :   r7 - r8     (2 GRFs)
+     // MSGSRC               :   r27         (1 GRF)
+//===================================
+
+//Interface:
+//========== Static Parameters (Explicit To Fast Composite) ==========
+//r1
+//CSC Set 0
+
+
+.declare udCSC_CURBE    Base=r1.0      ElementSize=4       Type=ud       // dword view starting at r1.0, the first static-parameter GRF in the partition comment above
+
+//Constant alpha
+
+
+//r2
+
+
+// Gen7 AVS WA
+
+
+// WiDi Definitions
+
+
+//Colorfill
+
+
+                                      // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise.	    
+
+.declare ubCOLOR_PIXEL_VAL      Base=r2.20      ElementSize=1       SrcRegion=<0;1,0>       DstRegion=<1>       Type=ub       // single byte at r2.20; the <0;1,0> source region replicates that one byte to every channel
+
+//r3
+//Normalised Ratio of Horizontal step size with main video for all layers
+
+
+    //Normalised Ratio of Horizontal step size with main video for all layers becomes
+    //Normalised Horizontal step size for all layers in VP_Setup.asm
+
+
+//r4
+//Normalised Vertical step size for all layers
+
+
+//r5
+//Normalised Vertical Frame Origin for all layers
+
+
+//r6
+//Normalised Horizontal Frame Origin for all layers
+
+
+//========== Inline Parameters (Explicit To Fast Composite) ==========
+
+
+//Main video Step X
+
+
+//====================== Binding table (Explicit To Fast Composite)=========================================
+
+
+//Used by Interlaced Scaling Kernels
+
+
+//========== Sampler State Table Index (Explicit To Fast Composite)==========
+//Sampler Index for AVS/IEF messages
+
+
+//Sampler Index for SIMD16 sampler messages
+
+
+//=============================================================================
+
+.declare fBUFFER_0      Base=r64.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f       // float views; BUFFER_0..3 bases are 16 GRFs apart (r64, r80, r96, r112)
+.declare fBUFFER_1      Base=r80.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+.declare fBUFFER_2      Base=r96.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+.declare fBUFFER_3      Base=r112.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+.declare fBUFFER_4      Base=r28.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f       // BUFFER_4 and BUFFER_5 sit lower, at r28 and r46
+.declare fBUFFER_5      Base=r46.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+
+.declare udBUFFER_0     Base=r64.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud       // dword views of the same six bases
+.declare udBUFFER_1     Base=r80.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_2     Base=r96.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_3     Base=r112.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_4     Base=r28.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_5     Base=r46.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+
+.declare uwBUFFER_0     Base=r64.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw       // word views of the same six bases
+.declare uwBUFFER_1     Base=r80.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_2     Base=r96.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_3     Base=r112.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_4     Base=r28.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_5     Base=r46.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+
+.declare ubBUFFER_0     Base=r64.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub       // byte views of the same six bases
+.declare ubBUFFER_1     Base=r80.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_2     Base=r96.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_3     Base=r112.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_4     Base=r28.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_5     Base=r46.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+
+.declare ub4BUFFER_0    Base=r64.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub       // byte views with stride 4: every 4th byte on read (<32;8,4>) and on write (<4>)
+.declare ub4BUFFER_1    Base=r80.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_2    Base=r96.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_3    Base=r112.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_4    Base=r28.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_5    Base=r46.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+
+//Pointer to mask reg
+
+
+//r18
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+.declare udCSC_COEFF_0  Base=r18.0    ElementSize=4 Type=ud       // 1 GRF: dword view of r18; the kernel code at the end of this file also writes r18.0:w and r18.4:ud
+
+//r19
+
+
+.declare udCSC_COEFF_1  Base=r19.0    ElementSize=4 Type=ud       // 1 GRF: dword view of r19
+
+
+//r20
+
+.declare uwALPHA_MASK_REG_TEMP  Base=r20.0    ElementSize=2 SrcRegion=<16;16,1> Type=uw        // 1 GRF: 16 contiguous words of r20
+
+//r21
+
+.declare uwALPHA_MASK_REG       Base=r21.0         ElementSize=2 SrcRegion=<16;16,1> Type=uw        // 1 GRF: 16 contiguous words of r21
+
+//r22
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as
+//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT
+
+//r23
+
+
+//Lumakey
+
+
+//r24
+
+
+//r25
+
+
+//r26
+
+
+//defines to generate LABELS during compile time.
+
+
+	//AVS LAYOUT: (YYUUVVAA)
+		//Assign buffer channel order for Buffer 0123 in the order AUYV a0.3>A, a0.2>U, a0.1>Y, a0.0>V
+		//For PL2-AVS: V = 8, Y= 0, U = 4, A = 12.
+		mov	(4)	acc0.0<1>:w					0x6EA2:v                    			//(V,Y,U,A)=(8,0,4,12) minus 6 -> nibbles 2,-6,-2,6, low nibble first; 12 would not fit a signed 4-bit :v element
+		add (4)	acc0.0<1>:w					acc0<4;4,1>:w		70:uw		//70 = 64 + 6: add the 6 back and rebase to r64 (BUFFER_0) -> 72,64,68,76
+		shl (4)	r22.0<1>:w		acc0<4;4,1>:w		5:uw				//Convert to BYTE address (<<5 = x32 bytes per GRF); r22.0..r22.3:w = V,Y,U,A
+
+	//OPT: wAVS_SU_SHUFFLE_PTR_0 and udAVS_SU_SHUFFLE_OFF_0 are sub-regs of same GRF. -rT
+	//SU LAYOUT:(YUVAYUVA)
+		//V = 4, Y = 0, U = 2, A = 6
+		mov	(4)	acc0.0<1>:w					0x6204:v					//(V,Y,U,A)=(4,0,2,6), low nibble first; no bias needed, every value fits in a nibble
+		add (4)	acc0.0<1>:w					acc0<4;4,1>:w		64:uw		//rebase to r64 (BUFFER_0) -> 68,64,66,70
+		shl (4)	r18.0<1>:w	acc0<4;4,1>:w		5:uw					{ NoDDClr } 	//Convert to BYTE address; r18.0..r18.3:w = V,Y,U,A
+
+		//OFFSET: 0x0100 in each 16-bit half. NOTE(review): presumably 0x100 bytes = 8 GRFs, the distance to the second YUVA group - confirm
+		mov (1)	  r18.4<1>:ud		0x1000100:ud	{ NoDDChk }	//Second write into r18; NoDDChk pairs with NoDDClr on the shl above (see the same-GRF notes earlier in this file)
+
+
diff --git a/src/shaders/post_processing/gen7/Set_AVS_Buf_0123_PL3.g4a b/src/shaders/post_processing/gen7/Set_AVS_Buf_0123_PL3.g4a
new file mode 100644
index 0000000..f4d1e1d
--- /dev/null
+++ b/src/shaders/post_processing/gen7/Set_AVS_Buf_0123_PL3.g4a
@@ -0,0 +1,361 @@
+/*
+ *  Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+//    7    // Total instruction count
+//    1    // Total kernel count
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16)                 // default only: every instruction in this file states its own size, (4) or (1)
+.default_register_type  :ub                  // default only: every operand in this file carries an explicit type
+
+.reg_count_total        128                  // highest base declared in this file is r112 (BUFFER_3)
+.reg_count_payload      7                    // NOTE(review): presumably r0 plus static parameters r1-r6 from the GRF partition comment - confirm
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+                                    //  e.g.            byte0   byte1  byte2
+                                    // YUYV               0       1      3
+                                    // YVYU               0       3      1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+                                    //  e.g.              byte0           byte1           byte2
+                                    // YUYV                 0               1               3
+                                    // YVYU                 0               3               1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+//                = 000 0001 (min message len 1 ) 0,0000 (resp len 0   -add later)
+//                  0000,0000,0000
+//                  0001(Spawn a root thread),0001 (Root thread spawn thread)
+//                = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+//                = 000 0110 (min message len 6 ) 0,0000 (resp len 0   -add later)
+//                  1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+//                  0000,0000 (Binding table index, added later)
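+//                = 0x02000011   (NOTE(review): same value as the thread-spawn descriptor above and inconsistent with "message len 6"; likely a copy-paste leftover - confirm)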
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+        // Message Descriptors
+                //                = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+                //                  1 (header present 1) 0 1010 (media block write) 000000
+                //                  00000000 (binding table index - set later)
+                //                = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+                             // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT  r43    // Dummy Message Register for EOT
+
+
+.declare    mubMSGPAYLOAD  Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub   // byte view of the storage starting at r30
+.declare    muwMSGPAYLOAD  Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw   // word view of the same storage
+.declare    mudMSGPAYLOAD  Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud   // dword view of the same storage
+.declare    mfMSGPAYLOAD   Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f   // float view of the same storage
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+
+.declare fROBUF		  Base=r9.0		ElementSize=4		SrcRegion=<8;8,1>		  DstRegion=<1>		Type=f		// float view of the temp space at r9
+
+.declare udROBUF		Base=r9.0		ElementSize=4		SrcRegion=<8;8,1>		  DstRegion=<1>		Type=ud		// dword view of the same storage
+
+.declare uwROBUF		Base=r9.0		ElementSize=2		SrcRegion=<16;16,1>		DstRegion=<1>		Type=uw		// word view of the same storage
+
+.declare ubROBUF		Base=r9.0		ElementSize=1		SrcRegion=<16;16,1>		DstRegion=<1>		Type=ub		// byte view of the same storage
+
+.declare ub4ROBUF 	Base=r9.0		ElementSize=1		SrcRegion=<32;8,4>		DstRegion=<4>		Type=ub		// byte view with stride 4 on both source and destination (every 4th byte)
+
+
+// End of common.inc
+
+
+//Module Name: Set_AVS_Buf_0123_PL3.asm
+
+
+
+//Module Name: Set_Buf_0123_PL3
+
+
+
+
+// Description: Includes all definitions explicit to Fast Composite.
+
+
+
+
+// End of common.inc
+
+
+//========== GRF partition ==========
+     // r0 header            :   r0          (1 GRF)
+     // Static parameters    :   r1 - r6     (6 GRFS)
+     // Inline parameters    :   r7 - r8     (2 GRFs)
+     // MSGSRC               :   r27         (1 GRF)
+//===================================
+
+//Interface:
+//========== Static Parameters (Explicit To Fast Composite) ==========
+//r1
+//CSC Set 0
+
+
+.declare udCSC_CURBE    Base=r1.0      ElementSize=4       Type=ud       // dword view starting at r1.0, the first static-parameter GRF in the partition comment above
+
+//Constant alpha
+
+
+//r2
+
+
+// Gen7 AVS WA
+
+
+// WiDi Definitions
+
+
+//Colorfill
+
+
+                                      // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise.	    
+
+.declare ubCOLOR_PIXEL_VAL      Base=r2.20      ElementSize=1       SrcRegion=<0;1,0>       DstRegion=<1>       Type=ub       // single byte at r2.20; the <0;1,0> source region replicates that one byte to every channel
+
+//r3
+//Normalised Ratio of Horizontal step size with main video for all layers
+
+
+    //Normalised Ratio of Horizontal step size with main video for all layers becomes
+    //Normalised Horizontal step size for all layers in VP_Setup.asm
+
+
+//r4
+//Normalised Vertical step size for all layers
+
+
+//r5
+//Normalised Vertical Frame Origin for all layers
+
+
+//r6
+//Normalised Horizontal Frame Origin for all layers
+
+
+//========== Inline Parameters (Explicit To Fast Composite) ==========
+
+
+//Main video Step X
+
+
+//====================== Binding table (Explicit To Fast Composite)=========================================
+
+
+//Used by Interlaced Scaling Kernels
+
+
+//========== Sampler State Table Index (Explicit To Fast Composite)==========
+//Sampler Index for AVS/IEF messages
+
+
+//Sampler Index for SIMD16 sampler messages
+
+
+//=============================================================================
+
+.declare fBUFFER_0      Base=r64.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f       // float views; BUFFER_0..3 bases are 16 GRFs apart (r64, r80, r96, r112)
+.declare fBUFFER_1      Base=r80.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+.declare fBUFFER_2      Base=r96.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+.declare fBUFFER_3      Base=r112.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+.declare fBUFFER_4      Base=r28.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f       // BUFFER_4 and BUFFER_5 sit lower, at r28 and r46
+.declare fBUFFER_5      Base=r46.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+
+.declare udBUFFER_0     Base=r64.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud       // dword views of the same six bases
+.declare udBUFFER_1     Base=r80.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_2     Base=r96.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_3     Base=r112.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_4     Base=r28.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_5     Base=r46.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+
+.declare uwBUFFER_0     Base=r64.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw       // word views of the same six bases
+.declare uwBUFFER_1     Base=r80.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_2     Base=r96.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_3     Base=r112.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_4     Base=r28.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_5     Base=r46.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+
+.declare ubBUFFER_0     Base=r64.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub       // byte views of the same six bases
+.declare ubBUFFER_1     Base=r80.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_2     Base=r96.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_3     Base=r112.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_4     Base=r28.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_5     Base=r46.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+
+.declare ub4BUFFER_0    Base=r64.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub       // byte views with stride 4: every 4th byte on read (<32;8,4>) and on write (<4>)
+.declare ub4BUFFER_1    Base=r80.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_2    Base=r96.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_3    Base=r112.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_4    Base=r28.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_5    Base=r46.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+
+//Pointer to mask reg
+
+
+//r18
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+.declare udCSC_COEFF_0  Base=r18.0    ElementSize=4 Type=ud       // 1 GRF: dword view of r18; the kernel code at the end of this file also writes r18.0:w and r18.4:ud
+
+//r19
+
+
+.declare udCSC_COEFF_1  Base=r19.0    ElementSize=4 Type=ud       // 1 GRF: dword view of r19
+
+
+//r20
+
+.declare uwALPHA_MASK_REG_TEMP  Base=r20.0    ElementSize=2 SrcRegion=<16;16,1> Type=uw        // 1 GRF: 16 contiguous words of r20
+
+//r21
+
+.declare uwALPHA_MASK_REG       Base=r21.0         ElementSize=2 SrcRegion=<16;16,1> Type=uw        // 1 GRF: 16 contiguous words of r21
+
+//r22
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as
+//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT
+
+//r23
+
+
+//Lumakey
+
+
+//r24
+
+
+//r25
+
+
+//r26
+
+
+//defines to generate LABELS during compile time.
+
+
+    //AVS LAYOUT: (YYUUVVAA)
+        //Assign buffer channel order for Buffer 0123 in the order AUYV a0.3>A, a0.2>U, a0.1>Y, a0.0>V
+        //For PL3-AVS: V = 8, Y= 0, U = 4, A = 12.
+        mov (4) acc0.0<1>:w                 0x6EA2:v                                //(V,Y,U,A)=(8,0,4,12) minus 6 -> nibbles 2,-6,-2,6, low nibble first; 12 would not fit a signed 4-bit :v element
+        add (4) acc0.0<1>:w                 acc0<4;4,1>:w       70:uw      //70 = 64 + 6: add the 6 back and rebase to r64 (BUFFER_0) -> 72,64,68,76
+        shl (4) r22.0<1>:w       acc0<4;4,1>:w       5:uw                //Convert to BYTE address (<<5 = x32 bytes per GRF); r22.0..r22.3:w = V,Y,U,A
+
+    //OPT: wAVS_SU_SHUFFLE_PTR_0 and udAVS_SU_SHUFFLE_OFF_0 are sub-regs of same GRF. -rT
+    //SU LAYOUT:(YUVAYUVA)
+        //V = 4, Y = 0, U = 2, A = 6
+        mov (4) acc0.0<1>:w                 0x6204:v                                //(V,Y,U,A)=(4,0,2,6), low nibble first; no bias needed, every value fits in a nibble
+        add (4) acc0.0<1>:w                 acc0<4;4,1>:w       64:uw      //rebase to r64 (BUFFER_0) -> 68,64,66,70
+        shl (4) r18.0<1>:w  acc0<4;4,1>:w       5:uw                    { NoDDClr }     //Convert to BYTE address; r18.0..r18.3:w = V,Y,U,A
+
+        //OFFSET: 0x0100 in each 16-bit half. NOTE(review): presumably 0x100 bytes = 8 GRFs, the distance to the second YUVA group - confirm
+        mov (1)   r18.4<1>:ud      0x1000100:ud    { NoDDChk }     //Second write into r18; NoDDChk pairs with NoDDClr on the shl above (see the same-GRF notes earlier in this file)
+
+
diff --git a/src/shaders/post_processing/gen7/Set_AVS_Buf_0123_VUYA.g4a b/src/shaders/post_processing/gen7/Set_AVS_Buf_0123_VUYA.g4a
new file mode 100644
index 0000000..97fda3c
--- /dev/null
+++ b/src/shaders/post_processing/gen7/Set_AVS_Buf_0123_VUYA.g4a
@@ -0,0 +1,362 @@
+/*
+ *  Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+//    7    // Total instruction count
+//    1    // Total kernel count
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16)                 // execution size used when an instruction does not state one
+.default_register_type  :ub                  // register type used when an operand does not state one
+
+.reg_count_total        128                  // BUFFER_3 below is declared at r112
+.reg_count_payload      7                    // NOTE(review): presumably r0 plus static parameters r1-r6 from the GRF partition comment - confirm
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+                                    //  e.g.            byte0   byte1  byte2
+                                    // YUYV               0       1      3
+                                    // YVYU               0       3      1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+                                    //  e.g.              byte0           byte1           byte2
+                                    // YUYV                 0               1               3
+                                    // YVYU                 0               3               1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+//                = 000 0001 (min message len 1 ) 0,0000 (resp len 0   -add later)
+//                  0000,0000,0000
+//                  0001(Spawn a root thread),0001 (Root thread spawn thread)
+//                = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+//                = 000 0110 (min message len 6 ) 0,0000 (resp len 0   -add later)
+//                  1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+//                  0000,0000 (Binding table index, added later)
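+//                = 0x02000011   (NOTE(review): same value as the thread-spawn descriptor above and inconsistent with "message len 6"; likely a copy-paste leftover - confirm)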
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+        // Message Descriptors
+                //                = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+                //                  1 (header present 1) 0 1010 (media block write) 000000
+                //                  00000000 (binding table index - set later)
+                //                = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+                             // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT  r43    // Dummy Message Register for EOT
+
+
+.declare    mubMSGPAYLOAD  Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub   // byte view of the storage starting at r30
+.declare    muwMSGPAYLOAD  Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw   // word view of the same storage
+.declare    mudMSGPAYLOAD  Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud   // dword view of the same storage
+.declare    mfMSGPAYLOAD   Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f   // float view of the same storage
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+
+.declare fROBUF		  Base=r9.0		ElementSize=4		SrcRegion=<8;8,1>		  DstRegion=<1>		Type=f		// float view of the temp space at r9
+
+.declare udROBUF		Base=r9.0		ElementSize=4		SrcRegion=<8;8,1>		  DstRegion=<1>		Type=ud		// dword view of the same storage
+
+.declare uwROBUF		Base=r9.0		ElementSize=2		SrcRegion=<16;16,1>		DstRegion=<1>		Type=uw		// word view of the same storage
+
+.declare ubROBUF		Base=r9.0		ElementSize=1		SrcRegion=<16;16,1>		DstRegion=<1>		Type=ub		// byte view of the same storage
+
+.declare ub4ROBUF 	Base=r9.0		ElementSize=1		SrcRegion=<32;8,4>		DstRegion=<4>		Type=ub		// byte view with stride 4 on both source and destination (every 4th byte)
+
+
+// End of common.inc
+
+
+//Module Name: Set_AVS_Buf_0123_VUYA.asm
+
+
+
+// Module Name : Set_Buf_0123_VUYA
+
+
+
+
+// Description: Includes all definitions explicit to Fast Composite.
+
+
+
+
+// End of common.inc
+
+
+//========== GRF partition ==========
+     // r0 header            :   r0          (1 GRF)
+     // Static parameters    :   r1 - r6     (6 GRFS)
+     // Inline parameters    :   r7 - r8     (2 GRFs)
+     // MSGSRC               :   r27         (1 GRF)
+//===================================
+
+//Interface:
+//========== Static Parameters (Explicit To Fast Composite) ==========
+//r1
+//CSC Set 0
+
+
+.declare udCSC_CURBE    Base=r1.0      ElementSize=4       Type=ud       // dword view starting at r1.0, the first static-parameter GRF in the partition comment above
+
+//Constant alpha
+
+
+//r2
+
+
+// Gen7 AVS WA
+
+
+// WiDi Definitions
+
+
+//Colorfill
+
+
+                                      // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise.	    
+
+.declare ubCOLOR_PIXEL_VAL      Base=r2.20      ElementSize=1       SrcRegion=<0;1,0>       DstRegion=<1>       Type=ub       // single byte at r2.20; the <0;1,0> source region replicates that one byte to every channel
+
+//r3
+//Normalised Ratio of Horizontal step size with main video for all layers
+
+
+    //Normalised Ratio of Horizontal step size with main video for all layers becomes
+    //Normalised Horizontal step size for all layers in VP_Setup.asm
+
+
+//r4
+//Normalised Vertical step size for all layers
+
+
+//r5
+//Normalised Vertical Frame Origin for all layers
+
+
+//r6
+//Normalised Horizontal Frame Origin for all layers
+
+
+//========== Inline Parameters (Explicit To Fast Composite) ==========
+
+
+//Main video Step X
+
+
+//====================== Binding table (Explicit To Fast Composite)=========================================
+
+
+//Used by Interlaced Scaling Kernels
+
+
+//========== Sampler State Table Index (Explicit To Fast Composite)==========
+//Sampler Index for AVS/IEF messages
+
+
+//Sampler Index for SIMD16 sampler messages
+
+
+//=============================================================================
+
+.declare fBUFFER_0      Base=r64.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+.declare fBUFFER_1      Base=r80.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+.declare fBUFFER_2      Base=r96.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+.declare fBUFFER_3      Base=r112.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+.declare fBUFFER_4      Base=r28.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+.declare fBUFFER_5      Base=r46.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+
+.declare udBUFFER_0     Base=r64.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_1     Base=r80.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_2     Base=r96.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_3     Base=r112.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_4     Base=r28.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_5     Base=r46.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+
+.declare uwBUFFER_0     Base=r64.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_1     Base=r80.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_2     Base=r96.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_3     Base=r112.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_4     Base=r28.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_5     Base=r46.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+
+.declare ubBUFFER_0     Base=r64.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_1     Base=r80.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_2     Base=r96.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_3     Base=r112.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_4     Base=r28.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_5     Base=r46.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+
+.declare ub4BUFFER_0    Base=r64.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_1    Base=r80.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_2    Base=r96.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_3    Base=r112.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_4    Base=r28.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_5    Base=r46.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+
+//Pointer to mask reg
+
+
+//r18
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+.declare udCSC_COEFF_0  Base=r18.0    ElementSize=4 Type=ud       // 1 GRF
+
+//r19
+
+
+.declare udCSC_COEFF_1  Base=r19.0    ElementSize=4 Type=ud       // 1 GRF
+
+
+//r20
+
+.declare uwALPHA_MASK_REG_TEMP  Base=r20.0    ElementSize=2 SrcRegion=<16;16,1> Type=uw        // 1 GRF
+
+//r21
+
+.declare uwALPHA_MASK_REG       Base=r21.0         ElementSize=2 SrcRegion=<16;16,1> Type=uw        // 1 GRF
+
+//r22
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as
+//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT
+
+//r23
+
+
+//Lumakey
+
+
+//r24
+
+
+//r25
+
+
+//r26
+
+
+//defines to generate LABELS during compile time.
+
+
+    //For AVS: We use surface state as R8G8B8A8_UNORM and hence set pointers to VUYA.
+    //AVS LAYOUT:(VVUUYYAA)
+        //Assign buffer channel order for Buffer 0123 in the order AUYV a0.3>A, a0.2>U, a0.1>Y, a0.0>V
+        //V = 0, Y= 8, U = 4, A = 12.
+        mov (4) acc0.0<1>:w             0x6E2A:v
+        add (4) acc0.0<1>:w             acc0<4;4,1>:w       70:uw
+        shl (4) r22.0<1>:w   acc0<4;4,1>:w       5:uw
+
+    //Used by Shuffle.
+        //SU LAYOUT:(VUYAVUYA)
+        //V = 0, Y = 4, U = 2, A = 6
+        mov (4) acc0.0<1>:w                 0x6240:v
+        add (4) acc0.0<1>:w                 acc0<4;4,1>:w       64:uw
+        shl (4) r18.0<1>:w  acc0<4;4,1>:w       5:uw                    { NoDDClr } //Convert to BYTE address.
+
+        //OFFSET:
+        mov (1)   r18.4<1>:ud      0x1000100:ud    { NoDDChk }
+
+
diff --git a/src/shaders/post_processing/gen7/Set_AVS_Buf_0123_VYUA.g4a b/src/shaders/post_processing/gen7/Set_AVS_Buf_0123_VYUA.g4a
new file mode 100644
index 0000000..cde1aea
--- /dev/null
+++ b/src/shaders/post_processing/gen7/Set_AVS_Buf_0123_VYUA.g4a
@@ -0,0 +1,362 @@
+/*
+ *  Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+//    7    // Total instruction count
+//    1    // Total kernel count
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16)
+.default_register_type  :ub
+
+.reg_count_total        128
+.reg_count_payload      7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+                                    //  e.g.            byte0   byte1  byte2
+                                    // YUYV               0       1      3
+                                    // YVYU               0       3      1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+                                    //  e.g.              byte0           byte1           byte2
+                                    // YUYV                 0               1               3
+                                    // YVYU                 0               3               1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+//                = 000 0001 (min message len 1 ) 0,0000 (resp len 0   -add later)
+//                  0000,0000,0000
+//                  0001(Spawn a root thread),0001 (Root thread spawn thread)
+//                = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+//                = 000 0110 (min message len 6 ) 0,0000 (resp len 0   -add later)
+//                  1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+//                  0000,0000 (Binding table index, added later)
+//                = 0x0C098700
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+        // Message Descriptors
+                //                = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+                //                  1 (header present 1) 0 1010 (media block write) 000000
+                //                  00000000 (binding table index - set later)
+                //                = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+                             // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT  r43    // Dummy Message Register for EOT
+
+
+.declare    mubMSGPAYLOAD  Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare    muwMSGPAYLOAD  Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare    mudMSGPAYLOAD  Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare    mfMSGPAYLOAD   Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+
+.declare fROBUF		  Base=r9.0		ElementSize=4		SrcRegion=<8;8,1>		  DstRegion=<1>		Type=f
+
+.declare udROBUF		Base=r9.0		ElementSize=4		SrcRegion=<8;8,1>		  DstRegion=<1>		Type=ud
+
+.declare uwROBUF		Base=r9.0		ElementSize=2		SrcRegion=<16;16,1>		DstRegion=<1>		Type=uw
+
+.declare ubROBUF		Base=r9.0		ElementSize=1		SrcRegion=<16;16,1>		DstRegion=<1>		Type=ub
+
+.declare ub4ROBUF 	Base=r9.0		ElementSize=1		SrcRegion=<32;8,4>		DstRegion=<4>		Type=ub
+
+
+// End of common.inc
+
+
+//Module Name: Set_AVS_Buf_0123_VYUA.asm
+
+
+
+//Module Name: Set_Buf_0123_VYUA
+
+
+
+
+// Description: Includes all definitions explicit to Fast Composite.
+
+
+
+
+// End of common.inc
+
+
+//========== GRF partition ==========
+     // r0 header            :   r0          (1 GRF)
+     // Static parameters    :   r1 - r6     (6 GRFS)
+     // Inline parameters    :   r7 - r8     (2 GRFs)
+     // MSGSRC               :   r27         (1 GRF)
+//===================================
+
+//Interface:
+//========== Static Parameters (Explicit To Fast Composite) ==========
+//r1
+//CSC Set 0
+
+
+.declare udCSC_CURBE    Base=r1.0      ElementSize=4       Type=ud
+
+//Constant alpha
+
+
+//r2
+
+
+// Gen7 AVS WA
+
+
+// WiDi Definitions
+
+
+//Colorfill
+
+
+                                      // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise.	    
+
+.declare ubCOLOR_PIXEL_VAL      Base=r2.20      ElementSize=1       SrcRegion=<0;1,0>       DstRegion=<1>       Type=ub
+
+//r3
+//Normalised Ratio of Horizontal step size with main video for all layers
+
+
+    //Normalised Ratio of Horizontal step size with main video for all layers becomes
+    //Normalised Horizontal step size for all layers in VP_Setup.asm
+
+
+//r4
+//Normalised Vertical step size for all layers
+
+
+//r5
+//Normalised Vertical Frame Origin for all layers
+
+
+//r6
+//Normalised Horizontal Frame Origin for all layers
+
+
+//========== Inline Parameters (Explicit To Fast Composite) ==========
+
+
+//Main video Step X
+
+
+//====================== Binding table (Explicit To Fast Composite)=========================================
+
+
+//Used by Interlaced Scaling Kernels
+
+
+//========== Sampler State Table Index (Explicit To Fast Composite)==========
+//Sampler Index for AVS/IEF messages
+
+
+//Sampler Index for SIMD16 sampler messages
+
+
+//=============================================================================
+// Six buffer regions (bases r64, r80, r96, r112, r28, r46), each declared below under five typed views of the same registers. Float (f) views first.
+.declare fBUFFER_0      Base=r64.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+.declare fBUFFER_1      Base=r80.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+.declare fBUFFER_2      Base=r96.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+.declare fBUFFER_3      Base=r112.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+.declare fBUFFER_4      Base=r28.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+.declare fBUFFER_5      Base=r46.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+// Unsigned dword (ud) views of the same six bases.
+.declare udBUFFER_0     Base=r64.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_1     Base=r80.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_2     Base=r96.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_3     Base=r112.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_4     Base=r28.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_5     Base=r46.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+// Unsigned word (uw) views of the same six bases; source region <16;16,1>.
+.declare uwBUFFER_0     Base=r64.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_1     Base=r80.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_2     Base=r96.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_3     Base=r112.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_4     Base=r28.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_5     Base=r46.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+// Unsigned byte (ub) views of the same six bases; source region <16;16,1>.
+.declare ubBUFFER_0     Base=r64.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_1     Base=r80.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_2     Base=r96.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_3     Base=r112.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_4     Base=r28.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_5     Base=r46.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+// Strided byte (ub4) views: source region <32;8,4> and destination stride <4>, i.e. one byte out of every four.
+.declare ub4BUFFER_0    Base=r64.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_1    Base=r80.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_2    Base=r96.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_3    Base=r112.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_4    Base=r28.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_5    Base=r46.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+
+//Pointer to mask reg
+
+
+//r18
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+.declare udCSC_COEFF_0  Base=r18.0    ElementSize=4 Type=ud       // 1 GRF
+
+//r19
+
+
+.declare udCSC_COEFF_1  Base=r19.0    ElementSize=4 Type=ud       // 1 GRF
+
+
+//r20
+
+.declare uwALPHA_MASK_REG_TEMP  Base=r20.0    ElementSize=2 SrcRegion=<16;16,1> Type=uw        // 1 GRF
+
+//r21
+
+.declare uwALPHA_MASK_REG       Base=r21.0         ElementSize=2 SrcRegion=<16;16,1> Type=uw        // 1 GRF
+
+//r22
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as
+//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT
+
+//r23
+
+
+//Lumakey
+
+
+//r24
+
+
+//r25
+
+
+//r26
+
+
+//defines to generate LABELS during compile time.
+
+
+    //AVS LAYOUT:(VVYYUUAA)
+        //Assign buffer channel order for Buffer 0123 in the order AUYV a0.3>A, a0.2>U, a0.1>Y, a0.0>V
+        // V = 0, Y= 4, U = 8, A = 12. The four resulting pointers are written as words to r22.0-r22.3.
+        mov (4) acc0.0<1>:w                 0x62EA:v                                //Nibbles of 0x62EA read as -6,-2,2,6, i.e. 0,4,8,12 minus 6 (assumes :v is a packed signed 4-bit vector - confirm)
+        add (4) acc0.0<1>:w                 acc0<4;4,1>:w       70:uw      //70 = 64 + 6: adds the 6 back plus 64, giving 64,68,72,76 (fBUFFER_0 is declared at r64)
+        shl (4) r22.0<1>:w       acc0<4;4,1>:w       5:uw                //Convert to BYTE address (x32; presumably 32 bytes per GRF - confirm).
+
+    //OPT: wAVS_SU_SHUFFLE_PTR_0 and udAVS_SU_SHUFFLE_OFF_0 are sub-regs of same GRF. -rT
+    //     Both destinations below are in r18 (words r18.0-r18.3 and dword r18.4), hence the NoDDClr / NoDDChk pair.
+    //SU LAYOUT:(VYUAVYUA)
+        //V = 0, Y = 2, U = 4, A = 6
+        mov (4) acc0.0<1>:w                 0x6420:v                                //Nibbles 0,2,4,6 (low nibble first)
+        add (4) acc0.0<1>:w                 acc0<4;4,1>:w       64:uw      //-> 64,66,68,70
+        shl (4) r18.0<1>:w  acc0<4;4,1>:w       5:uw                    { NoDDClr }     //Convert to BYTE address.
+
+        //OFFSET: one dword whose two 16-bit halves are both 0x0100.
+        mov (1)   r18.4<1>:ud      0x1000100:ud    { NoDDChk }
+
+
diff --git a/src/shaders/post_processing/gen7/Set_Layer_0.g4a b/src/shaders/post_processing/gen7/Set_Layer_0.g4a
new file mode 100644
index 0000000..29cda8a
--- /dev/null
+++ b/src/shaders/post_processing/gen7/Set_Layer_0.g4a
@@ -0,0 +1,467 @@
+/*
+ *  Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+//   18    // Total instruction count
+//    1    // Total kernel count
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16)
+.default_register_type  :ub
+
+.reg_count_total        128
+.reg_count_payload      7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+                                    //  e.g.            byte0   byte1  byte2
+                                    // YUYV               0       1      3
+                                    // YVYU               0       3      1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+                                    //  e.g.              byte0           byte1           byte2
+                                    // YUYV                 0               1               3
+                                    // YVYU                 0               3               1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+//                = 000 0001 (min message len 1 ) 0,0000 (resp len 0   -add later)
+//                  0000,0000,0000
+//                  0001(Spawn a root thread),0001 (Root thread spawn thread)
+//                = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+//                = 000 0110 (min message len 6 ) 0,0000 (resp len 0   -add later)
+//                  1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+//                  0000,0000 (Binding table index, added later)
+//                = 0x0C098700
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+        // Message Descriptors
+                //                = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+                //                  1 (header present 1) 0 1010 (media block write) 000000
+                //                  00000000 (binding table index - set later)
+                //                = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+                             // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT  r43    // Dummy Message Register for EOT
+
+
+.declare    mubMSGPAYLOAD  Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare    muwMSGPAYLOAD  Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare    mudMSGPAYLOAD  Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare    mfMSGPAYLOAD   Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+
+.declare fROBUF		  Base=r9.0		ElementSize=4		SrcRegion=<8;8,1>		  DstRegion=<1>		Type=f
+
+.declare udROBUF		Base=r9.0		ElementSize=4		SrcRegion=<8;8,1>		  DstRegion=<1>		Type=ud
+
+.declare uwROBUF		Base=r9.0		ElementSize=2		SrcRegion=<16;16,1>		DstRegion=<1>		Type=uw
+
+.declare ubROBUF		Base=r9.0		ElementSize=1		SrcRegion=<16;16,1>		DstRegion=<1>		Type=ub
+
+.declare ub4ROBUF 	Base=r9.0		ElementSize=1		SrcRegion=<32;8,4>		DstRegion=<4>		Type=ub
+
+
+// End of common.inc
+
+
+
+
+
+
+//Module name:   Set_Layer_N.inc
+
+
+
+
+// Description: Includes all definitions explicit to Fast Composite.
+
+
+
+
+// End of common.inc
+
+
+//========== GRF partition ==========
+     // r0 header            :   r0          (1 GRF)
+     // Static parameters    :   r1 - r6     (6 GRFS)
+     // Inline parameters    :   r7 - r8     (2 GRFs)
+     // MSGSRC               :   r27         (1 GRF)
+//===================================
+
+//Interface:
+//========== Static Parameters (Explicit To Fast Composite) ==========
+//r1
+//CSC Set 0
+
+
+.declare udCSC_CURBE    Base=r1.0      ElementSize=4       Type=ud
+
+//Constant alpha
+
+
+//r2
+
+
+// Gen7 AVS WA
+
+
+// WiDi Definitions
+
+
+//Colorfill
+
+
+                                      // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise.	    
+
+.declare ubCOLOR_PIXEL_VAL      Base=r2.20      ElementSize=1       SrcRegion=<0;1,0>       DstRegion=<1>       Type=ub
+
+//r3
+//Normalised Ratio of Horizontal step size with main video for all layers
+
+
+    //Normalised Ratio of Horizontal step size with main video for all layers becomes
+    //Normalised Horizontal step size for all layers in VP_Setup.asm
+
+
+//r4
+//Normalised Vertical step size for all layers
+
+
+//r5
+//Normalised Vertical Frame Origin for all layers
+
+
+//r6
+//Normalised Horizontal Frame Origin for all layers
+
+
+//========== Inline Parameters (Explicit To Fast Composite) ==========
+
+
+//Main video Step X
+
+
+//====================== Binding table (Explicit To Fast Composite)=========================================
+
+
+//Used by Interlaced Scaling Kernels
+
+
+//========== Sampler State Table Index (Explicit To Fast Composite)==========
+//Sampler Index for AVS/IEF messages
+
+
+//Sampler Index for SIMD16 sampler messages
+
+
+//=============================================================================
+// Six buffer regions (bases r64, r80, r96, r112, r28, r46), each declared below under five typed views of the same registers. Float (f) views first.
+.declare fBUFFER_0      Base=r64.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+.declare fBUFFER_1      Base=r80.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+.declare fBUFFER_2      Base=r96.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+.declare fBUFFER_3      Base=r112.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+.declare fBUFFER_4      Base=r28.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+.declare fBUFFER_5      Base=r46.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+// Unsigned dword (ud) views of the same six bases.
+.declare udBUFFER_0     Base=r64.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_1     Base=r80.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_2     Base=r96.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_3     Base=r112.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_4     Base=r28.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_5     Base=r46.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+// Unsigned word (uw) views of the same six bases; source region <16;16,1>.
+.declare uwBUFFER_0     Base=r64.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_1     Base=r80.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_2     Base=r96.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_3     Base=r112.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_4     Base=r28.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_5     Base=r46.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+// Unsigned byte (ub) views of the same six bases; source region <16;16,1>.
+.declare ubBUFFER_0     Base=r64.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_1     Base=r80.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_2     Base=r96.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_3     Base=r112.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_4     Base=r28.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_5     Base=r46.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+// Strided byte (ub4) views: source region <32;8,4> and destination stride <4>, i.e. one byte out of every four.
+.declare ub4BUFFER_0    Base=r64.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_1    Base=r80.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_2    Base=r96.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_3    Base=r112.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_4    Base=r28.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_5    Base=r46.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+
+//Pointer to mask reg
+
+
+//r18
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+.declare udCSC_COEFF_0  Base=r18.0    ElementSize=4 Type=ud       // 1 GRF
+
+//r19
+
+
+.declare udCSC_COEFF_1  Base=r19.0    ElementSize=4 Type=ud       // 1 GRF
+
+
+//r20
+
+.declare uwALPHA_MASK_REG_TEMP  Base=r20.0    ElementSize=2 SrcRegion=<16;16,1> Type=uw        // 1 GRF
+
+//r21
+
+.declare uwALPHA_MASK_REG       Base=r21.0         ElementSize=2 SrcRegion=<16;16,1> Type=uw        // 1 GRF
+
+//r22
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as
+//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT
+
+//r23
+
+
+//Lumakey
+
+
+//r24
+
+
+//r25
+
+
+//r26
+
+
+//defines to generate LABELS during compile time.
+
+
+//Used to generate LABELS at compile time.
+
+
+//definitions for Expand Mask: typed views of r17 (Temp1), r16 (Temp2) and r15 (Temp3), all inside the r9-r17 temp space listed earlier in this file.
+.declare uwMask_Temp1           Base=r17.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw         // 1 GRF
+.declare ubMask_Temp1           Base=r17.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub         // 1 GRF
+.declare udMask_Temp1           Base=r17.0 ElementSize=4 Type=ud                                // 1 GRF
+.declare uwMask_Temp2           Base=r16.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw         // 1 GRF
+.declare ubMask_Temp2           Base=r16.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub         // 1 GRF
+.declare udMask_Temp2           Base=r16.0 ElementSize=4 Type=ud                                // 1 GRF
+
+.declare uwMask_Temp3           Base=r15.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw         // 1 GRF
+.declare ubMask_Temp3           Base=r15.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub         // 1 GRF
+// Dword views of r21 and r20, the same registers as uwALPHA_MASK_REG and uwALPHA_MASK_REG_TEMP declared earlier in this file.
+.declare udALPHA_MASK_REG       Base=r21.0         ElementSize=4 Type=ud       // 1 GRF
+.declare udALPHA_MASK_REG_TEMP  Base=r20.0    ElementSize=4 Type=ud       // 1 GRF
+
+
+//Initialize mask reg to FFFF: a 16-wide word move sets 16 words of uwALPHA_MASK_REG (declared at r21.0) to 0xFFFF.
+
+	mov	(16)	uwALPHA_MASK_REG(0)<1>		0xFFFF:uw
+
+
+//Fast jump for -
+//LAYER0: we determine whether layer 0 is to be loaded and processed or not based
+//		on block mask in module "Set_Layer_0" and store result in f0.1.
+//		This flag is then used directly while loading buf0-3 and colorfill.
+//		(So flag f0.1 should not be changed from Set_Layer_0 till Colorfill)
+//
+//LAYER1-7: For all other layers, we compute whether layer is to be loaded and processed
+//		  based on block mask in module "Set_Layer_1-7" and store result in SKIP_LAYER
+//		  variable.
+//		  While Loading buf 4 and 5, we move SKIP_LAYER to f0.0 every time and use it
+//		  for Loading.
+//		  For processing though, we move SKIP_LAYER only once to f0.1 in module
+//		  "Set_Buf0_Buf4" and use f0.1 for deciding whether layer 1-7 (all 4 sub blocks)
+//		  is to be processed or not.
+//		  (So flag f0.1 should not be modified from module "Set_Buf0_Buf4" till the module
+//		  that processes sub-block 3).
+//
+//None of the above fast jumps apply to CSC modules. We always perform CSC irrespective of mask.
+//
+//Example: (Without going into finer details)
+//		Typical Combined kernel:
+//
+//		(let var = decision whether to load/process that layer)
+//
+//		Set_Layer_0			//f0.1 <- var
+//		..
+//		Set_Layer_1			//f0.1 <- var, SKIP_LAYER <- var
+//		..
+//		Load buf 0			//use f0.1
+//		Load buf 4			//f0.0 <- SKIP_LAYER
+//		Load buf 1			//use f0.1
+//		Load buf 5			//f0.0 <- SKIP_LAYER
+//		Load buf 2			//use f0.1
+//		Load buf 3			//use f0.1
+//		..
+//		..
+//		Colorfill
+//		..
+//		Set_Buf0_Buf4		//f0.1 <- SKIP_LAYER
+//		process0-4			//Use f0.1
+//		Load buf 4
+//		Set_Buf1_Buf5
+//		process1-5
+//		Load buf 5
+//		..
+//		Set_Layer_2			//f0.1 <-var, SKIP_LAYER <- var
+//		..
+//		Set_Buf2_Buf4
+//		process2-4
+//		Load buf 4
+//		Set_Buf3_Buf5
+//		process3-5
+//		Load buf 5
+//		..
+
+
+	//For layer 0, use f0.1 directly: f0.1 ends up set only when r7.2:uw and r7.3:uw are both non-zero.
+	cmp.ne.f0.1  			(1)  null<1>:d    r7.2:uw 		0:uw
+	(f0.1)cmp.ne.f0.1	(1)  null<1>:d		 r7.3:uw		0:uw
+	(-f0.1)  jmpi  (1)  SKIP_LAYER_L0
+	// The second compare is predicated on the first; with f0.1 clear the whole layer-0 setup below is jumped over.
+	// Next: keep the low two bits of r2.2:uw in r24.2:ub (presumably the 0-3 rotation selector - confirm).
+    and (1) r24.2:ub    r2.2<0;1,0>:uw    3:uw    
+
+
+	//Copy all AVS Payload data
+	// Setup Message Payload Header for 1st block of Media Sampler 8x8 (16x4 for IVB+)
+	mov (1) r25.6:f        r7.5:f		{ NoDDClr }		//NLAS dx
+	mov (1) r25.4:f        r3.0:f				{ NoDDClr, NoDDChk } 	//Step X
+	mov (1) r25.5:f        r4.0:f				{ NoDDClr, NoDDChk }	//Step Y
+	// The five moves fill distinct dwords r25.2-r25.6 of one GRF: the first carries only NoDDClr,
+	// the last only NoDDChk, and the three in between carry both.
+	mov (1) r25.2:f     	r6.0<0;1,0>:f		{ NoDDClr, NoDDChk }	//Orig X
+	mov (1) r25.3:f     	r5.0<0;1,0>:f		{ NoDDChk }		//Orig Y
+
+    //NLAS calculations for 2nd half of blocks of Media Sampler 8x8:
+    // X(i) = X0 + dx*i + ddx*i*(i-1)/2   ==>  X(8) = X0 + dx*8 +ddx*28
+    // dx(i)= dx(0) + ddx*i               ==>  dx(8)= dx + ddx*8
+    // In the code below X0 = r6.0, dx = r3.0 and ddx = r7.5 (r7.5 is the value labelled "NLAS dx" where it is copied to r25.6).
+	//OPTIMIZATION: fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY - are sub registers of same GRF. Use NODDCLR NODDCHK. -rT
+	// All results below land in r23 (r23.1, r23.2, r23.4, r23.5).
+    // Calculating X(8): acc0.2 = X0, then += dx*8, then r23.2 = acc0.2 + ddx*28
+    mov (1)   acc0.2:f           		r6.0:f
+    mac (1)   acc0.2:f           		r3.0:f          	8.0:f
+    mac (1)   r23.2:f    	r7.5:f     28.0:f		{ NoDDClr }
+
+    // Calculating Y(4): r23.1 = r4.0 (vertical step) * 4
+    mul (1)   r23.1<1>:f   	r4.0:f       4.0:f   	{ NoDDClr, NoDDChk }  //dY*4
+
+    // Calculating dx(8): acc0.4 = dx, then r23.4 = acc0.4 + ddx*8
+    mov (1)   acc0.4:f           r3.0:f
+    mac (1)   r23.4:f     r7.5:f		8.0:f		{ NoDDClr, NoDDChk }
+
+	// Binding Index: r23.5 is set to 0; last write of the r23 group, so it carries only NoDDChk.
+	mov (1)	r23.5:ud		0:ud							{ NoDDChk }
+
+// Target of the (-f0.1) jmpi near the top of this kernel: reached directly when layer 0 is skipped.
+SKIP_LAYER_L0:
+	nop
+
+
diff --git a/src/shaders/post_processing/gen7/VP_Setup.g4a b/src/shaders/post_processing/gen7/VP_Setup.g4a
new file mode 100644
index 0000000..afde47d
--- /dev/null
+++ b/src/shaders/post_processing/gen7/VP_Setup.g4a
@@ -0,0 +1,853 @@
+/*
+ *  Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+/*
+ *  Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+//  326    // Total instruction count
+//    1    // Total kernel count
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16)
+.default_register_type  :ub
+
+.reg_count_total        128
+.reg_count_payload      7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+                                    //  e.g.            byte0   byte1  byte2
+                                    // YUYV               0       1      3
+                                    // YVYU               0       3      1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+                                    //  e.g.              byte0           byte1           byte2
+                                    // YUYV                 0               1               3
+                                    // YVYU                 0               3               1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+//                = 000 0001 (min message len 1 ) 0,0000 (resp len 0   -add later)
+//                  0000,0000,0000
+//                  0001(Spawn a root thread),0001 (Root thread spawn thread)
+//                = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+//                = 000 0110 (min message len 6 ) 0,0000 (resp len 0   -add later)
+//                  1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+//                  0000,0000 (Binding table index, added later)
+//                = 0x0C098700
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+        // Message Descriptors
+                //                = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+                //                  1 (header present 1) 0 1010 (media block write) 000000
+                //                  00000000 (binding table index - set later)
+                //                = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+                             // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT  r43    // Dummy Message Register for EOT
+
+
+.declare    mubMSGPAYLOAD  Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub	// byte view of the message payload, based at r30
+.declare    muwMSGPAYLOAD  Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw	// word view of the same registers
+.declare    mudMSGPAYLOAD  Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud	// dword view of the same registers
+.declare    mfMSGPAYLOAD   Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f	// float view of the same registers
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+
+.declare fROBUF		  Base=r9.0		ElementSize=4		SrcRegion=<8;8,1>		  DstRegion=<1>		Type=f	// float view of the rotation scratch area at r9.0
+
+.declare udROBUF		Base=r9.0		ElementSize=4		SrcRegion=<8;8,1>		  DstRegion=<1>		Type=ud	// dword view of the same area
+
+.declare uwROBUF		Base=r9.0		ElementSize=2		SrcRegion=<16;16,1>		DstRegion=<1>		Type=uw	// word view of the same area
+
+.declare ubROBUF		Base=r9.0		ElementSize=1		SrcRegion=<16;16,1>		DstRegion=<1>		Type=ub	// byte view of the same area
+
+.declare ub4ROBUF 	Base=r9.0		ElementSize=1		SrcRegion=<32;8,4>		DstRegion=<4>		Type=ub	// byte view with a stride of 4 bytes (every 4th byte)
+
+
+// End of common.inc
+
+
+// FileName:		VP_Setup.asm
+// Author:			Vivek Kumar
+// Description:		Sets up all parameters for the Video Processing Kernel
+
+
+
+
+// Description: Includes all definitions explicit to Fast Composite.
+
+
+
+
+// End of common.inc
+
+
+//========== GRF partition ==========
+     // r0 header            :   r0          (1 GRF)
+     // Static parameters    :   r1 - r6     (6 GRFS)
+     // Inline parameters    :   r7 - r8     (2 GRFs)
+     // MSGSRC               :   r27         (1 GRF)
+//===================================
+
+//Interface:
+//========== Static Parameters (Explicit To Fast Composite) ==========
+//r1
+//CSC Set 0
+
+
+.declare udCSC_CURBE    Base=r1.0      ElementSize=4       Type=ud	// dword view of r1 (the "CSC Set 0" static parameters noted above)
+
+//Constant alpha
+
+
+//r2
+
+
+// Gen7 AVS WA
+
+
+// WiDi Definitions
+
+
+//Colorfill
+
+
+                                      // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise.	    
+
+.declare ubCOLOR_PIXEL_VAL      Base=r2.20      ElementSize=1       SrcRegion=<0;1,0>       DstRegion=<1>       Type=ub	// byte view starting at r2.20; <0;1,0> reads it as a replicated scalar
+
+//r3
+//Normalised Ratio of Horizontal step size with main video for all layers
+
+
+    //Normalised Ratio of Horizontal step size with main video for all layers becomes
+    //Normalised Horizontal step size for all layers in VP_Setup.asm
+
+
+//r4
+//Normalised Vertical step size for all layers
+
+
+//r5
+//Normalised Vertical Frame Origin for all layers
+
+
+//r6
+//Normalised Horizontal Frame Origin for all layers
+
+
+//========== Inline Parameters (Explicit To Fast Composite) ==========
+
+
+//Main video Step X
+
+
+//====================== Binding table (Explicit To Fast Composite)=========================================
+
+
+//Used by Interlaced Scaling Kernels
+
+
+//========== Sampler State Table Index (Explicit To Fast Composite)==========
+//Sampler Index for AVS/IEF messages
+
+
+//Sampler Index for SIMD16 sampler messages
+
+
+//=============================================================================
+
+.declare fBUFFER_0      Base=r64.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f	// float views: buffers 0-5 are based at r64, r80, r96, r112, r28, r46
+.declare fBUFFER_1      Base=r80.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+.declare fBUFFER_2      Base=r96.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+.declare fBUFFER_3      Base=r112.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+.declare fBUFFER_4      Base=r28.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+.declare fBUFFER_5      Base=r46.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=f
+
+.declare udBUFFER_0     Base=r64.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud	// dword views of the same six buffers
+.declare udBUFFER_1     Base=r80.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_2     Base=r96.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_3     Base=r112.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_4     Base=r28.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+.declare udBUFFER_5     Base=r46.0       ElementSize=4       SrcRegion=<8;8,1>       DstRegion=<1>       Type=ud
+
+.declare uwBUFFER_0     Base=r64.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw	// word views of the same six buffers
+.declare uwBUFFER_1     Base=r80.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_2     Base=r96.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_3     Base=r112.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_4     Base=r28.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+.declare uwBUFFER_5     Base=r46.0       ElementSize=2       SrcRegion=<16;16,1>     DstRegion=<1>       Type=uw
+
+.declare ubBUFFER_0     Base=r64.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub	// byte views of the same six buffers
+.declare ubBUFFER_1     Base=r80.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_2     Base=r96.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_3     Base=r112.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_4     Base=r28.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+.declare ubBUFFER_5     Base=r46.0       ElementSize=1       SrcRegion=<16;16,1>     DstRegion=<1>       Type=ub
+
+.declare ub4BUFFER_0    Base=r64.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub	// byte views with a stride of 4 bytes (every 4th byte)
+.declare ub4BUFFER_1    Base=r80.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_2    Base=r96.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_3    Base=r112.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_4    Base=r28.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+.declare ub4BUFFER_5    Base=r46.0       ElementSize=1       SrcRegion=<32;8,4>      DstRegion=<4>       Type=ub
+
+//Pointer to mask reg
+
+
+//r18
+
+
+//Always keep Channel Pointers and Offsets in the same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+.declare udCSC_COEFF_0  Base=r18.0    ElementSize=4 Type=ud       // 1 GRF: dword view of r18
+
+//r19
+
+
+.declare udCSC_COEFF_1  Base=r19.0    ElementSize=4 Type=ud       // 1 GRF: dword view of r19
+
+
+//r20
+
+.declare uwALPHA_MASK_REG_TEMP  Base=r20.0    ElementSize=2 SrcRegion=<16;16,1> Type=uw        // 1 GRF: word view of r20
+
+//r21
+
+.declare uwALPHA_MASK_REG       Base=r21.0         ElementSize=2 SrcRegion=<16;16,1> Type=uw        // 1 GRF: word view of r21
+
+//r22
+
+
+//Always keep Channel Pointers and Offsets in the same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as
+//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT
+
+//r23
+
+
+//Lumakey
+
+
+//r24
+
+
+//r25
+
+
+//r26
+
+
+//defines to generate LABELS during compile time.
+
+
+//Setup pointer to the inline parameter
+
+// Copy MSG HDR: r27 receives a copy of the r0 thread header
+	mov (8)		r27.0<1>:ud  	r0.0<8;8,1>:ud  // Initialize message payload header with R0
+
+
+//temp; remove it once unread msg warnings are resolved -vK (r25 and r26 are also pre-filled with r0)
+mov (8) r25:ud 	r0.0<8;8,1>:ud
+mov (8) r26:ud 	r0.0<8;8,1>:ud
+
+// Calculate StepX for all layers and overwrite it on the ratio
+	mul	(8)		r3.0<1>:f   r3.0<8;8,1>:f 	r7.4<0;1,0>:f				//StepX = StepX_ratio * r7.4 (presumably the main video StepX - confirm), since StepX_ratio = StepX / VideoStepX
+
+	//Normalised Ratio of Horizontal step size with main video for all layers now becomes
+	//Normalised Horizontal step size for all layers
+
+// Calculate block origin for all layers and overwrite it on the frame origin
+	mov (2)	r8.5<1>:f						r7.0<2;2,1>:w		//Convert origin from word to float: r8.5 = r7.0:w, r8.6 = r7.1:w
+
+	cmp.e.f0.0 	(8)	null<1>:d				r2.26:ub 		1:uw	// f0.0 = (r2.26:ub == 1); when set, the (-f0.0) X-origin updates below are skipped
+
+
+    shr (1) r17.0<1>:uw    r2.2<0;1,0>:uw    0:uw
+    and (1) r17.0<1>:uw    r17.0<0;1,0>:uw           3:uw    // r17.0:uw = bits [1:0] of r2.2:uw, the layer 0 rotation code (1/2/3 select 90/180/270)
+    cmp.e.f0.1 (1) null<1>:w    r17.0<0;1,0>:uw    1:uw
+    (f0.1) jmpi (1) ROTATE_90_L0
+    cmp.e.f0.1 (1) null<1>:w    r17.0<0;1,0>:uw    2:uw
+    (f0.1) jmpi (1) ROTATE_180_L0
+    cmp.e.f0.1 (1) null<1>:w    r17.0<0;1,0>:uw    3:uw
+    (f0.1) jmpi (1) ROTATE_270_L0
+
+  // rotate 0 degree (fall-through): X: r6.0 += r3.0 * r8.5, only when f0.0 is clear; Y: r5.0 += r4.0 * r8.6
+ROTATE_0_L0:  
+    (-f0.0)mov	(1)	acc0.0:f					   r6.0<0;1,0>:f
+    (-f0.0)mac	(1)	r6.0<1>:f    r3.0<0;1,0>:f		r8.5<0;1,0>:f
+
+    mov	(1)	acc0.0:f            r5.0<0;1,0>:f
+    mac	(1)	r5.0<1>:f    r4.0<0;1,0>:f		    r8.6<0;1,0>:f
+    jmpi (1) END_SRC_BLOCK_ORIG_COMP_L0
+
+  // rotate 90 degree: X: r6.0 += r3.0 * r8.6, only when f0.0 is clear; Y: r5.0 += r4.0 * (r2.0:uw - r8.5 - 16)
+ROTATE_90_L0:  
+    (-f0.0)mov	(1)	acc0.0:f            r6.0<0;1,0>:f
+    (-f0.0)mac	(1)	r6.0<1>:f    r3.0<0;1,0>:f		r8.6<0;1,0>:f
+
+    mov (1) r16.0<1>:f    r2.0<0;1,0>:uw
+    add (1) r17.0<1>:f    -r8.5<0;1,0>:f               r16.0<0;1,0>:f
+    add (1) r17.0<1>:f    r17.0<0;1,0>:f        -16.0:f  	
+
+    mov	(1)	acc0.0:f							r5.0<0;1,0>:f
+    mac	(1)	r5.0<1>:f     r4.0<0;1,0>:f		r17.0<0;1,0>:f
+    jmpi (1) END_SRC_BLOCK_ORIG_COMP_L0
+
+  // rotate 180 degree: X: r6.0 += r3.0 * (r2.0:uw - r8.5 - 16), only when f0.0 is clear; Y: r5.0 += r4.0 * (r2.1:uw - r8.6 - 16)
+ROTATE_180_L0:  
+    (-f0.0)mov (1) r16.0<1>:f    r2.0<0;1,0>:uw
+    (-f0.0)add (1) r17.0<1>:f    -r8.5<0;1,0>:f              r16.0<0;1,0>:f
+    (-f0.0)add (1) r17.0<1>:f    r17.0<0;1,0>:f       -16.0:f  	
+    (-f0.0)mov (1) acc0.0:f             r6.0<0;1,0>:f
+    (-f0.0)mac (1) r6.0<1>:f     r3.0<0;1,0>:f          r17.0<0;1,0>:f
+
+    mov (1) r16.0<1>:f    r2.1<0;1,0>:uw	
+    add (1) r17.0<1>:f    -r8.6<0;1,0>:f                     r16.0<0;1,0>:f
+    add (1) r17.0<1>:f    r17.0<0;1,0>:f              -16.0:f  	
+    mov	(1)	acc0.0:f             r5.0<0;1,0>:f
+    mac	(1)	r5.0<1>:f     r4.0<0;1,0>:f                 r17.0<0;1,0>:f
+    jmpi (1) END_SRC_BLOCK_ORIG_COMP_L0  
+
+  // rotate 270 degree: X: r6.0 += r3.0 * (r2.1:uw - r8.6 - 16), only when f0.0 is clear; Y: r5.0 += r4.0 * r8.5
+ROTATE_270_L0:    
+    (-f0.0)mov (1) r16.0<1>:f    r2.1<0;1,0>:uw	
+    (-f0.0)add (1) r17.0<1>:f    -r8.6<0;1,0>:f              r16.0<0;1,0>:f
+    (-f0.0)add (1) r17.0<1>:f    r17.0<0;1,0>:f       -16.0:f  	
+    (-f0.0)mov (1) acc0.0:f             r6.0<0;1,0>:f
+    (-f0.0)mac (1) r6.0<1>:f     r3.0<0;1,0>:f          r17.0<0;1,0>:f
+
+    mov	(1)	acc0.0:f                    r5.0<0;1,0>:f
+    mac	(1)	r5.0<1>:f            r4.0<0;1,0>:f          r8.5<0;1,0>:f
+
+END_SRC_BLOCK_ORIG_COMP_L0:		
+    nop	
+    shr (1) r17.0<1>:uw    r2.2<0;1,0>:uw    2:uw
+    and (1) r17.0<1>:uw    r17.0<0;1,0>:uw           3:uw    // r17.0:uw = bits [3:2] of r2.2:uw, the layer 1 rotation code (1/2/3 select 90/180/270)
+    cmp.e.f0.1 (1) null<1>:w    r17.0<0;1,0>:uw    1:uw
+    (f0.1) jmpi (1) ROTATE_90_L1
+    cmp.e.f0.1 (1) null<1>:w    r17.0<0;1,0>:uw    2:uw
+    (f0.1) jmpi (1) ROTATE_180_L1
+    cmp.e.f0.1 (1) null<1>:w    r17.0<0;1,0>:uw    3:uw
+    (f0.1) jmpi (1) ROTATE_270_L1
+
+  // rotate 0 degree (fall-through): X: r6.1 += r3.1 * r8.5, only when f0.0 is clear; Y: r5.1 += r4.1 * r8.6
+ROTATE_0_L1:  
+    (-f0.0)mov	(1)	acc0.1:f					   r6.1<0;1,0>:f
+    (-f0.0)mac	(1)	r6.1<1>:f    r3.1<0;1,0>:f		r8.5<0;1,0>:f
+
+    mov	(1)	acc0.1:f            r5.1<0;1,0>:f
+    mac	(1)	r5.1<1>:f    r4.1<0;1,0>:f		    r8.6<0;1,0>:f
+    jmpi (1) END_SRC_BLOCK_ORIG_COMP_L1
+
+  // rotate 90 degree: X: r6.1 += r3.1 * r8.6, only when f0.0 is clear; Y: r5.1 += r4.1 * (r2.0:uw - r8.5 - 16)
+ROTATE_90_L1:  
+    (-f0.0)mov	(1)	acc0.1:f            r6.1<0;1,0>:f
+    (-f0.0)mac	(1)	r6.1<1>:f    r3.1<0;1,0>:f		r8.6<0;1,0>:f
+
+    mov (1) r16.0<1>:f    r2.0<0;1,0>:uw
+    add (1) r17.0<1>:f    -r8.5<0;1,0>:f               r16.0<0;1,0>:f
+    add (1) r17.0<1>:f    r17.0<0;1,0>:f        -16.0:f  	
+
+    mov	(1)	acc0.1:f							r5.1<0;1,0>:f
+    mac	(1)	r5.1<1>:f     r4.1<0;1,0>:f		r17.0<0;1,0>:f
+    jmpi (1) END_SRC_BLOCK_ORIG_COMP_L1
+
+  // rotate 180 degree: X: r6.1 += r3.1 * (r2.0:uw - r8.5 - 16), only when f0.0 is clear; Y: r5.1 += r4.1 * (r2.1:uw - r8.6 - 16)
+ROTATE_180_L1:  
+    (-f0.0)mov (1) r16.0<1>:f    r2.0<0;1,0>:uw
+    (-f0.0)add (1) r17.0<1>:f    -r8.5<0;1,0>:f              r16.0<0;1,0>:f
+    (-f0.0)add (1) r17.0<1>:f    r17.0<0;1,0>:f       -16.0:f  	
+    (-f0.0)mov (1) acc0.1:f             r6.1<0;1,0>:f
+    (-f0.0)mac (1) r6.1<1>:f     r3.1<0;1,0>:f          r17.0<0;1,0>:f
+
+    mov (1) r16.0<1>:f    r2.1<0;1,0>:uw	
+    add (1) r17.0<1>:f    -r8.6<0;1,0>:f                     r16.0<0;1,0>:f
+    add (1) r17.0<1>:f    r17.0<0;1,0>:f              -16.0:f  	
+    mov	(1)	acc0.1:f             r5.1<0;1,0>:f
+    mac	(1)	r5.1<1>:f     r4.1<0;1,0>:f                 r17.0<0;1,0>:f
+    jmpi (1) END_SRC_BLOCK_ORIG_COMP_L1  
+
+  // rotate 270 degree: X: r6.1 += r3.1 * (r2.1:uw - r8.6 - 16), only when f0.0 is clear; Y: r5.1 += r4.1 * r8.5
+ROTATE_270_L1:    
+    (-f0.0)mov (1) r16.0<1>:f    r2.1<0;1,0>:uw	
+    (-f0.0)add (1) r17.0<1>:f    -r8.6<0;1,0>:f              r16.0<0;1,0>:f
+    (-f0.0)add (1) r17.0<1>:f    r17.0<0;1,0>:f       -16.0:f  	
+    (-f0.0)mov (1) acc0.1:f             r6.1<0;1,0>:f
+    (-f0.0)mac (1) r6.1<1>:f     r3.1<0;1,0>:f          r17.0<0;1,0>:f
+
+    mov	(1)	acc0.1:f                    r5.1<0;1,0>:f
+    mac	(1)	r5.1<1>:f            r4.1<0;1,0>:f          r8.5<0;1,0>:f
+
+END_SRC_BLOCK_ORIG_COMP_L1:		
+    nop	
+    shr (1) r17.0<1>:uw    r2.2<0;1,0>:uw    4:uw
+    and (1) r17.0<1>:uw    r17.0<0;1,0>:uw           3:uw    // r17.0:uw = bits [5:4] of r2.2:uw, the layer 2 rotation code (1/2/3 select 90/180/270)
+    cmp.e.f0.1 (1) null<1>:w    r17.0<0;1,0>:uw    1:uw
+    (f0.1) jmpi (1) ROTATE_90_L2
+    cmp.e.f0.1 (1) null<1>:w    r17.0<0;1,0>:uw    2:uw
+    (f0.1) jmpi (1) ROTATE_180_L2
+    cmp.e.f0.1 (1) null<1>:w    r17.0<0;1,0>:uw    3:uw
+    (f0.1) jmpi (1) ROTATE_270_L2
+
+  // rotate 0 degree (fall-through): X: r6.2 += r3.2 * r8.5, only when f0.0 is clear; Y: r5.2 += r4.2 * r8.6
+ROTATE_0_L2:  
+    (-f0.0)mov	(1)	acc0.2:f					   r6.2<0;1,0>:f
+    (-f0.0)mac	(1)	r6.2<1>:f    r3.2<0;1,0>:f		r8.5<0;1,0>:f
+
+    mov	(1)	acc0.2:f            r5.2<0;1,0>:f
+    mac	(1)	r5.2<1>:f    r4.2<0;1,0>:f		    r8.6<0;1,0>:f
+    jmpi (1) END_SRC_BLOCK_ORIG_COMP_L2
+
+  // rotate 90 degree: X: r6.2 += r3.2 * r8.6, only when f0.0 is clear; Y: r5.2 += r4.2 * (r2.0:uw - r8.5 - 16)
+ROTATE_90_L2:  
+    (-f0.0)mov	(1)	acc0.2:f            r6.2<0;1,0>:f
+    (-f0.0)mac	(1)	r6.2<1>:f    r3.2<0;1,0>:f		r8.6<0;1,0>:f
+
+    mov (1) r16.0<1>:f    r2.0<0;1,0>:uw
+    add (1) r17.0<1>:f    -r8.5<0;1,0>:f               r16.0<0;1,0>:f
+    add (1) r17.0<1>:f    r17.0<0;1,0>:f        -16.0:f  	
+
+    mov	(1)	acc0.2:f							r5.2<0;1,0>:f
+    mac	(1)	r5.2<1>:f     r4.2<0;1,0>:f		r17.0<0;1,0>:f
+    jmpi (1) END_SRC_BLOCK_ORIG_COMP_L2
+
+  // rotate 180 degree: X: r6.2 += r3.2 * (r2.0:uw - r8.5 - 16), only when f0.0 is clear; Y: r5.2 += r4.2 * (r2.1:uw - r8.6 - 16)
+ROTATE_180_L2:  
+    (-f0.0)mov (1) r16.0<1>:f    r2.0<0;1,0>:uw
+    (-f0.0)add (1) r17.0<1>:f    -r8.5<0;1,0>:f              r16.0<0;1,0>:f
+    (-f0.0)add (1) r17.0<1>:f    r17.0<0;1,0>:f       -16.0:f  	
+    (-f0.0)mov (1) acc0.2:f             r6.2<0;1,0>:f
+    (-f0.0)mac (1) r6.2<1>:f     r3.2<0;1,0>:f          r17.0<0;1,0>:f
+
+    mov (1) r16.0<1>:f    r2.1<0;1,0>:uw	
+    add (1) r17.0<1>:f    -r8.6<0;1,0>:f                     r16.0<0;1,0>:f
+    add (1) r17.0<1>:f    r17.0<0;1,0>:f              -16.0:f  	
+    mov	(1)	acc0.2:f             r5.2<0;1,0>:f
+    mac	(1)	r5.2<1>:f     r4.2<0;1,0>:f                 r17.0<0;1,0>:f
+    jmpi (1) END_SRC_BLOCK_ORIG_COMP_L2  
+
+  // rotate 270 degree: X: r6.2 += r3.2 * (r2.1:uw - r8.6 - 16), only when f0.0 is clear; Y: r5.2 += r4.2 * r8.5
+ROTATE_270_L2:    
+    (-f0.0)mov (1) r16.0<1>:f    r2.1<0;1,0>:uw	
+    (-f0.0)add (1) r17.0<1>:f    -r8.6<0;1,0>:f              r16.0<0;1,0>:f
+    (-f0.0)add (1) r17.0<1>:f    r17.0<0;1,0>:f       -16.0:f  	
+    (-f0.0)mov (1) acc0.2:f             r6.2<0;1,0>:f
+    (-f0.0)mac (1) r6.2<1>:f     r3.2<0;1,0>:f          r17.0<0;1,0>:f
+
+    mov	(1)	acc0.2:f                    r5.2<0;1,0>:f
+    mac	(1)	r5.2<1>:f            r4.2<0;1,0>:f          r8.5<0;1,0>:f
+
+END_SRC_BLOCK_ORIG_COMP_L2:		
+    nop	
+    shr (1) r17.0<1>:uw    r2.2<0;1,0>:uw    6:uw
+    and (1) r17.0<1>:uw    r17.0<0;1,0>:uw           3:uw    // r17.0:uw = bits [7:6] of r2.2:uw, the layer 3 rotation code (1/2/3 select 90/180/270)
+    cmp.e.f0.1 (1) null<1>:w    r17.0<0;1,0>:uw    1:uw
+    (f0.1) jmpi (1) ROTATE_90_L3
+    cmp.e.f0.1 (1) null<1>:w    r17.0<0;1,0>:uw    2:uw
+    (f0.1) jmpi (1) ROTATE_180_L3
+    cmp.e.f0.1 (1) null<1>:w    r17.0<0;1,0>:uw    3:uw
+    (f0.1) jmpi (1) ROTATE_270_L3
+
+  // rotate 0 degree (fall-through): X: r6.3 += r3.3 * r8.5, only when f0.0 is clear; Y: r5.3 += r4.3 * r8.6
+ROTATE_0_L3:  
+    (-f0.0)mov	(1)	acc0.3:f					   r6.3<0;1,0>:f
+    (-f0.0)mac	(1)	r6.3<1>:f    r3.3<0;1,0>:f		r8.5<0;1,0>:f
+
+    mov	(1)	acc0.3:f            r5.3<0;1,0>:f
+    mac	(1)	r5.3<1>:f    r4.3<0;1,0>:f		    r8.6<0;1,0>:f
+    jmpi (1) END_SRC_BLOCK_ORIG_COMP_L3
+
+  // rotate 90 degree: X: r6.3 += r3.3 * r8.6, only when f0.0 is clear; Y: r5.3 += r4.3 * (r2.0:uw - r8.5 - 16)
+ROTATE_90_L3:  
+    (-f0.0)mov	(1)	acc0.3:f            r6.3<0;1,0>:f
+    (-f0.0)mac	(1)	r6.3<1>:f    r3.3<0;1,0>:f		r8.6<0;1,0>:f
+
+    mov (1) r16.0<1>:f    r2.0<0;1,0>:uw
+    add (1) r17.0<1>:f    -r8.5<0;1,0>:f               r16.0<0;1,0>:f
+    add (1) r17.0<1>:f    r17.0<0;1,0>:f        -16.0:f  	
+
+    mov	(1)	acc0.3:f							r5.3<0;1,0>:f
+    mac	(1)	r5.3<1>:f     r4.3<0;1,0>:f		r17.0<0;1,0>:f
+    jmpi (1) END_SRC_BLOCK_ORIG_COMP_L3
+
+  // rotate 180 degree: X: r6.3 += r3.3 * (r2.0:uw - r8.5 - 16), only when f0.0 is clear; Y: r5.3 += r4.3 * (r2.1:uw - r8.6 - 16)
+ROTATE_180_L3:  
+    (-f0.0)mov (1) r16.0<1>:f    r2.0<0;1,0>:uw
+    (-f0.0)add (1) r17.0<1>:f    -r8.5<0;1,0>:f              r16.0<0;1,0>:f
+    (-f0.0)add (1) r17.0<1>:f    r17.0<0;1,0>:f       -16.0:f  	
+    (-f0.0)mov (1) acc0.3:f             r6.3<0;1,0>:f
+    (-f0.0)mac (1) r6.3<1>:f     r3.3<0;1,0>:f          r17.0<0;1,0>:f
+
+    mov (1) r16.0<1>:f    r2.1<0;1,0>:uw	
+    add (1) r17.0<1>:f    -r8.6<0;1,0>:f                     r16.0<0;1,0>:f
+    add (1) r17.0<1>:f    r17.0<0;1,0>:f              -16.0:f  	
+    mov	(1)	acc0.3:f             r5.3<0;1,0>:f
+    mac	(1)	r5.3<1>:f     r4.3<0;1,0>:f                 r17.0<0;1,0>:f
+    jmpi (1) END_SRC_BLOCK_ORIG_COMP_L3  
+
+  // rotate 270 degree: X: r6.3 += r3.3 * (r2.1:uw - r8.6 - 16), only when f0.0 is clear; Y: r5.3 += r4.3 * r8.5
+ROTATE_270_L3:    
+    (-f0.0)mov (1) r16.0<1>:f    r2.1<0;1,0>:uw	
+    (-f0.0)add (1) r17.0<1>:f    -r8.6<0;1,0>:f              r16.0<0;1,0>:f
+    (-f0.0)add (1) r17.0<1>:f    r17.0<0;1,0>:f       -16.0:f  	
+    (-f0.0)mov (1) acc0.3:f             r6.3<0;1,0>:f
+    (-f0.0)mac (1) r6.3<1>:f     r3.3<0;1,0>:f          r17.0<0;1,0>:f
+
+    mov	(1)	acc0.3:f                    r5.3<0;1,0>:f
+    mac	(1)	r5.3<1>:f            r4.3<0;1,0>:f          r8.5<0;1,0>:f
+
+END_SRC_BLOCK_ORIG_COMP_L3:		
+    nop	
+    shr (1) r17.0<1>:uw    r2.2<0;1,0>:uw    8:uw
+    and (1) r17.0<1>:uw    r17.0<0;1,0>:uw           3:uw    // r17.0:uw = bits [9:8] of r2.2:uw, the layer 4 rotation code (1/2/3 select 90/180/270)
+    cmp.e.f0.1 (1) null<1>:w    r17.0<0;1,0>:uw    1:uw
+    (f0.1) jmpi (1) ROTATE_90_L4
+    cmp.e.f0.1 (1) null<1>:w    r17.0<0;1,0>:uw    2:uw
+    (f0.1) jmpi (1) ROTATE_180_L4
+    cmp.e.f0.1 (1) null<1>:w    r17.0<0;1,0>:uw    3:uw
+    (f0.1) jmpi (1) ROTATE_270_L4
+
+  // rotate 0 degree (fall-through): X: r6.4 += r3.4 * r8.5, only when f0.0 is clear; Y: r5.4 += r4.4 * r8.6
+ROTATE_0_L4:  
+    (-f0.0)mov	(1)	acc0.4:f					   r6.4<0;1,0>:f
+    (-f0.0)mac	(1)	r6.4<1>:f    r3.4<0;1,0>:f		r8.5<0;1,0>:f
+
+    mov	(1)	acc0.4:f            r5.4<0;1,0>:f
+    mac	(1)	r5.4<1>:f    r4.4<0;1,0>:f		    r8.6<0;1,0>:f
+    jmpi (1) END_SRC_BLOCK_ORIG_COMP_L4
+
+  // rotate 90 degree: X: r6.4 += r3.4 * r8.6, only when f0.0 is clear; Y: r5.4 += r4.4 * (r2.0:uw - r8.5 - 16)
+ROTATE_90_L4:  
+    (-f0.0)mov	(1)	acc0.4:f            r6.4<0;1,0>:f
+    (-f0.0)mac	(1)	r6.4<1>:f    r3.4<0;1,0>:f		r8.6<0;1,0>:f
+
+    mov (1) r16.0<1>:f    r2.0<0;1,0>:uw
+    add (1) r17.0<1>:f    -r8.5<0;1,0>:f               r16.0<0;1,0>:f
+    add (1) r17.0<1>:f    r17.0<0;1,0>:f        -16.0:f  	
+
+    mov	(1)	acc0.4:f							r5.4<0;1,0>:f
+    mac	(1)	r5.4<1>:f     r4.4<0;1,0>:f		r17.0<0;1,0>:f
+    jmpi (1) END_SRC_BLOCK_ORIG_COMP_L4
+
+  // rotate 180 degree: X: r6.4 += r3.4 * (r2.0:uw - r8.5 - 16), only when f0.0 is clear; Y: r5.4 += r4.4 * (r2.1:uw - r8.6 - 16)
+ROTATE_180_L4:  
+    (-f0.0)mov (1) r16.0<1>:f    r2.0<0;1,0>:uw
+    (-f0.0)add (1) r17.0<1>:f    -r8.5<0;1,0>:f              r16.0<0;1,0>:f
+    (-f0.0)add (1) r17.0<1>:f    r17.0<0;1,0>:f       -16.0:f  	
+    (-f0.0)mov (1) acc0.4:f             r6.4<0;1,0>:f
+    (-f0.0)mac (1) r6.4<1>:f     r3.4<0;1,0>:f          r17.0<0;1,0>:f
+
+    mov (1) r16.0<1>:f    r2.1<0;1,0>:uw	
+    add (1) r17.0<1>:f    -r8.6<0;1,0>:f                     r16.0<0;1,0>:f
+    add (1) r17.0<1>:f    r17.0<0;1,0>:f              -16.0:f  	
+    mov	(1)	acc0.4:f             r5.4<0;1,0>:f
+    mac	(1)	r5.4<1>:f     r4.4<0;1,0>:f                 r17.0<0;1,0>:f
+    jmpi (1) END_SRC_BLOCK_ORIG_COMP_L4  
+
+  // rotate 270 degree: X: r6.4 += r3.4 * (r2.1:uw - r8.6 - 16), only when f0.0 is clear; Y: r5.4 += r4.4 * r8.5
+ROTATE_270_L4:    
+    (-f0.0)mov (1) r16.0<1>:f    r2.1<0;1,0>:uw	
+    (-f0.0)add (1) r17.0<1>:f    -r8.6<0;1,0>:f              r16.0<0;1,0>:f
+    (-f0.0)add (1) r17.0<1>:f    r17.0<0;1,0>:f       -16.0:f  	
+    (-f0.0)mov (1) acc0.4:f             r6.4<0;1,0>:f
+    (-f0.0)mac (1) r6.4<1>:f     r3.4<0;1,0>:f          r17.0<0;1,0>:f
+
+    mov	(1)	acc0.4:f                    r5.4<0;1,0>:f
+    mac	(1)	r5.4<1>:f            r4.4<0;1,0>:f          r8.5<0;1,0>:f
+
+END_SRC_BLOCK_ORIG_COMP_L4:		
+    nop	
+    shr (1) r17.0<1>:uw    r2.2<0;1,0>:uw    10:uw
+    and (1) r17.0<1>:uw    r17.0<0;1,0>:uw           3:uw    // r17.0:uw = bits [11:10] of r2.2:uw, the layer 5 rotation code (1/2/3 select 90/180/270)
+    cmp.e.f0.1 (1) null<1>:w    r17.0<0;1,0>:uw    1:uw
+    (f0.1) jmpi (1) ROTATE_90_L5
+    cmp.e.f0.1 (1) null<1>:w    r17.0<0;1,0>:uw    2:uw
+    (f0.1) jmpi (1) ROTATE_180_L5
+    cmp.e.f0.1 (1) null<1>:w    r17.0<0;1,0>:uw    3:uw
+    (f0.1) jmpi (1) ROTATE_270_L5
+
+  // rotate 0 degree (fall-through): X: r6.5 += r3.5 * r8.5, only when f0.0 is clear; Y: r5.5 += r4.5 * r8.6
+ROTATE_0_L5:  
+    (-f0.0)mov	(1)	acc0.5:f					   r6.5<0;1,0>:f
+    (-f0.0)mac	(1)	r6.5<1>:f    r3.5<0;1,0>:f		r8.5<0;1,0>:f
+
+    mov	(1)	acc0.5:f            r5.5<0;1,0>:f
+    mac	(1)	r5.5<1>:f    r4.5<0;1,0>:f		    r8.6<0;1,0>:f
+    jmpi (1) END_SRC_BLOCK_ORIG_COMP_L5
+
+  // rotate 90 degree: X: r6.5 += r3.5 * r8.6, only when f0.0 is clear; Y: r5.5 += r4.5 * (r2.0:uw - r8.5 - 16)
+ROTATE_90_L5:  
+    (-f0.0)mov	(1)	acc0.5:f            r6.5<0;1,0>:f
+    (-f0.0)mac	(1)	r6.5<1>:f    r3.5<0;1,0>:f		r8.6<0;1,0>:f
+
+    mov (1) r16.0<1>:f    r2.0<0;1,0>:uw
+    add (1) r17.0<1>:f    -r8.5<0;1,0>:f               r16.0<0;1,0>:f
+    add (1) r17.0<1>:f    r17.0<0;1,0>:f        -16.0:f  	
+
+    mov	(1)	acc0.5:f							r5.5<0;1,0>:f
+    mac	(1)	r5.5<1>:f     r4.5<0;1,0>:f		r17.0<0;1,0>:f
+    jmpi (1) END_SRC_BLOCK_ORIG_COMP_L5
+
+  // rotate 180 degree: X: r6.5 += r3.5 * (r2.0:uw - r8.5 - 16), only when f0.0 is clear; Y: r5.5 += r4.5 * (r2.1:uw - r8.6 - 16)
+ROTATE_180_L5:  
+    (-f0.0)mov (1) r16.0<1>:f    r2.0<0;1,0>:uw
+    (-f0.0)add (1) r17.0<1>:f    -r8.5<0;1,0>:f              r16.0<0;1,0>:f
+    (-f0.0)add (1) r17.0<1>:f    r17.0<0;1,0>:f       -16.0:f  	
+    (-f0.0)mov (1) acc0.5:f             r6.5<0;1,0>:f
+    (-f0.0)mac (1) r6.5<1>:f     r3.5<0;1,0>:f          r17.0<0;1,0>:f
+
+    mov (1) r16.0<1>:f    r2.1<0;1,0>:uw	
+    add (1) r17.0<1>:f    -r8.6<0;1,0>:f                     r16.0<0;1,0>:f
+    add (1) r17.0<1>:f    r17.0<0;1,0>:f              -16.0:f  	
+    mov	(1)	acc0.5:f             r5.5<0;1,0>:f
+    mac	(1)	r5.5<1>:f     r4.5<0;1,0>:f                 r17.0<0;1,0>:f
+    jmpi (1) END_SRC_BLOCK_ORIG_COMP_L5  
+
+  // rotate 270 degree: X: r6.5 += r3.5 * (r2.1:uw - r8.6 - 16), only when f0.0 is clear; Y: r5.5 += r4.5 * r8.5
+ROTATE_270_L5:    
+    (-f0.0)mov (1) r16.0<1>:f    r2.1<0;1,0>:uw	
+    (-f0.0)add (1) r17.0<1>:f    -r8.6<0;1,0>:f              r16.0<0;1,0>:f
+    (-f0.0)add (1) r17.0<1>:f    r17.0<0;1,0>:f       -16.0:f  	
+    (-f0.0)mov (1) acc0.5:f             r6.5<0;1,0>:f
+    (-f0.0)mac (1) r6.5<1>:f     r3.5<0;1,0>:f          r17.0<0;1,0>:f
+
+    mov	(1)	acc0.5:f                    r5.5<0;1,0>:f
+    mac	(1)	r5.5<1>:f            r4.5<0;1,0>:f          r8.5<0;1,0>:f
+
+END_SRC_BLOCK_ORIG_COMP_L5:		
+    nop	
+    shr (1) r17.0<1>:uw    r2.2<0;1,0>:uw    12:uw
+    and (1) r17.0<1>:uw    r17.0<0;1,0>:uw           3:uw    // r17.0:uw = bits [13:12] of r2.2:uw, the layer 6 rotation code (1/2/3 select 90/180/270)
+    cmp.e.f0.1 (1) null<1>:w    r17.0<0;1,0>:uw    1:uw
+    (f0.1) jmpi (1) ROTATE_90_L6
+    cmp.e.f0.1 (1) null<1>:w    r17.0<0;1,0>:uw    2:uw
+    (f0.1) jmpi (1) ROTATE_180_L6
+    cmp.e.f0.1 (1) null<1>:w    r17.0<0;1,0>:uw    3:uw
+    (f0.1) jmpi (1) ROTATE_270_L6
+
+  // rotate 0 degree (fall-through): X: r6.6 += r3.6 * r8.5, only when f0.0 is clear; Y: r5.6 += r4.6 * r8.6
+ROTATE_0_L6:  
+    (-f0.0)mov	(1)	acc0.6:f					   r6.6<0;1,0>:f
+    (-f0.0)mac	(1)	r6.6<1>:f    r3.6<0;1,0>:f		r8.5<0;1,0>:f
+
+    mov	(1)	acc0.6:f            r5.6<0;1,0>:f
+    mac	(1)	r5.6<1>:f    r4.6<0;1,0>:f		    r8.6<0;1,0>:f
+    jmpi (1) END_SRC_BLOCK_ORIG_COMP_L6
+
+  // rotate 90 degree: X: r6.6 += r3.6 * r8.6, only when f0.0 is clear; Y: r5.6 += r4.6 * (r2.0:uw - r8.5 - 16)
+ROTATE_90_L6:  
+    (-f0.0)mov	(1)	acc0.6:f            r6.6<0;1,0>:f
+    (-f0.0)mac	(1)	r6.6<1>:f    r3.6<0;1,0>:f		r8.6<0;1,0>:f
+
+    mov (1) r16.0<1>:f    r2.0<0;1,0>:uw
+    add (1) r17.0<1>:f    -r8.5<0;1,0>:f               r16.0<0;1,0>:f
+    add (1) r17.0<1>:f    r17.0<0;1,0>:f        -16.0:f  	
+
+    mov	(1)	acc0.6:f							r5.6<0;1,0>:f
+    mac	(1)	r5.6<1>:f     r4.6<0;1,0>:f		r17.0<0;1,0>:f
+    jmpi (1) END_SRC_BLOCK_ORIG_COMP_L6
+
+  // rotate 180 degree: X: r6.6 += r3.6 * (r2.0:uw - r8.5 - 16), only when f0.0 is clear; Y: r5.6 += r4.6 * (r2.1:uw - r8.6 - 16)
+ROTATE_180_L6:  
+    (-f0.0)mov (1) r16.0<1>:f    r2.0<0;1,0>:uw
+    (-f0.0)add (1) r17.0<1>:f    -r8.5<0;1,0>:f              r16.0<0;1,0>:f
+    (-f0.0)add (1) r17.0<1>:f    r17.0<0;1,0>:f       -16.0:f  	
+    (-f0.0)mov (1) acc0.6:f             r6.6<0;1,0>:f
+    (-f0.0)mac (1) r6.6<1>:f     r3.6<0;1,0>:f          r17.0<0;1,0>:f
+
+    mov (1) r16.0<1>:f    r2.1<0;1,0>:uw	
+    add (1) r17.0<1>:f    -r8.6<0;1,0>:f                     r16.0<0;1,0>:f
+    add (1) r17.0<1>:f    r17.0<0;1,0>:f              -16.0:f  	
+    mov	(1)	acc0.6:f             r5.6<0;1,0>:f
+    mac	(1)	r5.6<1>:f     r4.6<0;1,0>:f                 r17.0<0;1,0>:f
+    jmpi (1) END_SRC_BLOCK_ORIG_COMP_L6  
+
+  // rotate 270 degree: X: r6.6 += r3.6 * (r2.1:uw - r8.6 - 16), only when f0.0 is clear; Y: r5.6 += r4.6 * r8.5
+ROTATE_270_L6:    
+    (-f0.0)mov (1) r16.0<1>:f    r2.1<0;1,0>:uw	
+    (-f0.0)add (1) r17.0<1>:f    -r8.6<0;1,0>:f              r16.0<0;1,0>:f
+    (-f0.0)add (1) r17.0<1>:f    r17.0<0;1,0>:f       -16.0:f  	
+    (-f0.0)mov (1) acc0.6:f             r6.6<0;1,0>:f
+    (-f0.0)mac (1) r6.6<1>:f     r3.6<0;1,0>:f          r17.0<0;1,0>:f
+
+    mov	(1)	acc0.6:f                    r5.6<0;1,0>:f
+    mac	(1)	r5.6<1>:f            r4.6<0;1,0>:f          r8.5<0;1,0>:f
+
+END_SRC_BLOCK_ORIG_COMP_L6:		
+    nop	
+    shr (1) r17.0<1>:uw    r2.2<0;1,0>:uw    14:uw
+    and (1) r17.0<1>:uw    r17.0<0;1,0>:uw           3:uw    // r17.0:uw = bits [15:14] of r2.2:uw, the layer 7 rotation code (1/2/3 select 90/180/270)
+    cmp.e.f0.1 (1) null<1>:w    r17.0<0;1,0>:uw    1:uw
+    (f0.1) jmpi (1) ROTATE_90_L7
+    cmp.e.f0.1 (1) null<1>:w    r17.0<0;1,0>:uw    2:uw
+    (f0.1) jmpi (1) ROTATE_180_L7
+    cmp.e.f0.1 (1) null<1>:w    r17.0<0;1,0>:uw    3:uw
+    (f0.1) jmpi (1) ROTATE_270_L7
+
+  // rotate 0 degree (fall-through): X: r6.7 += r3.7 * r8.5, only when f0.0 is clear; Y: r5.7 += r4.7 * r8.6
+ROTATE_0_L7:  
+    (-f0.0)mov	(1)	acc0.7:f					   r6.7<0;1,0>:f
+    (-f0.0)mac	(1)	r6.7<1>:f    r3.7<0;1,0>:f		r8.5<0;1,0>:f
+
+    mov	(1)	acc0.7:f            r5.7<0;1,0>:f
+    mac	(1)	r5.7<1>:f    r4.7<0;1,0>:f		    r8.6<0;1,0>:f
+    jmpi (1) END_SRC_BLOCK_ORIG_COMP_L7
+
+  // rotate 90 degree: X: r6.7 += r3.7 * r8.6, only when f0.0 is clear; Y: r5.7 += r4.7 * (r2.0:uw - r8.5 - 16)
+ROTATE_90_L7:  
+    (-f0.0)mov	(1)	acc0.7:f            r6.7<0;1,0>:f
+    (-f0.0)mac	(1)	r6.7<1>:f    r3.7<0;1,0>:f		r8.6<0;1,0>:f
+
+    mov (1) r16.0<1>:f    r2.0<0;1,0>:uw
+    add (1) r17.0<1>:f    -r8.5<0;1,0>:f               r16.0<0;1,0>:f
+    add (1) r17.0<1>:f    r17.0<0;1,0>:f        -16.0:f  	
+
+    mov	(1)	acc0.7:f							r5.7<0;1,0>:f
+    mac	(1)	r5.7<1>:f     r4.7<0;1,0>:f		r17.0<0;1,0>:f
+    jmpi (1) END_SRC_BLOCK_ORIG_COMP_L7
+
+  // rotate 180 degree: X: r6.7 += r3.7 * (r2.0:uw - r8.5 - 16), only when f0.0 is clear; Y: r5.7 += r4.7 * (r2.1:uw - r8.6 - 16)
+ROTATE_180_L7:  
+    (-f0.0)mov (1) r16.0<1>:f    r2.0<0;1,0>:uw
+    (-f0.0)add (1) r17.0<1>:f    -r8.5<0;1,0>:f              r16.0<0;1,0>:f
+    (-f0.0)add (1) r17.0<1>:f    r17.0<0;1,0>:f       -16.0:f  	
+    (-f0.0)mov (1) acc0.7:f             r6.7<0;1,0>:f
+    (-f0.0)mac (1) r6.7<1>:f     r3.7<0;1,0>:f          r17.0<0;1,0>:f
+
+    mov (1) r16.0<1>:f    r2.1<0;1,0>:uw	
+    add (1) r17.0<1>:f    -r8.6<0;1,0>:f                     r16.0<0;1,0>:f
+    add (1) r17.0<1>:f    r17.0<0;1,0>:f              -16.0:f  	
+    mov	(1)	acc0.7:f             r5.7<0;1,0>:f
+    mac	(1)	r5.7<1>:f     r4.7<0;1,0>:f                 r17.0<0;1,0>:f
+    jmpi (1) END_SRC_BLOCK_ORIG_COMP_L7  
+
+  // rotate 270 degree: X: r6.7 += r3.7 * (r2.1:uw - r8.6 - 16), only when f0.0 is clear; Y: r5.7 += r4.7 * r8.5
+ROTATE_270_L7:    
+    (-f0.0)mov (1) r16.0<1>:f    r2.1<0;1,0>:uw	
+    (-f0.0)add (1) r17.0<1>:f    -r8.6<0;1,0>:f              r16.0<0;1,0>:f
+    (-f0.0)add (1) r17.0<1>:f    r17.0<0;1,0>:f       -16.0:f  	
+    (-f0.0)mov (1) acc0.7:f             r6.7<0;1,0>:f
+    (-f0.0)mac (1) r6.7<1>:f     r3.7<0;1,0>:f          r17.0<0;1,0>:f
+
+    mov	(1)	acc0.7:f                    r5.7<0;1,0>:f
+    mac	(1)	r5.7<1>:f            r4.7<0;1,0>:f          r8.5<0;1,0>:f
+
+END_SRC_BLOCK_ORIG_COMP_L7:		
+    nop	
+
+
diff --git a/src/shaders/post_processing/gen7/avs.asm b/src/shaders/post_processing/gen7/avs.asm
new file mode 100644
index 0000000..091ed50
--- /dev/null
+++ b/src/shaders/post_processing/gen7/avs.asm
@@ -0,0 +1,19 @@
+// Module name: AVS (kernel body consists solely of the .g4a fragments included below, in this order)
+.kernel AVS
+.code
+
+#include "VP_Setup.g4a"
+#include "Set_Layer_0.g4a"
+#include "Set_AVS_Buf_0123_PL2.g4a"
+#include "PL2_AVS_Buf_0.g4a"
+#include "PL2_AVS_Buf_1.g4a"
+#include "PL2_AVS_Buf_2.g4a"
+#include "PL2_AVS_Buf_3.g4a"
+#include "Save_AVS_NV12.g4a"        
+#include "EOT.g4a"
+
+.end_code  
+
+.end_kernel
+
+// end of AVS
diff --git a/src/shaders/post_processing/gen7/avs.g75b b/src/shaders/post_processing/gen7/avs.g75b
new file mode 100644
index 0000000..c25432e
--- /dev/null
+++ b/src/shaders/post_processing/gen7/avs.g75b
@@ -0,0 +1,654 @@
+   { 0x00600001, 0x23600021, 0x008d0000, 0x00000000 },
+   { 0x00600001, 0x23200021, 0x008d0000, 0x00000000 },
+   { 0x00600001, 0x23400021, 0x008d0000, 0x00000000 },
+   { 0x00600041, 0x206077bd, 0x008d0060, 0x000000f0 },
+   { 0x00200001, 0x211401bd, 0x004500e0, 0x00000000 },
+   { 0x01600010, 0x20002e24, 0x0000005a, 0x00010001 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x00000000 },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+   { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+   { 0x00110048, 0x20c077bd, 0x00000060, 0x00000114 },
+   { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+   { 0x00000048, 0x20a077bd, 0x00000080, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+   { 0x00110048, 0x20c077bd, 0x00000060, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+   { 0x00000048, 0x20a077bd, 0x00000080, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+   { 0x00110048, 0x20c077bd, 0x00000060, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+   { 0x00000048, 0x20a077bd, 0x00000080, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+   { 0x00110048, 0x20c077bd, 0x00000060, 0x00000220 },
+   { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+   { 0x00000048, 0x20a077bd, 0x00000080, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x00020002 },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+   { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+   { 0x00110048, 0x20c477bd, 0x00000064, 0x00000114 },
+   { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+   { 0x00000048, 0x20a477bd, 0x00000084, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+   { 0x00110048, 0x20c477bd, 0x00000064, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+   { 0x00000048, 0x20a477bd, 0x00000084, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+   { 0x00110048, 0x20c477bd, 0x00000064, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+   { 0x00000048, 0x20a477bd, 0x00000084, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+   { 0x00110048, 0x20c477bd, 0x00000064, 0x00000220 },
+   { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+   { 0x00000048, 0x20a477bd, 0x00000084, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x00040004 },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+   { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+   { 0x00110048, 0x20c877bd, 0x00000068, 0x00000114 },
+   { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+   { 0x00000048, 0x20a877bd, 0x00000088, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+   { 0x00110048, 0x20c877bd, 0x00000068, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+   { 0x00000048, 0x20a877bd, 0x00000088, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+   { 0x00110048, 0x20c877bd, 0x00000068, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+   { 0x00000048, 0x20a877bd, 0x00000088, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+   { 0x00110048, 0x20c877bd, 0x00000068, 0x00000220 },
+   { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+   { 0x00000048, 0x20a877bd, 0x00000088, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x00060006 },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+   { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+   { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000114 },
+   { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+   { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+   { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+   { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+   { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+   { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+   { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000220 },
+   { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+   { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x00080008 },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+   { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+   { 0x00110048, 0x20d077bd, 0x00000070, 0x00000114 },
+   { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+   { 0x00000048, 0x20b077bd, 0x00000090, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+   { 0x00110048, 0x20d077bd, 0x00000070, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+   { 0x00000048, 0x20b077bd, 0x00000090, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+   { 0x00110048, 0x20d077bd, 0x00000070, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+   { 0x00000048, 0x20b077bd, 0x00000090, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+   { 0x00110048, 0x20d077bd, 0x00000070, 0x00000220 },
+   { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+   { 0x00000048, 0x20b077bd, 0x00000090, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x000a000a },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+   { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+   { 0x00110048, 0x20d477bd, 0x00000074, 0x00000114 },
+   { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+   { 0x00000048, 0x20b477bd, 0x00000094, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+   { 0x00110048, 0x20d477bd, 0x00000074, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+   { 0x00000048, 0x20b477bd, 0x00000094, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+   { 0x00110048, 0x20d477bd, 0x00000074, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+   { 0x00000048, 0x20b477bd, 0x00000094, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+   { 0x00110048, 0x20d477bd, 0x00000074, 0x00000220 },
+   { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+   { 0x00000048, 0x20b477bd, 0x00000094, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x000c000c },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+   { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+   { 0x00110048, 0x20d877bd, 0x00000078, 0x00000114 },
+   { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+   { 0x00000048, 0x20b877bd, 0x00000098, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+   { 0x00110048, 0x20d877bd, 0x00000078, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+   { 0x00000048, 0x20b877bd, 0x00000098, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+   { 0x00110048, 0x20d877bd, 0x00000078, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+   { 0x00000048, 0x20b877bd, 0x00000098, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+   { 0x00110048, 0x20d877bd, 0x00000078, 0x00000220 },
+   { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+   { 0x00000048, 0x20b877bd, 0x00000098, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x000e000e },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+   { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+   { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000114 },
+   { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+   { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+   { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+   { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+   { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+   { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+   { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000220 },
+   { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+   { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00800001, 0x22a00169, 0x00000000, 0xffffffff },
+   { 0x02000010, 0x20002d24, 0x020000e4, 0x00000000 },
+   { 0x02010010, 0x20002d24, 0x020000e6, 0x00000000 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x000000d0 },
+   { 0x00000005, 0x23022d31, 0x00000044, 0x00030003 },
+   { 0x00000401, 0x233803bd, 0x000000f4, 0x00000000 },
+   { 0x00000c01, 0x233003bd, 0x00000060, 0x00000000 },
+   { 0x00000c01, 0x233403bd, 0x00000080, 0x00000000 },
+   { 0x00000c01, 0x232803bd, 0x000000c0, 0x00000000 },
+   { 0x00000801, 0x232c03bd, 0x000000a0, 0x00000000 },
+   { 0x00000001, 0x240803bc, 0x000000c0, 0x00000000 },
+   { 0x00000048, 0x24087fbc, 0x00000060, 0x41000000 },
+   { 0x00000448, 0x22e87fbd, 0x000000f4, 0x41e00000 },
+   { 0x00000c41, 0x22e47fbd, 0x00000080, 0x40800000 },
+   { 0x00000001, 0x241003bc, 0x00000060, 0x00000000 },
+   { 0x00000c48, 0x22f07fbd, 0x000000f4, 0x41000000 },
+   { 0x00000801, 0x22f40061, 0x00000000, 0x00000000 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00400001, 0x2400036c, 0x00000000, 0x00006ea2 },
+   { 0x00400040, 0x24002d8c, 0x00690400, 0x00460046 },
+   { 0x00400009, 0x22c02d8d, 0x00690400, 0x00050005 },
+   { 0x00400001, 0x2400036c, 0x00000000, 0x00006204 },
+   { 0x00400040, 0x24002d8c, 0x00690400, 0x00400040 },
+   { 0x00400409, 0x22402d8d, 0x00690400, 0x00050005 },
+   { 0x00000801, 0x22500061, 0x00000000, 0x01000100 },
+   { 0x00000001, 0x22d00061, 0x00000000, 0x00400040 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x00000290 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+   { 0x00000401, 0x233c0021, 0x000000fc, 0x00000000 },
+   { 0x00000801, 0x23240121, 0x000000f8, 0x00000000 },
+   { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+   { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+   { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+   { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+   { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+   { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+   { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+   { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+   { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+   { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+   { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+   { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+   { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+   { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+   { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+   { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+   { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+   { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+   { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+   { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+   { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+   { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+   { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+   { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+   { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+   { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+   { 0x02000031, 0x28000229, 0x00000200, 0x00000200 },
+   { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x048eb801 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000a000 },
+   { 0x02000031, 0x28800229, 0x00000200, 0x00000200 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x00000280 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+   { 0x00000040, 0x23240d21, 0x000000f8, 0x00000001 },
+   { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+   { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+   { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+   { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+   { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+   { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+   { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+   { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+   { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+   { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+   { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+   { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+   { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+   { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+   { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+   { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+   { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+   { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+   { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+   { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+   { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+   { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+   { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+   { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+   { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+   { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+   { 0x02000031, 0x2a000229, 0x00000200, 0x00000200 },
+   { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x048eb801 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000a000 },
+   { 0x02000031, 0x2a800229, 0x00000200, 0x00000200 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x00000280 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+   { 0x00000040, 0x23240d21, 0x000000f8, 0x00000002 },
+   { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+   { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+   { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+   { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+   { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+   { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+   { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+   { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+   { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+   { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+   { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+   { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+   { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+   { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+   { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+   { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+   { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+   { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+   { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+   { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+   { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+   { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+   { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+   { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+   { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+   { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+   { 0x02000031, 0x2c000229, 0x00000200, 0x00000200 },
+   { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x048eb801 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000a000 },
+   { 0x02000031, 0x2c800229, 0x00000200, 0x00000200 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x00000280 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+   { 0x00000040, 0x23240d21, 0x000000f8, 0x00000003 },
+   { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+   { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+   { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+   { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+   { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+   { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+   { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+   { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+   { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+   { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+   { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+   { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+   { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+   { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+   { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+   { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+   { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+   { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+   { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+   { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+   { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+   { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+   { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+   { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+   { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+   { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+   { 0x02000031, 0x2e000229, 0x00000200, 0x00000200 },
+   { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x048eb801 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000a000 },
+   { 0x02000031, 0x2e800229, 0x00000200, 0x00000200 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00400040, 0x22002da8, 0x006902c0, 0x00000000 },
+   { 0x00400040, 0x22082da8, 0x006902c0, 0x02000200 },
+   { 0x00600001, 0x23800021, 0x008d0360, 0x00000000 },
+   { 0x00600001, 0x24a00021, 0x008d0360, 0x00000000 },
+   { 0x00200401, 0x238001a5, 0x004500e0, 0x00000000 },
+   { 0x00000401, 0x24a001a5, 0x000000e0, 0x00000000 },
+   { 0x00000c08, 0x24a43da5, 0x000000e2, 0x00010001 },
+   { 0x00000801, 0x23880061, 0x00000000, 0x000f000f },
+   { 0x00000801, 0x24a80061, 0x00000000, 0x0007000f },
+   { 0x80800040, 0xa4002d29, 0x00b18400, 0x00800080 },
+   { 0x80800040, 0xa4202d29, 0x00b18420, 0x00800080 },
+   { 0x80800040, 0xa4402d29, 0x00b18440, 0x00800080 },
+   { 0x80800040, 0xa4602d29, 0x00b18460, 0x00800080 },
+   { 0x00600008, 0x45c03d29, 0x00ae8800, 0x00010001 },
+   { 0x00600008, 0x45e03d29, 0x00ae8820, 0x00010001 },
+   { 0x00600008, 0x46003d29, 0x00ae8840, 0x00010001 },
+   { 0x00600008, 0x46203d29, 0x00ae8860, 0x00010001 },
+   { 0x00600040, 0x45c02529, 0x00ae05c0, 0x00ae05e0 },
+   { 0x80600040, 0xc8002d29, 0x00ae05c0, 0x00800080 },
+   { 0x00600040, 0x46002529, 0x00ae0600, 0x00ae0620 },
+   { 0x80600040, 0xc8402d29, 0x00ae0600, 0x00800080 },
+   { 0x00600008, 0x46403d29, 0x00ae8000, 0x00010001 },
+   { 0x00600008, 0x46603d29, 0x00ae8020, 0x00010001 },
+   { 0x00600008, 0x46803d29, 0x00ae8040, 0x00010001 },
+   { 0x00600008, 0x46a03d29, 0x00ae8060, 0x00010001 },
+   { 0x00600040, 0x46402529, 0x00ae0640, 0x00ae0660 },
+   { 0x80600040, 0xc0002d29, 0x00ae0640, 0x00800080 },
+   { 0x00600040, 0x46802529, 0x00ae0680, 0x00ae06a0 },
+   { 0x80600040, 0xc0402d29, 0x00ae0680, 0x00800080 },
+   { 0x00400040, 0x22002da8, 0x006902c0, 0x04000400 },
+   { 0x80800040, 0xb4002d29, 0x00b19400, 0x00800080 },
+   { 0x80800040, 0xb4202d29, 0x00b19420, 0x00800080 },
+   { 0x80800040, 0xb4402d29, 0x00b19440, 0x00800080 },
+   { 0x80800040, 0xb4602d29, 0x00b19460, 0x00800080 },
+   { 0x00600008, 0x45c03d29, 0x00ae9800, 0x00010001 },
+   { 0x00600008, 0x45e03d29, 0x00ae9820, 0x00010001 },
+   { 0x00600008, 0x46003d29, 0x00ae9840, 0x00010001 },
+   { 0x00600008, 0x46203d29, 0x00ae9860, 0x00010001 },
+   { 0x00600040, 0x45c02529, 0x00ae05c0, 0x00ae05e0 },
+   { 0x80600040, 0xd8002d29, 0x00ae05c0, 0x00800080 },
+   { 0x00600040, 0x46002529, 0x00ae0600, 0x00ae0620 },
+   { 0x80600040, 0xd8402d29, 0x00ae0600, 0x00800080 },
+   { 0x00600008, 0x46403d29, 0x00ae9000, 0x00010001 },
+   { 0x00600008, 0x46603d29, 0x00ae9020, 0x00010001 },
+   { 0x00600008, 0x46803d29, 0x00ae9040, 0x00010001 },
+   { 0x00600008, 0x46a03d29, 0x00ae9060, 0x00010001 },
+   { 0x00600040, 0x46402529, 0x00ae0640, 0x00ae0660 },
+   { 0x80600040, 0xd0002d29, 0x00ae0640, 0x00800080 },
+   { 0x00600040, 0x46802529, 0x00ae0680, 0x00ae06a0 },
+   { 0x80600040, 0xd0402d29, 0x00ae0680, 0x00800080 },
+   { 0x00400040, 0x22082da8, 0x006902c0, 0x06000600 },
+   { 0x80800040, 0xa4002d29, 0x00b18400, 0x00800080 },
+   { 0x80800040, 0xa4202d29, 0x00b18420, 0x00800080 },
+   { 0x80800040, 0xa4402d29, 0x00b18440, 0x00800080 },
+   { 0x80800040, 0xa4602d29, 0x00b18460, 0x00800080 },
+   { 0x00600008, 0x45c03d29, 0x00ae8800, 0x00010001 },
+   { 0x00600008, 0x45e03d29, 0x00ae8820, 0x00010001 },
+   { 0x00600008, 0x46003d29, 0x00ae8840, 0x00010001 },
+   { 0x00600008, 0x46203d29, 0x00ae8860, 0x00010001 },
+   { 0x00600040, 0x45c02529, 0x00ae05c0, 0x00ae05e0 },
+   { 0x80600040, 0xc8002d29, 0x00ae05c0, 0x00800080 },
+   { 0x00600040, 0x46002529, 0x00ae0600, 0x00ae0620 },
+   { 0x80600040, 0xc8402d29, 0x00ae0600, 0x00800080 },
+   { 0x00600008, 0x46403d29, 0x00ae8000, 0x00010001 },
+   { 0x00600008, 0x46603d29, 0x00ae8020, 0x00010001 },
+   { 0x00600008, 0x46803d29, 0x00ae8040, 0x00010001 },
+   { 0x00600008, 0x46a03d29, 0x00ae8060, 0x00010001 },
+   { 0x00600040, 0x46402529, 0x00ae0640, 0x00ae0660 },
+   { 0x80600040, 0xc0002d29, 0x00ae0640, 0x00800080 },
+   { 0x00600040, 0x46802529, 0x00ae0680, 0x00ae06a0 },
+   { 0x80600040, 0xc0402d29, 0x00ae0680, 0x00800080 },
+   { 0x00400040, 0x22002da8, 0x006902c0, 0x04000400 },
+   { 0x80800040, 0xb4002d29, 0x00b19400, 0x00800080 },
+   { 0x80800040, 0xb4202d29, 0x00b19420, 0x00800080 },
+   { 0x80800040, 0xb4402d29, 0x00b19440, 0x00800080 },
+   { 0x80800040, 0xb4602d29, 0x00b19460, 0x00800080 },
+   { 0x00600008, 0x45c03d29, 0x00ae9800, 0x00010001 },
+   { 0x00600008, 0x45e03d29, 0x00ae9820, 0x00010001 },
+   { 0x00600008, 0x46003d29, 0x00ae9840, 0x00010001 },
+   { 0x00600008, 0x46203d29, 0x00ae9860, 0x00010001 },
+   { 0x00600040, 0x45c02529, 0x00ae05c0, 0x00ae05e0 },
+   { 0x80600040, 0xd8002d29, 0x00ae05c0, 0x00800080 },
+   { 0x00600040, 0x46002529, 0x00ae0600, 0x00ae0620 },
+   { 0x80600040, 0xd8402d29, 0x00ae0600, 0x00800080 },
+   { 0x00600008, 0x46403d29, 0x00ae9000, 0x00010001 },
+   { 0x00600008, 0x46603d29, 0x00ae9020, 0x00010001 },
+   { 0x00600008, 0x46803d29, 0x00ae9040, 0x00010001 },
+   { 0x00600008, 0x46a03d29, 0x00ae9060, 0x00010001 },
+   { 0x00600040, 0x46402529, 0x00ae0640, 0x00ae0660 },
+   { 0x80600040, 0xd0002d29, 0x00ae0640, 0x00800080 },
+   { 0x00600040, 0x46802529, 0x00ae0680, 0x00ae06a0 },
+   { 0x80600040, 0xd0402d29, 0x00ae0680, 0x00800080 },
+   { 0x00400040, 0x22082da8, 0x006902c0, 0x06000600 },
+   { 0x00400040, 0x22002da8, 0x006902c0, 0x00000000 },
+   { 0x00400040, 0x22082da8, 0x006902c0, 0x02000200 },
+   { 0x00800401, 0x23a00231, 0x00d28401, 0x00000000 },
+   { 0x00800801, 0x23b00231, 0x00d28421, 0x00000000 },
+   { 0x00800401, 0x23c00231, 0x00d28441, 0x00000000 },
+   { 0x00800801, 0x23d00231, 0x00d28461, 0x00000000 },
+   { 0x00600401, 0x44c00231, 0x00cf8801, 0x00000000 },
+   { 0x00600c01, 0x44d00231, 0x00cf8841, 0x00000000 },
+   { 0x00600c01, 0x44c10231, 0x00cf8001, 0x00000000 },
+   { 0x00600801, 0x44d10231, 0x00cf8041, 0x00000000 },
+   { 0x00400040, 0x22002da8, 0x006902c0, 0x04000400 },
+   { 0x00800401, 0x23e00231, 0x00d29401, 0x00000000 },
+   { 0x00800801, 0x23f00231, 0x00d29421, 0x00000000 },
+   { 0x00800401, 0x24000231, 0x00d29441, 0x00000000 },
+   { 0x00800801, 0x24100231, 0x00d29461, 0x00000000 },
+   { 0x00600401, 0x44e00231, 0x00cf9801, 0x00000000 },
+   { 0x00600c01, 0x44f00231, 0x00cf9841, 0x00000000 },
+   { 0x00600c01, 0x44e10231, 0x00cf9001, 0x00000000 },
+   { 0x00600801, 0x44f10231, 0x00cf9041, 0x00000000 },
+   { 0x00400040, 0x22082da8, 0x006902c0, 0x06000600 },
+   { 0x00800401, 0x24200231, 0x00d28401, 0x00000000 },
+   { 0x00800801, 0x24300231, 0x00d28421, 0x00000000 },
+   { 0x00800401, 0x24400231, 0x00d28441, 0x00000000 },
+   { 0x00800801, 0x24500231, 0x00d28461, 0x00000000 },
+   { 0x00600401, 0x45000231, 0x00cf8801, 0x00000000 },
+   { 0x00600c01, 0x45100231, 0x00cf8841, 0x00000000 },
+   { 0x00600c01, 0x45010231, 0x00cf8001, 0x00000000 },
+   { 0x00600801, 0x45110231, 0x00cf8041, 0x00000000 },
+   { 0x00800401, 0x24600231, 0x00d29401, 0x00000000 },
+   { 0x00800801, 0x24700231, 0x00d29421, 0x00000000 },
+   { 0x00800401, 0x24800231, 0x00d29441, 0x00000000 },
+   { 0x00800801, 0x24900231, 0x00d29461, 0x00000000 },
+   { 0x00600401, 0x45200231, 0x00cf9801, 0x00000000 },
+   { 0x00600c01, 0x45300231, 0x00cf9841, 0x00000000 },
+   { 0x00600c01, 0x45210231, 0x00cf9001, 0x00000000 },
+   { 0x00600801, 0x45310231, 0x00cf9041, 0x00000000 },
+   { 0x05000031, 0x20000e24, 0x00000380, 0x120a8018 },
+   { 0x05000031, 0x20000e24, 0x000004a0, 0x0a0a8019 },
+   { 0x00600001, 0x2fe00021, 0x008d0000, 0x00000000 },
+   { 0x07000031, 0x20001e24, 0x00000fe0, 0x82000010 },
diff --git a/src/shaders/post_processing/gen7/avs.g7b b/src/shaders/post_processing/gen7/avs.g7b
new file mode 100644
index 0000000..445ae01
--- /dev/null
+++ b/src/shaders/post_processing/gen7/avs.g7b
@@ -0,0 +1,654 @@
+   { 0x00600001, 0x23600021, 0x008d0000, 0x00000000 },
+   { 0x00600001, 0x23200021, 0x008d0000, 0x00000000 },
+   { 0x00600001, 0x23400021, 0x008d0000, 0x00000000 },
+   { 0x00600041, 0x206077bd, 0x008d0060, 0x000000f0 },
+   { 0x00200001, 0x211401bd, 0x004500e0, 0x00000000 },
+   { 0x01600010, 0x20002e24, 0x0000005a, 0x00010001 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x00000000 },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+   { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+   { 0x00110048, 0x20c077bd, 0x00000060, 0x00000114 },
+   { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+   { 0x00000048, 0x20a077bd, 0x00000080, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+   { 0x00110048, 0x20c077bd, 0x00000060, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+   { 0x00000048, 0x20a077bd, 0x00000080, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+   { 0x00110048, 0x20c077bd, 0x00000060, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+   { 0x00000048, 0x20a077bd, 0x00000080, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+   { 0x00110048, 0x20c077bd, 0x00000060, 0x00000220 },
+   { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+   { 0x00000048, 0x20a077bd, 0x00000080, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x00020002 },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+   { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+   { 0x00110048, 0x20c477bd, 0x00000064, 0x00000114 },
+   { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+   { 0x00000048, 0x20a477bd, 0x00000084, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+   { 0x00110048, 0x20c477bd, 0x00000064, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+   { 0x00000048, 0x20a477bd, 0x00000084, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+   { 0x00110048, 0x20c477bd, 0x00000064, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+   { 0x00000048, 0x20a477bd, 0x00000084, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+   { 0x00110048, 0x20c477bd, 0x00000064, 0x00000220 },
+   { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+   { 0x00000048, 0x20a477bd, 0x00000084, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x00040004 },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+   { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+   { 0x00110048, 0x20c877bd, 0x00000068, 0x00000114 },
+   { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+   { 0x00000048, 0x20a877bd, 0x00000088, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+   { 0x00110048, 0x20c877bd, 0x00000068, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+   { 0x00000048, 0x20a877bd, 0x00000088, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+   { 0x00110048, 0x20c877bd, 0x00000068, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+   { 0x00000048, 0x20a877bd, 0x00000088, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+   { 0x00110048, 0x20c877bd, 0x00000068, 0x00000220 },
+   { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+   { 0x00000048, 0x20a877bd, 0x00000088, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x00060006 },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+   { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+   { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000114 },
+   { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+   { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+   { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+   { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+   { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+   { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+   { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000220 },
+   { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+   { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x00080008 },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+   { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+   { 0x00110048, 0x20d077bd, 0x00000070, 0x00000114 },
+   { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+   { 0x00000048, 0x20b077bd, 0x00000090, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+   { 0x00110048, 0x20d077bd, 0x00000070, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+   { 0x00000048, 0x20b077bd, 0x00000090, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+   { 0x00110048, 0x20d077bd, 0x00000070, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+   { 0x00000048, 0x20b077bd, 0x00000090, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+   { 0x00110048, 0x20d077bd, 0x00000070, 0x00000220 },
+   { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+   { 0x00000048, 0x20b077bd, 0x00000090, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x000a000a },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+   { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+   { 0x00110048, 0x20d477bd, 0x00000074, 0x00000114 },
+   { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+   { 0x00000048, 0x20b477bd, 0x00000094, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+   { 0x00110048, 0x20d477bd, 0x00000074, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+   { 0x00000048, 0x20b477bd, 0x00000094, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+   { 0x00110048, 0x20d477bd, 0x00000074, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+   { 0x00000048, 0x20b477bd, 0x00000094, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+   { 0x00110048, 0x20d477bd, 0x00000074, 0x00000220 },
+   { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+   { 0x00000048, 0x20b477bd, 0x00000094, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x000c000c },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+   { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+   { 0x00110048, 0x20d877bd, 0x00000078, 0x00000114 },
+   { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+   { 0x00000048, 0x20b877bd, 0x00000098, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+   { 0x00110048, 0x20d877bd, 0x00000078, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+   { 0x00000048, 0x20b877bd, 0x00000098, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+   { 0x00110048, 0x20d877bd, 0x00000078, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+   { 0x00000048, 0x20b877bd, 0x00000098, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+   { 0x00110048, 0x20d877bd, 0x00000078, 0x00000220 },
+   { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+   { 0x00000048, 0x20b877bd, 0x00000098, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x000e000e },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+   { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+   { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000114 },
+   { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+   { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+   { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+   { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+   { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+   { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+   { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000220 },
+   { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+   { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00800001, 0x22a00169, 0x00000000, 0xffffffff },
+   { 0x02000010, 0x20002d24, 0x020000e4, 0x00000000 },
+   { 0x02010010, 0x20002d24, 0x020000e6, 0x00000000 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x0000001a },
+   { 0x00000005, 0x23022d31, 0x00000044, 0x00030003 },
+   { 0x00000401, 0x233803bd, 0x000000f4, 0x00000000 },
+   { 0x00000c01, 0x233003bd, 0x00000060, 0x00000000 },
+   { 0x00000c01, 0x233403bd, 0x00000080, 0x00000000 },
+   { 0x00000c01, 0x232803bd, 0x000000c0, 0x00000000 },
+   { 0x00000801, 0x232c03bd, 0x000000a0, 0x00000000 },
+   { 0x00000001, 0x240803bc, 0x000000c0, 0x00000000 },
+   { 0x00000048, 0x24087fbc, 0x00000060, 0x41000000 },
+   { 0x00000448, 0x22e87fbd, 0x000000f4, 0x41e00000 },
+   { 0x00000c41, 0x22e47fbd, 0x00000080, 0x40800000 },
+   { 0x00000001, 0x241003bc, 0x00000060, 0x00000000 },
+   { 0x00000c48, 0x22f07fbd, 0x000000f4, 0x41000000 },
+   { 0x00000801, 0x22f40061, 0x00000000, 0x00000000 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00400001, 0x2400036c, 0x00000000, 0x00006ea2 },
+   { 0x00400040, 0x24002d8c, 0x00690400, 0x00460046 },
+   { 0x00400009, 0x22c02d8d, 0x00690400, 0x00050005 },
+   { 0x00400001, 0x2400036c, 0x00000000, 0x00006204 },
+   { 0x00400040, 0x24002d8c, 0x00690400, 0x00400040 },
+   { 0x00400409, 0x22402d8d, 0x00690400, 0x00050005 },
+   { 0x00000801, 0x22500061, 0x00000000, 0x01000100 },
+   { 0x00000001, 0x22d00061, 0x00000000, 0x00400040 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x00000052 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+   { 0x00000401, 0x233c0021, 0x000000fc, 0x00000000 },
+   { 0x00000801, 0x23240121, 0x000000f8, 0x00000000 },
+   { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+   { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+   { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+   { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+   { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+   { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+   { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+   { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+   { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+   { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+   { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+   { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+   { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+   { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+   { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+   { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+   { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+   { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+   { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+   { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+   { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+   { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+   { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+   { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+   { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+   { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+   { 0x02000031, 0x28000229, 0x00000200, 0x00000200 },
+   { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x048eb801 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000a000 },
+   { 0x02000031, 0x28800229, 0x00000200, 0x00000200 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x00000050 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+   { 0x00000040, 0x23240d21, 0x000000f8, 0x00000001 },
+   { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+   { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+   { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+   { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+   { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+   { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+   { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+   { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+   { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+   { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+   { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+   { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+   { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+   { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+   { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+   { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+   { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+   { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+   { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+   { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+   { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+   { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+   { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+   { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+   { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+   { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+   { 0x02000031, 0x2a000229, 0x00000200, 0x00000200 },
+   { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x048eb801 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000a000 },
+   { 0x02000031, 0x2a800229, 0x00000200, 0x00000200 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x00000050 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+   { 0x00000040, 0x23240d21, 0x000000f8, 0x00000002 },
+   { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+   { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+   { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+   { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+   { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+   { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+   { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+   { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+   { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+   { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+   { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+   { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+   { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+   { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+   { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+   { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+   { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+   { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+   { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+   { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+   { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+   { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+   { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+   { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+   { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+   { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+   { 0x02000031, 0x2c000229, 0x00000200, 0x00000200 },
+   { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x048eb801 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000a000 },
+   { 0x02000031, 0x2c800229, 0x00000200, 0x00000200 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x00000050 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+   { 0x00000040, 0x23240d21, 0x000000f8, 0x00000003 },
+   { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+   { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+   { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+   { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+   { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+   { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+   { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+   { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+   { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+   { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+   { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+   { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+   { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+   { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+   { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+   { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+   { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+   { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+   { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+   { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+   { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+   { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+   { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+   { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+   { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+   { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+   { 0x02000031, 0x2e000229, 0x00000200, 0x00000200 },
+   { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x048eb801 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000a000 },
+   { 0x02000031, 0x2e800229, 0x00000200, 0x00000200 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00400040, 0x22002da8, 0x006902c0, 0x00000000 },
+   { 0x00400040, 0x22082da8, 0x006902c0, 0x02000200 },
+   { 0x00600001, 0x23800021, 0x008d0360, 0x00000000 },
+   { 0x00600001, 0x24a00021, 0x008d0360, 0x00000000 },
+   { 0x00200401, 0x238001a5, 0x004500e0, 0x00000000 },
+   { 0x00000401, 0x24a001a5, 0x000000e0, 0x00000000 },
+   { 0x00000c08, 0x24a43da5, 0x000000e2, 0x00010001 },
+   { 0x00000801, 0x23880061, 0x00000000, 0x000f000f },
+   { 0x00000801, 0x24a80061, 0x00000000, 0x0007000f },
+   { 0x80800040, 0xa4002d29, 0x00b18400, 0x00800080 },
+   { 0x80800040, 0xa4202d29, 0x00b18420, 0x00800080 },
+   { 0x80800040, 0xa4402d29, 0x00b18440, 0x00800080 },
+   { 0x80800040, 0xa4602d29, 0x00b18460, 0x00800080 },
+   { 0x00600008, 0x45c03d29, 0x00ae8800, 0x00010001 },
+   { 0x00600008, 0x45e03d29, 0x00ae8820, 0x00010001 },
+   { 0x00600008, 0x46003d29, 0x00ae8840, 0x00010001 },
+   { 0x00600008, 0x46203d29, 0x00ae8860, 0x00010001 },
+   { 0x00600040, 0x45c02529, 0x00ae05c0, 0x00ae05e0 },
+   { 0x80600040, 0xc8002d29, 0x00ae05c0, 0x00800080 },
+   { 0x00600040, 0x46002529, 0x00ae0600, 0x00ae0620 },
+   { 0x80600040, 0xc8402d29, 0x00ae0600, 0x00800080 },
+   { 0x00600008, 0x46403d29, 0x00ae8000, 0x00010001 },
+   { 0x00600008, 0x46603d29, 0x00ae8020, 0x00010001 },
+   { 0x00600008, 0x46803d29, 0x00ae8040, 0x00010001 },
+   { 0x00600008, 0x46a03d29, 0x00ae8060, 0x00010001 },
+   { 0x00600040, 0x46402529, 0x00ae0640, 0x00ae0660 },
+   { 0x80600040, 0xc0002d29, 0x00ae0640, 0x00800080 },
+   { 0x00600040, 0x46802529, 0x00ae0680, 0x00ae06a0 },
+   { 0x80600040, 0xc0402d29, 0x00ae0680, 0x00800080 },
+   { 0x00400040, 0x22002da8, 0x006902c0, 0x04000400 },
+   { 0x80800040, 0xb4002d29, 0x00b19400, 0x00800080 },
+   { 0x80800040, 0xb4202d29, 0x00b19420, 0x00800080 },
+   { 0x80800040, 0xb4402d29, 0x00b19440, 0x00800080 },
+   { 0x80800040, 0xb4602d29, 0x00b19460, 0x00800080 },
+   { 0x00600008, 0x45c03d29, 0x00ae9800, 0x00010001 },
+   { 0x00600008, 0x45e03d29, 0x00ae9820, 0x00010001 },
+   { 0x00600008, 0x46003d29, 0x00ae9840, 0x00010001 },
+   { 0x00600008, 0x46203d29, 0x00ae9860, 0x00010001 },
+   { 0x00600040, 0x45c02529, 0x00ae05c0, 0x00ae05e0 },
+   { 0x80600040, 0xd8002d29, 0x00ae05c0, 0x00800080 },
+   { 0x00600040, 0x46002529, 0x00ae0600, 0x00ae0620 },
+   { 0x80600040, 0xd8402d29, 0x00ae0600, 0x00800080 },
+   { 0x00600008, 0x46403d29, 0x00ae9000, 0x00010001 },
+   { 0x00600008, 0x46603d29, 0x00ae9020, 0x00010001 },
+   { 0x00600008, 0x46803d29, 0x00ae9040, 0x00010001 },
+   { 0x00600008, 0x46a03d29, 0x00ae9060, 0x00010001 },
+   { 0x00600040, 0x46402529, 0x00ae0640, 0x00ae0660 },
+   { 0x80600040, 0xd0002d29, 0x00ae0640, 0x00800080 },
+   { 0x00600040, 0x46802529, 0x00ae0680, 0x00ae06a0 },
+   { 0x80600040, 0xd0402d29, 0x00ae0680, 0x00800080 },
+   { 0x00400040, 0x22082da8, 0x006902c0, 0x06000600 },
+   { 0x80800040, 0xa4002d29, 0x00b18400, 0x00800080 },
+   { 0x80800040, 0xa4202d29, 0x00b18420, 0x00800080 },
+   { 0x80800040, 0xa4402d29, 0x00b18440, 0x00800080 },
+   { 0x80800040, 0xa4602d29, 0x00b18460, 0x00800080 },
+   { 0x00600008, 0x45c03d29, 0x00ae8800, 0x00010001 },
+   { 0x00600008, 0x45e03d29, 0x00ae8820, 0x00010001 },
+   { 0x00600008, 0x46003d29, 0x00ae8840, 0x00010001 },
+   { 0x00600008, 0x46203d29, 0x00ae8860, 0x00010001 },
+   { 0x00600040, 0x45c02529, 0x00ae05c0, 0x00ae05e0 },
+   { 0x80600040, 0xc8002d29, 0x00ae05c0, 0x00800080 },
+   { 0x00600040, 0x46002529, 0x00ae0600, 0x00ae0620 },
+   { 0x80600040, 0xc8402d29, 0x00ae0600, 0x00800080 },
+   { 0x00600008, 0x46403d29, 0x00ae8000, 0x00010001 },
+   { 0x00600008, 0x46603d29, 0x00ae8020, 0x00010001 },
+   { 0x00600008, 0x46803d29, 0x00ae8040, 0x00010001 },
+   { 0x00600008, 0x46a03d29, 0x00ae8060, 0x00010001 },
+   { 0x00600040, 0x46402529, 0x00ae0640, 0x00ae0660 },
+   { 0x80600040, 0xc0002d29, 0x00ae0640, 0x00800080 },
+   { 0x00600040, 0x46802529, 0x00ae0680, 0x00ae06a0 },
+   { 0x80600040, 0xc0402d29, 0x00ae0680, 0x00800080 },
+   { 0x00400040, 0x22002da8, 0x006902c0, 0x04000400 },
+   { 0x80800040, 0xb4002d29, 0x00b19400, 0x00800080 },
+   { 0x80800040, 0xb4202d29, 0x00b19420, 0x00800080 },
+   { 0x80800040, 0xb4402d29, 0x00b19440, 0x00800080 },
+   { 0x80800040, 0xb4602d29, 0x00b19460, 0x00800080 },
+   { 0x00600008, 0x45c03d29, 0x00ae9800, 0x00010001 },
+   { 0x00600008, 0x45e03d29, 0x00ae9820, 0x00010001 },
+   { 0x00600008, 0x46003d29, 0x00ae9840, 0x00010001 },
+   { 0x00600008, 0x46203d29, 0x00ae9860, 0x00010001 },
+   { 0x00600040, 0x45c02529, 0x00ae05c0, 0x00ae05e0 },
+   { 0x80600040, 0xd8002d29, 0x00ae05c0, 0x00800080 },
+   { 0x00600040, 0x46002529, 0x00ae0600, 0x00ae0620 },
+   { 0x80600040, 0xd8402d29, 0x00ae0600, 0x00800080 },
+   { 0x00600008, 0x46403d29, 0x00ae9000, 0x00010001 },
+   { 0x00600008, 0x46603d29, 0x00ae9020, 0x00010001 },
+   { 0x00600008, 0x46803d29, 0x00ae9040, 0x00010001 },
+   { 0x00600008, 0x46a03d29, 0x00ae9060, 0x00010001 },
+   { 0x00600040, 0x46402529, 0x00ae0640, 0x00ae0660 },
+   { 0x80600040, 0xd0002d29, 0x00ae0640, 0x00800080 },
+   { 0x00600040, 0x46802529, 0x00ae0680, 0x00ae06a0 },
+   { 0x80600040, 0xd0402d29, 0x00ae0680, 0x00800080 },
+   { 0x00400040, 0x22082da8, 0x006902c0, 0x06000600 },
+   { 0x00400040, 0x22002da8, 0x006902c0, 0x00000000 },
+   { 0x00400040, 0x22082da8, 0x006902c0, 0x02000200 },
+   { 0x00800401, 0x23a00231, 0x00d28401, 0x00000000 },
+   { 0x00800801, 0x23b00231, 0x00d28421, 0x00000000 },
+   { 0x00800401, 0x23c00231, 0x00d28441, 0x00000000 },
+   { 0x00800801, 0x23d00231, 0x00d28461, 0x00000000 },
+   { 0x00600401, 0x44c00231, 0x00cf8801, 0x00000000 },
+   { 0x00600c01, 0x44d00231, 0x00cf8841, 0x00000000 },
+   { 0x00600c01, 0x44c10231, 0x00cf8001, 0x00000000 },
+   { 0x00600801, 0x44d10231, 0x00cf8041, 0x00000000 },
+   { 0x00400040, 0x22002da8, 0x006902c0, 0x04000400 },
+   { 0x00800401, 0x23e00231, 0x00d29401, 0x00000000 },
+   { 0x00800801, 0x23f00231, 0x00d29421, 0x00000000 },
+   { 0x00800401, 0x24000231, 0x00d29441, 0x00000000 },
+   { 0x00800801, 0x24100231, 0x00d29461, 0x00000000 },
+   { 0x00600401, 0x44e00231, 0x00cf9801, 0x00000000 },
+   { 0x00600c01, 0x44f00231, 0x00cf9841, 0x00000000 },
+   { 0x00600c01, 0x44e10231, 0x00cf9001, 0x00000000 },
+   { 0x00600801, 0x44f10231, 0x00cf9041, 0x00000000 },
+   { 0x00400040, 0x22082da8, 0x006902c0, 0x06000600 },
+   { 0x00800401, 0x24200231, 0x00d28401, 0x00000000 },
+   { 0x00800801, 0x24300231, 0x00d28421, 0x00000000 },
+   { 0x00800401, 0x24400231, 0x00d28441, 0x00000000 },
+   { 0x00800801, 0x24500231, 0x00d28461, 0x00000000 },
+   { 0x00600401, 0x45000231, 0x00cf8801, 0x00000000 },
+   { 0x00600c01, 0x45100231, 0x00cf8841, 0x00000000 },
+   { 0x00600c01, 0x45010231, 0x00cf8001, 0x00000000 },
+   { 0x00600801, 0x45110231, 0x00cf8041, 0x00000000 },
+   { 0x00800401, 0x24600231, 0x00d29401, 0x00000000 },
+   { 0x00800801, 0x24700231, 0x00d29421, 0x00000000 },
+   { 0x00800401, 0x24800231, 0x00d29441, 0x00000000 },
+   { 0x00800801, 0x24900231, 0x00d29461, 0x00000000 },
+   { 0x00600401, 0x45200231, 0x00cf9801, 0x00000000 },
+   { 0x00600c01, 0x45300231, 0x00cf9841, 0x00000000 },
+   { 0x00600c01, 0x45210231, 0x00cf9001, 0x00000000 },
+   { 0x00600801, 0x45310231, 0x00cf9041, 0x00000000 },
+   { 0x05000031, 0x20000e24, 0x00000380, 0x120a8018 },
+   { 0x05000031, 0x20000e24, 0x000004a0, 0x0a0a8019 },
+   { 0x00600001, 0x2fe00021, 0x008d0000, 0x00000000 },
+   { 0x07000031, 0x20001e24, 0x00000fe0, 0x82000010 },
diff --git a/src/shaders/post_processing/gen7/dndi.asm b/src/shaders/post_processing/gen7/dndi.asm
new file mode 100644
index 0000000..b820fdb
--- /dev/null
+++ b/src/shaders/post_processing/gen7/dndi.asm
@@ -0,0 +1,11 @@
+// Module name: DNDI
+.kernel DNDI
+.code
+
+#include "NV12_DI_NV12.g4a"
+
+.end_code  
+
+.end_kernel
+
+// end of DNDI
diff --git a/src/shaders/post_processing/gen7/dndi.g75b b/src/shaders/post_processing/gen7/dndi.g75b
new file mode 100644
index 0000000..ffa9d49
--- /dev/null
+++ b/src/shaders/post_processing/gen7/dndi.g75b
@@ -0,0 +1,46 @@
+   { 0x00600001, 0x22400021, 0x008d0000, 0x00000000 },
+   { 0x00000401, 0x226801ad, 0x000000e0, 0x00000000 },
+   { 0x00000801, 0x227801ad, 0x000000e2, 0x00000000 },
+   { 0x02600031, 0x25c00e21, 0x00000240, 0x04ae8003 },
+   { 0x00200001, 0x20e0012d, 0x004506fc, 0x00000000 },
+   { 0x00600001, 0x22800021, 0x008d0000, 0x00000000 },
+   { 0x00600001, 0x22a00021, 0x008d06c0, 0x00000000 },
+   { 0x00000408, 0x22803da1, 0x000000e0, 0x00010001 },
+   { 0x00000c01, 0x228401a1, 0x000000e2, 0x00000000 },
+   { 0x00000801, 0x22880061, 0x00000000, 0x00030007 },
+   { 0x05600031, 0x20000e24, 0x00000280, 0x040a8021 },
+   { 0x00600001, 0x23200061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x23000021, 0x008d0000, 0x00000000 },
+   { 0x00000408, 0x23003da1, 0x000000e0, 0x00010001 },
+   { 0x00000041, 0x24043da0, 0x000000e2, 0x00030003 },
+   { 0x00000c08, 0x23043c01, 0x00000404, 0x00020002 },
+   { 0x00000801, 0x23080061, 0x00000000, 0x00020007 },
+   { 0x00200040, 0x23002421, 0x00450300, 0x00450038 },
+   { 0x00000401, 0x23200021, 0x000006e4, 0x00000000 },
+   { 0x00200c01, 0x432c0021, 0x004506ec, 0x00000000 },
+   { 0x00200801, 0x43280021, 0x004506f4, 0x00000000 },
+   { 0x05600031, 0x20000e24, 0x00000300, 0x040a8021 },
+   { 0x00600001, 0x23800021, 0x008d0000, 0x00000000 },
+   { 0x00000401, 0x238001a5, 0x000000e0, 0x00000000 },
+   { 0x00000c01, 0x238401a5, 0x000000e2, 0x00000000 },
+   { 0x00000801, 0x23880061, 0x00000000, 0x0003000f },
+   { 0x00600001, 0x22400021, 0x008d0380, 0x00000000 },
+   { 0x00600001, 0x22e00021, 0x008d0380, 0x00000000 },
+   { 0x00600001, 0x22600021, 0x008d05c0, 0x00000000 },
+   { 0x00600001, 0x22800021, 0x008d05e0, 0x00000000 },
+   { 0x00600001, 0x23000021, 0x008d0640, 0x00000000 },
+   { 0x00600001, 0x23200021, 0x008d0660, 0x00000000 },
+   { 0x0000040c, 0x23843ca5, 0x00000384, 0x00010001 },
+   { 0x00000801, 0x23880061, 0x00000000, 0x0001000f },
+   { 0x00800401, 0x42c00231, 0x00ce0601, 0x00000000 },
+   { 0x00800801, 0x42c10231, 0x00ce0600, 0x00000000 },
+   { 0x00800401, 0x43600231, 0x00ce0681, 0x00000000 },
+   { 0x00800801, 0x43610231, 0x00ce0680, 0x00000000 },
+   { 0x00600001, 0x22a00021, 0x008d0380, 0x00000000 },
+   { 0x00600001, 0x23400021, 0x008d0380, 0x00000000 },
+   { 0x05600031, 0x20000e24, 0x00000240, 0x060a801b },
+   { 0x05600031, 0x20000e24, 0x000002e0, 0x060a801e },
+   { 0x05600031, 0x20000e24, 0x000002a0, 0x040a801c },
+   { 0x05600031, 0x20000e24, 0x00000340, 0x040a801f },
+   { 0x00600001, 0x2fe00021, 0x008d0000, 0x00000000 },
+   { 0x07000031, 0x20001e24, 0x00000fe0, 0x82000010 },
diff --git a/src/shaders/post_processing/gen7/dndi.g7b b/src/shaders/post_processing/gen7/dndi.g7b
new file mode 100644
index 0000000..ffa9d49
--- /dev/null
+++ b/src/shaders/post_processing/gen7/dndi.g7b
@@ -0,0 +1,46 @@
+   { 0x00600001, 0x22400021, 0x008d0000, 0x00000000 },
+   { 0x00000401, 0x226801ad, 0x000000e0, 0x00000000 },
+   { 0x00000801, 0x227801ad, 0x000000e2, 0x00000000 },
+   { 0x02600031, 0x25c00e21, 0x00000240, 0x04ae8003 },
+   { 0x00200001, 0x20e0012d, 0x004506fc, 0x00000000 },
+   { 0x00600001, 0x22800021, 0x008d0000, 0x00000000 },
+   { 0x00600001, 0x22a00021, 0x008d06c0, 0x00000000 },
+   { 0x00000408, 0x22803da1, 0x000000e0, 0x00010001 },
+   { 0x00000c01, 0x228401a1, 0x000000e2, 0x00000000 },
+   { 0x00000801, 0x22880061, 0x00000000, 0x00030007 },
+   { 0x05600031, 0x20000e24, 0x00000280, 0x040a8021 },
+   { 0x00600001, 0x23200061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x23000021, 0x008d0000, 0x00000000 },
+   { 0x00000408, 0x23003da1, 0x000000e0, 0x00010001 },
+   { 0x00000041, 0x24043da0, 0x000000e2, 0x00030003 },
+   { 0x00000c08, 0x23043c01, 0x00000404, 0x00020002 },
+   { 0x00000801, 0x23080061, 0x00000000, 0x00020007 },
+   { 0x00200040, 0x23002421, 0x00450300, 0x00450038 },
+   { 0x00000401, 0x23200021, 0x000006e4, 0x00000000 },
+   { 0x00200c01, 0x432c0021, 0x004506ec, 0x00000000 },
+   { 0x00200801, 0x43280021, 0x004506f4, 0x00000000 },
+   { 0x05600031, 0x20000e24, 0x00000300, 0x040a8021 },
+   { 0x00600001, 0x23800021, 0x008d0000, 0x00000000 },
+   { 0x00000401, 0x238001a5, 0x000000e0, 0x00000000 },
+   { 0x00000c01, 0x238401a5, 0x000000e2, 0x00000000 },
+   { 0x00000801, 0x23880061, 0x00000000, 0x0003000f },
+   { 0x00600001, 0x22400021, 0x008d0380, 0x00000000 },
+   { 0x00600001, 0x22e00021, 0x008d0380, 0x00000000 },
+   { 0x00600001, 0x22600021, 0x008d05c0, 0x00000000 },
+   { 0x00600001, 0x22800021, 0x008d05e0, 0x00000000 },
+   { 0x00600001, 0x23000021, 0x008d0640, 0x00000000 },
+   { 0x00600001, 0x23200021, 0x008d0660, 0x00000000 },
+   { 0x0000040c, 0x23843ca5, 0x00000384, 0x00010001 },
+   { 0x00000801, 0x23880061, 0x00000000, 0x0001000f },
+   { 0x00800401, 0x42c00231, 0x00ce0601, 0x00000000 },
+   { 0x00800801, 0x42c10231, 0x00ce0600, 0x00000000 },
+   { 0x00800401, 0x43600231, 0x00ce0681, 0x00000000 },
+   { 0x00800801, 0x43610231, 0x00ce0680, 0x00000000 },
+   { 0x00600001, 0x22a00021, 0x008d0380, 0x00000000 },
+   { 0x00600001, 0x23400021, 0x008d0380, 0x00000000 },
+   { 0x05600031, 0x20000e24, 0x00000240, 0x060a801b },
+   { 0x05600031, 0x20000e24, 0x000002e0, 0x060a801e },
+   { 0x05600031, 0x20000e24, 0x000002a0, 0x040a801c },
+   { 0x05600031, 0x20000e24, 0x00000340, 0x040a801f },
+   { 0x00600001, 0x2fe00021, 0x008d0000, 0x00000000 },
+   { 0x07000031, 0x20001e24, 0x00000fe0, 0x82000010 },
diff --git a/src/shaders/post_processing/gen7/nv12_dn_nv12.asm b/src/shaders/post_processing/gen7/nv12_dn_nv12.asm
new file mode 100644
index 0000000..94e7c6b
--- /dev/null
+++ b/src/shaders/post_processing/gen7/nv12_dn_nv12.asm
@@ -0,0 +1,5 @@
+// Module name: DN
+
+#include "NV12_DN_NV12.g4a"
+
+// end of DN
diff --git a/src/shaders/post_processing/gen7/nv12_dn_nv12.g75b b/src/shaders/post_processing/gen7/nv12_dn_nv12.g75b
new file mode 100644
index 0000000..a43e216
--- /dev/null
+++ b/src/shaders/post_processing/gen7/nv12_dn_nv12.g75b
@@ -0,0 +1,40 @@
+   { 0x00600001, 0x22400021, 0x008d0000, 0x00000000 },
+   { 0x00000401, 0x226801ad, 0x000000e0, 0x00000000 },
+   { 0x00000801, 0x227801ad, 0x000000e2, 0x00000000 },
+   { 0x02600031, 0x25c00e21, 0x00000240, 0x045e8003 },
+   { 0x00200001, 0x20e0012d, 0x0045065c, 0x00000000 },
+   { 0x00600001, 0x23600021, 0x008d0000, 0x00000000 },
+   { 0x00200001, 0x22e00021, 0x00450640, 0x00000000 },
+   { 0x00200008, 0x23603da1, 0x004500e0, 0x00020002 },
+   { 0x00000440, 0x23602421, 0x00000360, 0x00000038 },
+   { 0x00000801, 0x23680061, 0x00000000, 0x00010003 },
+   { 0x00600001, 0x22c00021, 0x008d0360, 0x00000000 },
+   { 0x05600031, 0x20000e24, 0x000002c0, 0x040a8021 },
+   { 0x00600001, 0x23200061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x23000021, 0x008d0000, 0x00000000 },
+   { 0x00000408, 0x23003da1, 0x000000e0, 0x00010001 },
+   { 0x00000041, 0x24043da0, 0x000000e2, 0x00030003 },
+   { 0x00000c08, 0x23043c01, 0x00000404, 0x00020002 },
+   { 0x00000801, 0x23080061, 0x00000000, 0x00050003 },
+   { 0x00200040, 0x23002421, 0x00450300, 0x00450038 },
+   { 0x00000401, 0x23200231, 0x00000648, 0x00000000 },
+   { 0x00000c01, 0x23260129, 0x00000656, 0x00000000 },
+   { 0x00200c01, 0x23280129, 0x00450658, 0x00000000 },
+   { 0x00000c01, 0x23320129, 0x00000650, 0x00000000 },
+   { 0x00200801, 0x23340129, 0x00450652, 0x00000000 },
+   { 0x05600031, 0x20000e24, 0x00000300, 0x040a8021 },
+   { 0x00200040, 0x236035a5, 0x004500e0, 0x00450088 },
+   { 0x0000040c, 0x23643ca5, 0x00000364, 0x00010001 },
+   { 0x00000801, 0x23680061, 0x00000000, 0x0004000f },
+   { 0x00600001, 0x24800021, 0x008d0360, 0x00000000 },
+   { 0x04600031, 0x27400e21, 0x00000480, 0x02390001 },
+   { 0x00000001, 0x24880061, 0x00000000, 0x0003000f },
+   { 0x00600001, 0x25a00021, 0x008d0000, 0x00000000 },
+   { 0x00200401, 0x25a001a1, 0x004500e0, 0x00000000 },
+   { 0x00000801, 0x25a80061, 0x00000000, 0x0007000f },
+   { 0x05600031, 0x20000e24, 0x000005a0, 0x0a0a8018 },
+   { 0x00600001, 0x24a00021, 0x008d0740, 0x00000000 },
+   { 0x00600001, 0x24c00021, 0x008d0760, 0x00000000 },
+   { 0x05600031, 0x20000e24, 0x00000480, 0x060a8019 },
+   { 0x00600001, 0x2fe00021, 0x008d0000, 0x00000000 },
+   { 0x07000031, 0x20001e24, 0x00000fe0, 0x82000010 },
diff --git a/src/shaders/post_processing/gen7/nv12_dn_nv12.g7b b/src/shaders/post_processing/gen7/nv12_dn_nv12.g7b
new file mode 100644
index 0000000..a43e216
--- /dev/null
+++ b/src/shaders/post_processing/gen7/nv12_dn_nv12.g7b
@@ -0,0 +1,40 @@
+   { 0x00600001, 0x22400021, 0x008d0000, 0x00000000 },
+   { 0x00000401, 0x226801ad, 0x000000e0, 0x00000000 },
+   { 0x00000801, 0x227801ad, 0x000000e2, 0x00000000 },
+   { 0x02600031, 0x25c00e21, 0x00000240, 0x045e8003 },
+   { 0x00200001, 0x20e0012d, 0x0045065c, 0x00000000 },
+   { 0x00600001, 0x23600021, 0x008d0000, 0x00000000 },
+   { 0x00200001, 0x22e00021, 0x00450640, 0x00000000 },
+   { 0x00200008, 0x23603da1, 0x004500e0, 0x00020002 },
+   { 0x00000440, 0x23602421, 0x00000360, 0x00000038 },
+   { 0x00000801, 0x23680061, 0x00000000, 0x00010003 },
+   { 0x00600001, 0x22c00021, 0x008d0360, 0x00000000 },
+   { 0x05600031, 0x20000e24, 0x000002c0, 0x040a8021 },
+   { 0x00600001, 0x23200061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x23000021, 0x008d0000, 0x00000000 },
+   { 0x00000408, 0x23003da1, 0x000000e0, 0x00010001 },
+   { 0x00000041, 0x24043da0, 0x000000e2, 0x00030003 },
+   { 0x00000c08, 0x23043c01, 0x00000404, 0x00020002 },
+   { 0x00000801, 0x23080061, 0x00000000, 0x00050003 },
+   { 0x00200040, 0x23002421, 0x00450300, 0x00450038 },
+   { 0x00000401, 0x23200231, 0x00000648, 0x00000000 },
+   { 0x00000c01, 0x23260129, 0x00000656, 0x00000000 },
+   { 0x00200c01, 0x23280129, 0x00450658, 0x00000000 },
+   { 0x00000c01, 0x23320129, 0x00000650, 0x00000000 },
+   { 0x00200801, 0x23340129, 0x00450652, 0x00000000 },
+   { 0x05600031, 0x20000e24, 0x00000300, 0x040a8021 },
+   { 0x00200040, 0x236035a5, 0x004500e0, 0x00450088 },
+   { 0x0000040c, 0x23643ca5, 0x00000364, 0x00010001 },
+   { 0x00000801, 0x23680061, 0x00000000, 0x0004000f },
+   { 0x00600001, 0x24800021, 0x008d0360, 0x00000000 },
+   { 0x04600031, 0x27400e21, 0x00000480, 0x02390001 },
+   { 0x00000001, 0x24880061, 0x00000000, 0x0003000f },
+   { 0x00600001, 0x25a00021, 0x008d0000, 0x00000000 },
+   { 0x00200401, 0x25a001a1, 0x004500e0, 0x00000000 },
+   { 0x00000801, 0x25a80061, 0x00000000, 0x0007000f },
+   { 0x05600031, 0x20000e24, 0x000005a0, 0x0a0a8018 },
+   { 0x00600001, 0x24a00021, 0x008d0740, 0x00000000 },
+   { 0x00600001, 0x24c00021, 0x008d0760, 0x00000000 },
+   { 0x05600031, 0x20000e24, 0x00000480, 0x060a8019 },
+   { 0x00600001, 0x2fe00021, 0x008d0000, 0x00000000 },
+   { 0x07000031, 0x20001e24, 0x00000fe0, 0x82000010 },
diff --git a/src/shaders/post_processing/gen7/pa_to_pl2.asm b/src/shaders/post_processing/gen7/pa_to_pl2.asm
new file mode 100644
index 0000000..1e952b4
--- /dev/null
+++ b/src/shaders/post_processing/gen7/pa_to_pl2.asm
@@ -0,0 +1,17 @@
+// Module name: AVS
+.kernel PA_TO_pl2
+.code
+
+#include "VP_Setup.g4a"
+#include "Set_Layer_0.g4a"
+#include "Set_AVS_Buf_0123_VYUA.g4a"
+#include "PA_AVS_Buf_0.g4a"
+#include "PA_AVS_Buf_1.g4a"
+#include "PA_AVS_Buf_2.g4a"
+#include "PA_AVS_Buf_3.g4a"
+#include "Save_AVS_NV12.g4a"        
+#include "EOT.g4a"
+
+.end_code  
+
+.end_kernel
diff --git a/src/shaders/post_processing/gen7/pa_to_pl2.g75b b/src/shaders/post_processing/gen7/pa_to_pl2.g75b
new file mode 100644
index 0000000..c6aa5fb
--- /dev/null
+++ b/src/shaders/post_processing/gen7/pa_to_pl2.g75b
@@ -0,0 +1,633 @@
+   { 0x00600001, 0x23600021, 0x008d0000, 0x00000000 },
+   { 0x00600001, 0x23200021, 0x008d0000, 0x00000000 },
+   { 0x00600001, 0x23400021, 0x008d0000, 0x00000000 },
+   { 0x00600041, 0x206077bd, 0x008d0060, 0x000000f0 },
+   { 0x00200001, 0x211401bd, 0x004500e0, 0x00000000 },
+   { 0x01600010, 0x20002e24, 0x0000005a, 0x00010001 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x00000000 },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+   { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+   { 0x00110048, 0x20c077bd, 0x00000060, 0x00000114 },
+   { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+   { 0x00000048, 0x20a077bd, 0x00000080, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+   { 0x00110048, 0x20c077bd, 0x00000060, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+   { 0x00000048, 0x20a077bd, 0x00000080, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+   { 0x00110048, 0x20c077bd, 0x00000060, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+   { 0x00000048, 0x20a077bd, 0x00000080, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+   { 0x00110048, 0x20c077bd, 0x00000060, 0x00000220 },
+   { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+   { 0x00000048, 0x20a077bd, 0x00000080, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x00020002 },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+   { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+   { 0x00110048, 0x20c477bd, 0x00000064, 0x00000114 },
+   { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+   { 0x00000048, 0x20a477bd, 0x00000084, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+   { 0x00110048, 0x20c477bd, 0x00000064, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+   { 0x00000048, 0x20a477bd, 0x00000084, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+   { 0x00110048, 0x20c477bd, 0x00000064, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+   { 0x00000048, 0x20a477bd, 0x00000084, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+   { 0x00110048, 0x20c477bd, 0x00000064, 0x00000220 },
+   { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+   { 0x00000048, 0x20a477bd, 0x00000084, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x00040004 },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+   { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+   { 0x00110048, 0x20c877bd, 0x00000068, 0x00000114 },
+   { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+   { 0x00000048, 0x20a877bd, 0x00000088, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+   { 0x00110048, 0x20c877bd, 0x00000068, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+   { 0x00000048, 0x20a877bd, 0x00000088, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+   { 0x00110048, 0x20c877bd, 0x00000068, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+   { 0x00000048, 0x20a877bd, 0x00000088, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+   { 0x00110048, 0x20c877bd, 0x00000068, 0x00000220 },
+   { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+   { 0x00000048, 0x20a877bd, 0x00000088, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x00060006 },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+   { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+   { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000114 },
+   { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+   { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+   { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+   { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+   { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+   { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+   { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000220 },
+   { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+   { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x00080008 },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+   { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+   { 0x00110048, 0x20d077bd, 0x00000070, 0x00000114 },
+   { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+   { 0x00000048, 0x20b077bd, 0x00000090, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+   { 0x00110048, 0x20d077bd, 0x00000070, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+   { 0x00000048, 0x20b077bd, 0x00000090, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+   { 0x00110048, 0x20d077bd, 0x00000070, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+   { 0x00000048, 0x20b077bd, 0x00000090, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+   { 0x00110048, 0x20d077bd, 0x00000070, 0x00000220 },
+   { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+   { 0x00000048, 0x20b077bd, 0x00000090, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x000a000a },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+   { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+   { 0x00110048, 0x20d477bd, 0x00000074, 0x00000114 },
+   { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+   { 0x00000048, 0x20b477bd, 0x00000094, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+   { 0x00110048, 0x20d477bd, 0x00000074, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+   { 0x00000048, 0x20b477bd, 0x00000094, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+   { 0x00110048, 0x20d477bd, 0x00000074, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+   { 0x00000048, 0x20b477bd, 0x00000094, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+   { 0x00110048, 0x20d477bd, 0x00000074, 0x00000220 },
+   { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+   { 0x00000048, 0x20b477bd, 0x00000094, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x000c000c },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+   { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+   { 0x00110048, 0x20d877bd, 0x00000078, 0x00000114 },
+   { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+   { 0x00000048, 0x20b877bd, 0x00000098, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+   { 0x00110048, 0x20d877bd, 0x00000078, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+   { 0x00000048, 0x20b877bd, 0x00000098, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+   { 0x00110048, 0x20d877bd, 0x00000078, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+   { 0x00000048, 0x20b877bd, 0x00000098, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+   { 0x00110048, 0x20d877bd, 0x00000078, 0x00000220 },
+   { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+   { 0x00000048, 0x20b877bd, 0x00000098, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x000e000e },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+   { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+   { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000114 },
+   { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+   { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+   { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+   { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+   { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+   { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+   { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000220 },
+   { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+   { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00800001, 0x22a00169, 0x00000000, 0xffffffff },
+   { 0x02000010, 0x20002d24, 0x020000e4, 0x00000000 },
+   { 0x02010010, 0x20002d24, 0x020000e6, 0x00000000 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x000000d0 },
+   { 0x00000005, 0x23022d31, 0x00000044, 0x00030003 },
+   { 0x00000401, 0x233803bd, 0x000000f4, 0x00000000 },
+   { 0x00000c01, 0x233003bd, 0x00000060, 0x00000000 },
+   { 0x00000c01, 0x233403bd, 0x00000080, 0x00000000 },
+   { 0x00000c01, 0x232803bd, 0x000000c0, 0x00000000 },
+   { 0x00000801, 0x232c03bd, 0x000000a0, 0x00000000 },
+   { 0x00000001, 0x240803bc, 0x000000c0, 0x00000000 },
+   { 0x00000048, 0x24087fbc, 0x00000060, 0x41000000 },
+   { 0x00000448, 0x22e87fbd, 0x000000f4, 0x41e00000 },
+   { 0x00000c41, 0x22e47fbd, 0x00000080, 0x40800000 },
+   { 0x00000001, 0x241003bc, 0x00000060, 0x00000000 },
+   { 0x00000c48, 0x22f07fbd, 0x000000f4, 0x41000000 },
+   { 0x00000801, 0x22f40061, 0x00000000, 0x00000000 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00400001, 0x2400036c, 0x00000000, 0x000062ea },
+   { 0x00400040, 0x24002d8c, 0x00690400, 0x00460046 },
+   { 0x00400009, 0x22c02d8d, 0x00690400, 0x00050005 },
+   { 0x00400001, 0x2400036c, 0x00000000, 0x00006420 },
+   { 0x00400040, 0x24002d8c, 0x00690400, 0x00400040 },
+   { 0x00400409, 0x22402d8d, 0x00690400, 0x00050005 },
+   { 0x00000801, 0x22500061, 0x00000000, 0x01000100 },
+   { 0x00000001, 0x22d00061, 0x00000000, 0x00400040 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x00000230 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x050eb400 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x00000000 },
+   { 0x00000401, 0x233c0021, 0x000000fc, 0x00000000 },
+   { 0x00000801, 0x23240121, 0x000000f8, 0x00000000 },
+   { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+   { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+   { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x00000190 },
+   { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+   { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+   { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+   { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+   { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+   { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+   { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+   { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+   { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+   { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+   { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+   { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+   { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+   { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+   { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+   { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+   { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+   { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+   { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+   { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+   { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+   { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+   { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+   { 0x02000031, 0x28000229, 0x00000200, 0x00000200 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x00000230 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x050eb400 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x00000000 },
+   { 0x00000040, 0x23240d21, 0x000000f8, 0x00000001 },
+   { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+   { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+   { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+   { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+   { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+   { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+   { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+   { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+   { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+   { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+   { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+   { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+   { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+   { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+   { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+   { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+   { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+   { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+   { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+   { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+   { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+   { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+   { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+   { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+   { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+   { 0x02000031, 0x2a000229, 0x00000200, 0x00000200 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x00000230 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x050eb400 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x00000000 },
+   { 0x00000040, 0x23240d21, 0x000000f8, 0x00000002 },
+   { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+   { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+   { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+   { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+   { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+   { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+   { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+   { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+   { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+   { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+   { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+   { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+   { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+   { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+   { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+   { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+   { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+   { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+   { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+   { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+   { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+   { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+   { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+   { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+   { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+   { 0x02000031, 0x2c000229, 0x00000200, 0x00000200 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x00000230 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x050eb400 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x00000000 },
+   { 0x00000040, 0x23240d21, 0x000000f8, 0x00000003 },
+   { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+   { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+   { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+   { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+   { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+   { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+   { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+   { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+   { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+   { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+   { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+   { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+   { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+   { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+   { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+   { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+   { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+   { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+   { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+   { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+   { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+   { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+   { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+   { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+   { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+   { 0x02000031, 0x2e000229, 0x00000200, 0x00000200 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00400040, 0x22002da8, 0x006902c0, 0x00000000 },
+   { 0x00400040, 0x22082da8, 0x006902c0, 0x02000200 },
+   { 0x00600001, 0x23800021, 0x008d0360, 0x00000000 },
+   { 0x00600001, 0x24a00021, 0x008d0360, 0x00000000 },
+   { 0x00200401, 0x238001a5, 0x004500e0, 0x00000000 },
+   { 0x00000401, 0x24a001a5, 0x000000e0, 0x00000000 },
+   { 0x00000c08, 0x24a43da5, 0x000000e2, 0x00010001 },
+   { 0x00000801, 0x23880061, 0x00000000, 0x000f000f },
+   { 0x00000801, 0x24a80061, 0x00000000, 0x0007000f },
+   { 0x80800040, 0xa4002d29, 0x00b18400, 0x00800080 },
+   { 0x80800040, 0xa4202d29, 0x00b18420, 0x00800080 },
+   { 0x80800040, 0xa4402d29, 0x00b18440, 0x00800080 },
+   { 0x80800040, 0xa4602d29, 0x00b18460, 0x00800080 },
+   { 0x00600008, 0x45c03d29, 0x00ae8800, 0x00010001 },
+   { 0x00600008, 0x45e03d29, 0x00ae8820, 0x00010001 },
+   { 0x00600008, 0x46003d29, 0x00ae8840, 0x00010001 },
+   { 0x00600008, 0x46203d29, 0x00ae8860, 0x00010001 },
+   { 0x00600040, 0x45c02529, 0x00ae05c0, 0x00ae05e0 },
+   { 0x80600040, 0xc8002d29, 0x00ae05c0, 0x00800080 },
+   { 0x00600040, 0x46002529, 0x00ae0600, 0x00ae0620 },
+   { 0x80600040, 0xc8402d29, 0x00ae0600, 0x00800080 },
+   { 0x00600008, 0x46403d29, 0x00ae8000, 0x00010001 },
+   { 0x00600008, 0x46603d29, 0x00ae8020, 0x00010001 },
+   { 0x00600008, 0x46803d29, 0x00ae8040, 0x00010001 },
+   { 0x00600008, 0x46a03d29, 0x00ae8060, 0x00010001 },
+   { 0x00600040, 0x46402529, 0x00ae0640, 0x00ae0660 },
+   { 0x80600040, 0xc0002d29, 0x00ae0640, 0x00800080 },
+   { 0x00600040, 0x46802529, 0x00ae0680, 0x00ae06a0 },
+   { 0x80600040, 0xc0402d29, 0x00ae0680, 0x00800080 },
+   { 0x00400040, 0x22002da8, 0x006902c0, 0x04000400 },
+   { 0x80800040, 0xb4002d29, 0x00b19400, 0x00800080 },
+   { 0x80800040, 0xb4202d29, 0x00b19420, 0x00800080 },
+   { 0x80800040, 0xb4402d29, 0x00b19440, 0x00800080 },
+   { 0x80800040, 0xb4602d29, 0x00b19460, 0x00800080 },
+   { 0x00600008, 0x45c03d29, 0x00ae9800, 0x00010001 },
+   { 0x00600008, 0x45e03d29, 0x00ae9820, 0x00010001 },
+   { 0x00600008, 0x46003d29, 0x00ae9840, 0x00010001 },
+   { 0x00600008, 0x46203d29, 0x00ae9860, 0x00010001 },
+   { 0x00600040, 0x45c02529, 0x00ae05c0, 0x00ae05e0 },
+   { 0x80600040, 0xd8002d29, 0x00ae05c0, 0x00800080 },
+   { 0x00600040, 0x46002529, 0x00ae0600, 0x00ae0620 },
+   { 0x80600040, 0xd8402d29, 0x00ae0600, 0x00800080 },
+   { 0x00600008, 0x46403d29, 0x00ae9000, 0x00010001 },
+   { 0x00600008, 0x46603d29, 0x00ae9020, 0x00010001 },
+   { 0x00600008, 0x46803d29, 0x00ae9040, 0x00010001 },
+   { 0x00600008, 0x46a03d29, 0x00ae9060, 0x00010001 },
+   { 0x00600040, 0x46402529, 0x00ae0640, 0x00ae0660 },
+   { 0x80600040, 0xd0002d29, 0x00ae0640, 0x00800080 },
+   { 0x00600040, 0x46802529, 0x00ae0680, 0x00ae06a0 },
+   { 0x80600040, 0xd0402d29, 0x00ae0680, 0x00800080 },
+   { 0x00400040, 0x22082da8, 0x006902c0, 0x06000600 },
+   { 0x80800040, 0xa4002d29, 0x00b18400, 0x00800080 },
+   { 0x80800040, 0xa4202d29, 0x00b18420, 0x00800080 },
+   { 0x80800040, 0xa4402d29, 0x00b18440, 0x00800080 },
+   { 0x80800040, 0xa4602d29, 0x00b18460, 0x00800080 },
+   { 0x00600008, 0x45c03d29, 0x00ae8800, 0x00010001 },
+   { 0x00600008, 0x45e03d29, 0x00ae8820, 0x00010001 },
+   { 0x00600008, 0x46003d29, 0x00ae8840, 0x00010001 },
+   { 0x00600008, 0x46203d29, 0x00ae8860, 0x00010001 },
+   { 0x00600040, 0x45c02529, 0x00ae05c0, 0x00ae05e0 },
+   { 0x80600040, 0xc8002d29, 0x00ae05c0, 0x00800080 },
+   { 0x00600040, 0x46002529, 0x00ae0600, 0x00ae0620 },
+   { 0x80600040, 0xc8402d29, 0x00ae0600, 0x00800080 },
+   { 0x00600008, 0x46403d29, 0x00ae8000, 0x00010001 },
+   { 0x00600008, 0x46603d29, 0x00ae8020, 0x00010001 },
+   { 0x00600008, 0x46803d29, 0x00ae8040, 0x00010001 },
+   { 0x00600008, 0x46a03d29, 0x00ae8060, 0x00010001 },
+   { 0x00600040, 0x46402529, 0x00ae0640, 0x00ae0660 },
+   { 0x80600040, 0xc0002d29, 0x00ae0640, 0x00800080 },
+   { 0x00600040, 0x46802529, 0x00ae0680, 0x00ae06a0 },
+   { 0x80600040, 0xc0402d29, 0x00ae0680, 0x00800080 },
+   { 0x00400040, 0x22002da8, 0x006902c0, 0x04000400 },
+   { 0x80800040, 0xb4002d29, 0x00b19400, 0x00800080 },
+   { 0x80800040, 0xb4202d29, 0x00b19420, 0x00800080 },
+   { 0x80800040, 0xb4402d29, 0x00b19440, 0x00800080 },
+   { 0x80800040, 0xb4602d29, 0x00b19460, 0x00800080 },
+   { 0x00600008, 0x45c03d29, 0x00ae9800, 0x00010001 },
+   { 0x00600008, 0x45e03d29, 0x00ae9820, 0x00010001 },
+   { 0x00600008, 0x46003d29, 0x00ae9840, 0x00010001 },
+   { 0x00600008, 0x46203d29, 0x00ae9860, 0x00010001 },
+   { 0x00600040, 0x45c02529, 0x00ae05c0, 0x00ae05e0 },
+   { 0x80600040, 0xd8002d29, 0x00ae05c0, 0x00800080 },
+   { 0x00600040, 0x46002529, 0x00ae0600, 0x00ae0620 },
+   { 0x80600040, 0xd8402d29, 0x00ae0600, 0x00800080 },
+   { 0x00600008, 0x46403d29, 0x00ae9000, 0x00010001 },
+   { 0x00600008, 0x46603d29, 0x00ae9020, 0x00010001 },
+   { 0x00600008, 0x46803d29, 0x00ae9040, 0x00010001 },
+   { 0x00600008, 0x46a03d29, 0x00ae9060, 0x00010001 },
+   { 0x00600040, 0x46402529, 0x00ae0640, 0x00ae0660 },
+   { 0x80600040, 0xd0002d29, 0x00ae0640, 0x00800080 },
+   { 0x00600040, 0x46802529, 0x00ae0680, 0x00ae06a0 },
+   { 0x80600040, 0xd0402d29, 0x00ae0680, 0x00800080 },
+   { 0x00400040, 0x22082da8, 0x006902c0, 0x06000600 },
+   { 0x00400040, 0x22002da8, 0x006902c0, 0x00000000 },
+   { 0x00400040, 0x22082da8, 0x006902c0, 0x02000200 },
+   { 0x00800401, 0x23a00231, 0x00d28401, 0x00000000 },
+   { 0x00800801, 0x23b00231, 0x00d28421, 0x00000000 },
+   { 0x00800401, 0x23c00231, 0x00d28441, 0x00000000 },
+   { 0x00800801, 0x23d00231, 0x00d28461, 0x00000000 },
+   { 0x00600401, 0x44c00231, 0x00cf8801, 0x00000000 },
+   { 0x00600c01, 0x44d00231, 0x00cf8841, 0x00000000 },
+   { 0x00600c01, 0x44c10231, 0x00cf8001, 0x00000000 },
+   { 0x00600801, 0x44d10231, 0x00cf8041, 0x00000000 },
+   { 0x00400040, 0x22002da8, 0x006902c0, 0x04000400 },
+   { 0x00800401, 0x23e00231, 0x00d29401, 0x00000000 },
+   { 0x00800801, 0x23f00231, 0x00d29421, 0x00000000 },
+   { 0x00800401, 0x24000231, 0x00d29441, 0x00000000 },
+   { 0x00800801, 0x24100231, 0x00d29461, 0x00000000 },
+   { 0x00600401, 0x44e00231, 0x00cf9801, 0x00000000 },
+   { 0x00600c01, 0x44f00231, 0x00cf9841, 0x00000000 },
+   { 0x00600c01, 0x44e10231, 0x00cf9001, 0x00000000 },
+   { 0x00600801, 0x44f10231, 0x00cf9041, 0x00000000 },
+   { 0x00400040, 0x22082da8, 0x006902c0, 0x06000600 },
+   { 0x00800401, 0x24200231, 0x00d28401, 0x00000000 },
+   { 0x00800801, 0x24300231, 0x00d28421, 0x00000000 },
+   { 0x00800401, 0x24400231, 0x00d28441, 0x00000000 },
+   { 0x00800801, 0x24500231, 0x00d28461, 0x00000000 },
+   { 0x00600401, 0x45000231, 0x00cf8801, 0x00000000 },
+   { 0x00600c01, 0x45100231, 0x00cf8841, 0x00000000 },
+   { 0x00600c01, 0x45010231, 0x00cf8001, 0x00000000 },
+   { 0x00600801, 0x45110231, 0x00cf8041, 0x00000000 },
+   { 0x00800401, 0x24600231, 0x00d29401, 0x00000000 },
+   { 0x00800801, 0x24700231, 0x00d29421, 0x00000000 },
+   { 0x00800401, 0x24800231, 0x00d29441, 0x00000000 },
+   { 0x00800801, 0x24900231, 0x00d29461, 0x00000000 },
+   { 0x00600401, 0x45200231, 0x00cf9801, 0x00000000 },
+   { 0x00600c01, 0x45300231, 0x00cf9841, 0x00000000 },
+   { 0x00600c01, 0x45210231, 0x00cf9001, 0x00000000 },
+   { 0x00600801, 0x45310231, 0x00cf9041, 0x00000000 },
+   { 0x05000031, 0x20000e24, 0x00000380, 0x120a8018 },
+   { 0x05000031, 0x20000e24, 0x000004a0, 0x0a0a8019 },
+   { 0x00600001, 0x2fe00021, 0x008d0000, 0x00000000 },
+   { 0x07000031, 0x20001e24, 0x00000fe0, 0x82000010 },
diff --git a/src/shaders/post_processing/gen7/pa_to_pl2.g7b b/src/shaders/post_processing/gen7/pa_to_pl2.g7b
new file mode 100644
index 0000000..d3605bc
--- /dev/null
+++ b/src/shaders/post_processing/gen7/pa_to_pl2.g7b
@@ -0,0 +1,633 @@
+   { 0x00600001, 0x23600021, 0x008d0000, 0x00000000 },
+   { 0x00600001, 0x23200021, 0x008d0000, 0x00000000 },
+   { 0x00600001, 0x23400021, 0x008d0000, 0x00000000 },
+   { 0x00600041, 0x206077bd, 0x008d0060, 0x000000f0 },
+   { 0x00200001, 0x211401bd, 0x004500e0, 0x00000000 },
+   { 0x01600010, 0x20002e24, 0x0000005a, 0x00010001 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x00000000 },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+   { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+   { 0x00110048, 0x20c077bd, 0x00000060, 0x00000114 },
+   { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+   { 0x00000048, 0x20a077bd, 0x00000080, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+   { 0x00110048, 0x20c077bd, 0x00000060, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+   { 0x00000048, 0x20a077bd, 0x00000080, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+   { 0x00110048, 0x20c077bd, 0x00000060, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+   { 0x00000048, 0x20a077bd, 0x00000080, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+   { 0x00110048, 0x20c077bd, 0x00000060, 0x00000220 },
+   { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+   { 0x00000048, 0x20a077bd, 0x00000080, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x00020002 },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+   { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+   { 0x00110048, 0x20c477bd, 0x00000064, 0x00000114 },
+   { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+   { 0x00000048, 0x20a477bd, 0x00000084, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+   { 0x00110048, 0x20c477bd, 0x00000064, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+   { 0x00000048, 0x20a477bd, 0x00000084, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+   { 0x00110048, 0x20c477bd, 0x00000064, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+   { 0x00000048, 0x20a477bd, 0x00000084, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+   { 0x00110048, 0x20c477bd, 0x00000064, 0x00000220 },
+   { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+   { 0x00000048, 0x20a477bd, 0x00000084, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x00040004 },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+   { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+   { 0x00110048, 0x20c877bd, 0x00000068, 0x00000114 },
+   { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+   { 0x00000048, 0x20a877bd, 0x00000088, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+   { 0x00110048, 0x20c877bd, 0x00000068, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+   { 0x00000048, 0x20a877bd, 0x00000088, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+   { 0x00110048, 0x20c877bd, 0x00000068, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+   { 0x00000048, 0x20a877bd, 0x00000088, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+   { 0x00110048, 0x20c877bd, 0x00000068, 0x00000220 },
+   { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+   { 0x00000048, 0x20a877bd, 0x00000088, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x00060006 },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+   { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+   { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000114 },
+   { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+   { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+   { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+   { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+   { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+   { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+   { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000220 },
+   { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+   { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x00080008 },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+   { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+   { 0x00110048, 0x20d077bd, 0x00000070, 0x00000114 },
+   { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+   { 0x00000048, 0x20b077bd, 0x00000090, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+   { 0x00110048, 0x20d077bd, 0x00000070, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+   { 0x00000048, 0x20b077bd, 0x00000090, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+   { 0x00110048, 0x20d077bd, 0x00000070, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+   { 0x00000048, 0x20b077bd, 0x00000090, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+   { 0x00110048, 0x20d077bd, 0x00000070, 0x00000220 },
+   { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+   { 0x00000048, 0x20b077bd, 0x00000090, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x000a000a },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+   { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+   { 0x00110048, 0x20d477bd, 0x00000074, 0x00000114 },
+   { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+   { 0x00000048, 0x20b477bd, 0x00000094, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+   { 0x00110048, 0x20d477bd, 0x00000074, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+   { 0x00000048, 0x20b477bd, 0x00000094, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+   { 0x00110048, 0x20d477bd, 0x00000074, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+   { 0x00000048, 0x20b477bd, 0x00000094, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+   { 0x00110048, 0x20d477bd, 0x00000074, 0x00000220 },
+   { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+   { 0x00000048, 0x20b477bd, 0x00000094, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x000c000c },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+   { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+   { 0x00110048, 0x20d877bd, 0x00000078, 0x00000114 },
+   { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+   { 0x00000048, 0x20b877bd, 0x00000098, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+   { 0x00110048, 0x20d877bd, 0x00000078, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+   { 0x00000048, 0x20b877bd, 0x00000098, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+   { 0x00110048, 0x20d877bd, 0x00000078, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+   { 0x00000048, 0x20b877bd, 0x00000098, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+   { 0x00110048, 0x20d877bd, 0x00000078, 0x00000220 },
+   { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+   { 0x00000048, 0x20b877bd, 0x00000098, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x000e000e },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+   { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+   { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000114 },
+   { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+   { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+   { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+   { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+   { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+   { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+   { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000220 },
+   { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+   { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00800001, 0x22a00169, 0x00000000, 0xffffffff },
+   { 0x02000010, 0x20002d24, 0x020000e4, 0x00000000 },
+   { 0x02010010, 0x20002d24, 0x020000e6, 0x00000000 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x0000001a },
+   { 0x00000005, 0x23022d31, 0x00000044, 0x00030003 },
+   { 0x00000401, 0x233803bd, 0x000000f4, 0x00000000 },
+   { 0x00000c01, 0x233003bd, 0x00000060, 0x00000000 },
+   { 0x00000c01, 0x233403bd, 0x00000080, 0x00000000 },
+   { 0x00000c01, 0x232803bd, 0x000000c0, 0x00000000 },
+   { 0x00000801, 0x232c03bd, 0x000000a0, 0x00000000 },
+   { 0x00000001, 0x240803bc, 0x000000c0, 0x00000000 },
+   { 0x00000048, 0x24087fbc, 0x00000060, 0x41000000 },
+   { 0x00000448, 0x22e87fbd, 0x000000f4, 0x41e00000 },
+   { 0x00000c41, 0x22e47fbd, 0x00000080, 0x40800000 },
+   { 0x00000001, 0x241003bc, 0x00000060, 0x00000000 },
+   { 0x00000c48, 0x22f07fbd, 0x000000f4, 0x41000000 },
+   { 0x00000801, 0x22f40061, 0x00000000, 0x00000000 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00400001, 0x2400036c, 0x00000000, 0x000062ea },
+   { 0x00400040, 0x24002d8c, 0x00690400, 0x00460046 },
+   { 0x00400009, 0x22c02d8d, 0x00690400, 0x00050005 },
+   { 0x00400001, 0x2400036c, 0x00000000, 0x00006420 },
+   { 0x00400040, 0x24002d8c, 0x00690400, 0x00400040 },
+   { 0x00400409, 0x22402d8d, 0x00690400, 0x00050005 },
+   { 0x00000801, 0x22500061, 0x00000000, 0x01000100 },
+   { 0x00000001, 0x22d00061, 0x00000000, 0x00400040 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x00000046 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x050eb400 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x00000000 },
+   { 0x00000401, 0x233c0021, 0x000000fc, 0x00000000 },
+   { 0x00000801, 0x23240121, 0x000000f8, 0x00000000 },
+   { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+   { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+   { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x00000032 },
+   { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+   { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+   { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+   { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+   { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+   { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+   { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+   { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+   { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+   { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+   { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+   { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+   { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+   { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+   { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+   { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+   { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+   { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+   { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+   { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+   { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+   { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+   { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+   { 0x02000031, 0x28000229, 0x00000200, 0x00000200 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x00000046 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x050eb400 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x00000000 },
+   { 0x00000040, 0x23240d21, 0x000000f8, 0x00000001 },
+   { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+   { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+   { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+   { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+   { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+   { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+   { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+   { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+   { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+   { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+   { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+   { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+   { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+   { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+   { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+   { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+   { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+   { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+   { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+   { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+   { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+   { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+   { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+   { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+   { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+   { 0x02000031, 0x2a000229, 0x00000200, 0x00000200 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x00000046 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x050eb400 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x00000000 },
+   { 0x00000040, 0x23240d21, 0x000000f8, 0x00000002 },
+   { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+   { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+   { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+   { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+   { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+   { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+   { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+   { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+   { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+   { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+   { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+   { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+   { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+   { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+   { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+   { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+   { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+   { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+   { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+   { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+   { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+   { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+   { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+   { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+   { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+   { 0x02000031, 0x2c000229, 0x00000200, 0x00000200 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x00000046 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x050eb400 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x00000000 },
+   { 0x00000040, 0x23240d21, 0x000000f8, 0x00000003 },
+   { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+   { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+   { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+   { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+   { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+   { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+   { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+   { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+   { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+   { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+   { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+   { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+   { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+   { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+   { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+   { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+   { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+   { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+   { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+   { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+   { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+   { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+   { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+   { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+   { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+   { 0x02000031, 0x2e000229, 0x00000200, 0x00000200 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00400040, 0x22002da8, 0x006902c0, 0x00000000 },
+   { 0x00400040, 0x22082da8, 0x006902c0, 0x02000200 },
+   { 0x00600001, 0x23800021, 0x008d0360, 0x00000000 },
+   { 0x00600001, 0x24a00021, 0x008d0360, 0x00000000 },
+   { 0x00200401, 0x238001a5, 0x004500e0, 0x00000000 },
+   { 0x00000401, 0x24a001a5, 0x000000e0, 0x00000000 },
+   { 0x00000c08, 0x24a43da5, 0x000000e2, 0x00010001 },
+   { 0x00000801, 0x23880061, 0x00000000, 0x000f000f },
+   { 0x00000801, 0x24a80061, 0x00000000, 0x0007000f },
+   { 0x80800040, 0xa4002d29, 0x00b18400, 0x00800080 },
+   { 0x80800040, 0xa4202d29, 0x00b18420, 0x00800080 },
+   { 0x80800040, 0xa4402d29, 0x00b18440, 0x00800080 },
+   { 0x80800040, 0xa4602d29, 0x00b18460, 0x00800080 },
+   { 0x00600008, 0x45c03d29, 0x00ae8800, 0x00010001 },
+   { 0x00600008, 0x45e03d29, 0x00ae8820, 0x00010001 },
+   { 0x00600008, 0x46003d29, 0x00ae8840, 0x00010001 },
+   { 0x00600008, 0x46203d29, 0x00ae8860, 0x00010001 },
+   { 0x00600040, 0x45c02529, 0x00ae05c0, 0x00ae05e0 },
+   { 0x80600040, 0xc8002d29, 0x00ae05c0, 0x00800080 },
+   { 0x00600040, 0x46002529, 0x00ae0600, 0x00ae0620 },
+   { 0x80600040, 0xc8402d29, 0x00ae0600, 0x00800080 },
+   { 0x00600008, 0x46403d29, 0x00ae8000, 0x00010001 },
+   { 0x00600008, 0x46603d29, 0x00ae8020, 0x00010001 },
+   { 0x00600008, 0x46803d29, 0x00ae8040, 0x00010001 },
+   { 0x00600008, 0x46a03d29, 0x00ae8060, 0x00010001 },
+   { 0x00600040, 0x46402529, 0x00ae0640, 0x00ae0660 },
+   { 0x80600040, 0xc0002d29, 0x00ae0640, 0x00800080 },
+   { 0x00600040, 0x46802529, 0x00ae0680, 0x00ae06a0 },
+   { 0x80600040, 0xc0402d29, 0x00ae0680, 0x00800080 },
+   { 0x00400040, 0x22002da8, 0x006902c0, 0x04000400 },
+   { 0x80800040, 0xb4002d29, 0x00b19400, 0x00800080 },
+   { 0x80800040, 0xb4202d29, 0x00b19420, 0x00800080 },
+   { 0x80800040, 0xb4402d29, 0x00b19440, 0x00800080 },
+   { 0x80800040, 0xb4602d29, 0x00b19460, 0x00800080 },
+   { 0x00600008, 0x45c03d29, 0x00ae9800, 0x00010001 },
+   { 0x00600008, 0x45e03d29, 0x00ae9820, 0x00010001 },
+   { 0x00600008, 0x46003d29, 0x00ae9840, 0x00010001 },
+   { 0x00600008, 0x46203d29, 0x00ae9860, 0x00010001 },
+   { 0x00600040, 0x45c02529, 0x00ae05c0, 0x00ae05e0 },
+   { 0x80600040, 0xd8002d29, 0x00ae05c0, 0x00800080 },
+   { 0x00600040, 0x46002529, 0x00ae0600, 0x00ae0620 },
+   { 0x80600040, 0xd8402d29, 0x00ae0600, 0x00800080 },
+   { 0x00600008, 0x46403d29, 0x00ae9000, 0x00010001 },
+   { 0x00600008, 0x46603d29, 0x00ae9020, 0x00010001 },
+   { 0x00600008, 0x46803d29, 0x00ae9040, 0x00010001 },
+   { 0x00600008, 0x46a03d29, 0x00ae9060, 0x00010001 },
+   { 0x00600040, 0x46402529, 0x00ae0640, 0x00ae0660 },
+   { 0x80600040, 0xd0002d29, 0x00ae0640, 0x00800080 },
+   { 0x00600040, 0x46802529, 0x00ae0680, 0x00ae06a0 },
+   { 0x80600040, 0xd0402d29, 0x00ae0680, 0x00800080 },
+   { 0x00400040, 0x22082da8, 0x006902c0, 0x06000600 },
+   { 0x80800040, 0xa4002d29, 0x00b18400, 0x00800080 },
+   { 0x80800040, 0xa4202d29, 0x00b18420, 0x00800080 },
+   { 0x80800040, 0xa4402d29, 0x00b18440, 0x00800080 },
+   { 0x80800040, 0xa4602d29, 0x00b18460, 0x00800080 },
+   { 0x00600008, 0x45c03d29, 0x00ae8800, 0x00010001 },
+   { 0x00600008, 0x45e03d29, 0x00ae8820, 0x00010001 },
+   { 0x00600008, 0x46003d29, 0x00ae8840, 0x00010001 },
+   { 0x00600008, 0x46203d29, 0x00ae8860, 0x00010001 },
+   { 0x00600040, 0x45c02529, 0x00ae05c0, 0x00ae05e0 },
+   { 0x80600040, 0xc8002d29, 0x00ae05c0, 0x00800080 },
+   { 0x00600040, 0x46002529, 0x00ae0600, 0x00ae0620 },
+   { 0x80600040, 0xc8402d29, 0x00ae0600, 0x00800080 },
+   { 0x00600008, 0x46403d29, 0x00ae8000, 0x00010001 },
+   { 0x00600008, 0x46603d29, 0x00ae8020, 0x00010001 },
+   { 0x00600008, 0x46803d29, 0x00ae8040, 0x00010001 },
+   { 0x00600008, 0x46a03d29, 0x00ae8060, 0x00010001 },
+   { 0x00600040, 0x46402529, 0x00ae0640, 0x00ae0660 },
+   { 0x80600040, 0xc0002d29, 0x00ae0640, 0x00800080 },
+   { 0x00600040, 0x46802529, 0x00ae0680, 0x00ae06a0 },
+   { 0x80600040, 0xc0402d29, 0x00ae0680, 0x00800080 },
+   { 0x00400040, 0x22002da8, 0x006902c0, 0x04000400 },
+   { 0x80800040, 0xb4002d29, 0x00b19400, 0x00800080 },
+   { 0x80800040, 0xb4202d29, 0x00b19420, 0x00800080 },
+   { 0x80800040, 0xb4402d29, 0x00b19440, 0x00800080 },
+   { 0x80800040, 0xb4602d29, 0x00b19460, 0x00800080 },
+   { 0x00600008, 0x45c03d29, 0x00ae9800, 0x00010001 },
+   { 0x00600008, 0x45e03d29, 0x00ae9820, 0x00010001 },
+   { 0x00600008, 0x46003d29, 0x00ae9840, 0x00010001 },
+   { 0x00600008, 0x46203d29, 0x00ae9860, 0x00010001 },
+   { 0x00600040, 0x45c02529, 0x00ae05c0, 0x00ae05e0 },
+   { 0x80600040, 0xd8002d29, 0x00ae05c0, 0x00800080 },
+   { 0x00600040, 0x46002529, 0x00ae0600, 0x00ae0620 },
+   { 0x80600040, 0xd8402d29, 0x00ae0600, 0x00800080 },
+   { 0x00600008, 0x46403d29, 0x00ae9000, 0x00010001 },
+   { 0x00600008, 0x46603d29, 0x00ae9020, 0x00010001 },
+   { 0x00600008, 0x46803d29, 0x00ae9040, 0x00010001 },
+   { 0x00600008, 0x46a03d29, 0x00ae9060, 0x00010001 },
+   { 0x00600040, 0x46402529, 0x00ae0640, 0x00ae0660 },
+   { 0x80600040, 0xd0002d29, 0x00ae0640, 0x00800080 },
+   { 0x00600040, 0x46802529, 0x00ae0680, 0x00ae06a0 },
+   { 0x80600040, 0xd0402d29, 0x00ae0680, 0x00800080 },
+   { 0x00400040, 0x22082da8, 0x006902c0, 0x06000600 },
+   { 0x00400040, 0x22002da8, 0x006902c0, 0x00000000 },
+   { 0x00400040, 0x22082da8, 0x006902c0, 0x02000200 },
+   { 0x00800401, 0x23a00231, 0x00d28401, 0x00000000 },
+   { 0x00800801, 0x23b00231, 0x00d28421, 0x00000000 },
+   { 0x00800401, 0x23c00231, 0x00d28441, 0x00000000 },
+   { 0x00800801, 0x23d00231, 0x00d28461, 0x00000000 },
+   { 0x00600401, 0x44c00231, 0x00cf8801, 0x00000000 },
+   { 0x00600c01, 0x44d00231, 0x00cf8841, 0x00000000 },
+   { 0x00600c01, 0x44c10231, 0x00cf8001, 0x00000000 },
+   { 0x00600801, 0x44d10231, 0x00cf8041, 0x00000000 },
+   { 0x00400040, 0x22002da8, 0x006902c0, 0x04000400 },
+   { 0x00800401, 0x23e00231, 0x00d29401, 0x00000000 },
+   { 0x00800801, 0x23f00231, 0x00d29421, 0x00000000 },
+   { 0x00800401, 0x24000231, 0x00d29441, 0x00000000 },
+   { 0x00800801, 0x24100231, 0x00d29461, 0x00000000 },
+   { 0x00600401, 0x44e00231, 0x00cf9801, 0x00000000 },
+   { 0x00600c01, 0x44f00231, 0x00cf9841, 0x00000000 },
+   { 0x00600c01, 0x44e10231, 0x00cf9001, 0x00000000 },
+   { 0x00600801, 0x44f10231, 0x00cf9041, 0x00000000 },
+   { 0x00400040, 0x22082da8, 0x006902c0, 0x06000600 },
+   { 0x00800401, 0x24200231, 0x00d28401, 0x00000000 },
+   { 0x00800801, 0x24300231, 0x00d28421, 0x00000000 },
+   { 0x00800401, 0x24400231, 0x00d28441, 0x00000000 },
+   { 0x00800801, 0x24500231, 0x00d28461, 0x00000000 },
+   { 0x00600401, 0x45000231, 0x00cf8801, 0x00000000 },
+   { 0x00600c01, 0x45100231, 0x00cf8841, 0x00000000 },
+   { 0x00600c01, 0x45010231, 0x00cf8001, 0x00000000 },
+   { 0x00600801, 0x45110231, 0x00cf8041, 0x00000000 },
+   { 0x00800401, 0x24600231, 0x00d29401, 0x00000000 },
+   { 0x00800801, 0x24700231, 0x00d29421, 0x00000000 },
+   { 0x00800401, 0x24800231, 0x00d29441, 0x00000000 },
+   { 0x00800801, 0x24900231, 0x00d29461, 0x00000000 },
+   { 0x00600401, 0x45200231, 0x00cf9801, 0x00000000 },
+   { 0x00600c01, 0x45300231, 0x00cf9841, 0x00000000 },
+   { 0x00600c01, 0x45210231, 0x00cf9001, 0x00000000 },
+   { 0x00600801, 0x45310231, 0x00cf9041, 0x00000000 },
+   { 0x05000031, 0x20000e24, 0x00000380, 0x120a8018 },
+   { 0x05000031, 0x20000e24, 0x000004a0, 0x0a0a8019 },
+   { 0x00600001, 0x2fe00021, 0x008d0000, 0x00000000 },
+   { 0x07000031, 0x20001e24, 0x00000fe0, 0x82000010 },
diff --git a/src/shaders/post_processing/gen7/pa_to_pl3.asm b/src/shaders/post_processing/gen7/pa_to_pl3.asm
new file mode 100644
index 0000000..d149f48
--- /dev/null
+++ b/src/shaders/post_processing/gen7/pa_to_pl3.asm
@@ -0,0 +1,17 @@
+// Module name: AVS -- kernel PA_TO_PL3 (presumably packed "PA" input to 3-plane planar "PL3" output; inferred from the name, TODO confirm)
+.kernel PA_TO_PL3
+.code
+// The kernel body consists solely of the .g4a fragments included below, in this order. Fragment roles are not visible here; any reading of them from the file names should be verified.
+#include "VP_Setup.g4a"
+#include "Set_Layer_0.g4a"
+#include "Set_AVS_Buf_0123_VYUA.g4a"
+#include "PA_AVS_Buf_0.g4a"
+#include "PA_AVS_Buf_1.g4a"
+#include "PA_AVS_Buf_2.g4a"
+#include "PA_AVS_Buf_3.g4a"
+#include "Save_AVS_PL3.g4a"        
+#include "EOT.g4a"
+// EOT.g4a is the last fragment included; nothing else follows it inside the .code section.
+.end_code  
+
+.end_kernel
diff --git a/src/shaders/post_processing/gen7/pa_to_pl3.g75b b/src/shaders/post_processing/gen7/pa_to_pl3.g75b
new file mode 100644
index 0000000..bf68443
--- /dev/null
+++ b/src/shaders/post_processing/gen7/pa_to_pl3.g75b
@@ -0,0 +1,586 @@
+   { 0x00600001, 0x23600021, 0x008d0000, 0x00000000 },
+   { 0x00600001, 0x23200021, 0x008d0000, 0x00000000 },
+   { 0x00600001, 0x23400021, 0x008d0000, 0x00000000 },
+   { 0x00600041, 0x206077bd, 0x008d0060, 0x000000f0 },
+   { 0x00200001, 0x211401bd, 0x004500e0, 0x00000000 },
+   { 0x01600010, 0x20002e24, 0x0000005a, 0x00010001 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x00000000 },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+   { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+   { 0x00110048, 0x20c077bd, 0x00000060, 0x00000114 },
+   { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+   { 0x00000048, 0x20a077bd, 0x00000080, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+   { 0x00110048, 0x20c077bd, 0x00000060, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+   { 0x00000048, 0x20a077bd, 0x00000080, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+   { 0x00110048, 0x20c077bd, 0x00000060, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+   { 0x00000048, 0x20a077bd, 0x00000080, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+   { 0x00110048, 0x20c077bd, 0x00000060, 0x00000220 },
+   { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+   { 0x00000048, 0x20a077bd, 0x00000080, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x00020002 },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+   { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+   { 0x00110048, 0x20c477bd, 0x00000064, 0x00000114 },
+   { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+   { 0x00000048, 0x20a477bd, 0x00000084, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+   { 0x00110048, 0x20c477bd, 0x00000064, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+   { 0x00000048, 0x20a477bd, 0x00000084, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+   { 0x00110048, 0x20c477bd, 0x00000064, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+   { 0x00000048, 0x20a477bd, 0x00000084, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+   { 0x00110048, 0x20c477bd, 0x00000064, 0x00000220 },
+   { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+   { 0x00000048, 0x20a477bd, 0x00000084, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x00040004 },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+   { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+   { 0x00110048, 0x20c877bd, 0x00000068, 0x00000114 },
+   { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+   { 0x00000048, 0x20a877bd, 0x00000088, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+   { 0x00110048, 0x20c877bd, 0x00000068, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+   { 0x00000048, 0x20a877bd, 0x00000088, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+   { 0x00110048, 0x20c877bd, 0x00000068, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+   { 0x00000048, 0x20a877bd, 0x00000088, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+   { 0x00110048, 0x20c877bd, 0x00000068, 0x00000220 },
+   { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+   { 0x00000048, 0x20a877bd, 0x00000088, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x00060006 },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+   { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+   { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000114 },
+   { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+   { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+   { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+   { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+   { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+   { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+   { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000220 },
+   { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+   { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x00080008 },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+   { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+   { 0x00110048, 0x20d077bd, 0x00000070, 0x00000114 },
+   { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+   { 0x00000048, 0x20b077bd, 0x00000090, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+   { 0x00110048, 0x20d077bd, 0x00000070, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+   { 0x00000048, 0x20b077bd, 0x00000090, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+   { 0x00110048, 0x20d077bd, 0x00000070, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+   { 0x00000048, 0x20b077bd, 0x00000090, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+   { 0x00110048, 0x20d077bd, 0x00000070, 0x00000220 },
+   { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+   { 0x00000048, 0x20b077bd, 0x00000090, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x000a000a },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+   { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+   { 0x00110048, 0x20d477bd, 0x00000074, 0x00000114 },
+   { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+   { 0x00000048, 0x20b477bd, 0x00000094, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+   { 0x00110048, 0x20d477bd, 0x00000074, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+   { 0x00000048, 0x20b477bd, 0x00000094, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+   { 0x00110048, 0x20d477bd, 0x00000074, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+   { 0x00000048, 0x20b477bd, 0x00000094, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+   { 0x00110048, 0x20d477bd, 0x00000074, 0x00000220 },
+   { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+   { 0x00000048, 0x20b477bd, 0x00000094, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x000c000c },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+   { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+   { 0x00110048, 0x20d877bd, 0x00000078, 0x00000114 },
+   { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+   { 0x00000048, 0x20b877bd, 0x00000098, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+   { 0x00110048, 0x20d877bd, 0x00000078, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+   { 0x00000048, 0x20b877bd, 0x00000098, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+   { 0x00110048, 0x20d877bd, 0x00000078, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+   { 0x00000048, 0x20b877bd, 0x00000098, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+   { 0x00110048, 0x20d877bd, 0x00000078, 0x00000220 },
+   { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+   { 0x00000048, 0x20b877bd, 0x00000098, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x000e000e },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+   { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+   { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000114 },
+   { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+   { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+   { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+   { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+   { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+   { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+   { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000220 },
+   { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+   { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00800001, 0x22a00169, 0x00000000, 0xffffffff },
+   { 0x02000010, 0x20002d24, 0x020000e4, 0x00000000 },
+   { 0x02010010, 0x20002d24, 0x020000e6, 0x00000000 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x000000d0 },
+   { 0x00000005, 0x23022d31, 0x00000044, 0x00030003 },
+   { 0x00000401, 0x233803bd, 0x000000f4, 0x00000000 },
+   { 0x00000c01, 0x233003bd, 0x00000060, 0x00000000 },
+   { 0x00000c01, 0x233403bd, 0x00000080, 0x00000000 },
+   { 0x00000c01, 0x232803bd, 0x000000c0, 0x00000000 },
+   { 0x00000801, 0x232c03bd, 0x000000a0, 0x00000000 },
+   { 0x00000001, 0x240803bc, 0x000000c0, 0x00000000 },
+   { 0x00000048, 0x24087fbc, 0x00000060, 0x41000000 },
+   { 0x00000448, 0x22e87fbd, 0x000000f4, 0x41e00000 },
+   { 0x00000c41, 0x22e47fbd, 0x00000080, 0x40800000 },
+   { 0x00000001, 0x241003bc, 0x00000060, 0x00000000 },
+   { 0x00000c48, 0x22f07fbd, 0x000000f4, 0x41000000 },
+   { 0x00000801, 0x22f40061, 0x00000000, 0x00000000 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00400001, 0x2400036c, 0x00000000, 0x000062ea },
+   { 0x00400040, 0x24002d8c, 0x00690400, 0x00460046 },
+   { 0x00400009, 0x22c02d8d, 0x00690400, 0x00050005 },
+   { 0x00400001, 0x2400036c, 0x00000000, 0x00006420 },
+   { 0x00400040, 0x24002d8c, 0x00690400, 0x00400040 },
+   { 0x00400409, 0x22402d8d, 0x00690400, 0x00050005 },
+   { 0x00000801, 0x22500061, 0x00000000, 0x01000100 },
+   { 0x00000001, 0x22d00061, 0x00000000, 0x00400040 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x00000230 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x050eb400 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x00000000 },
+   { 0x00000401, 0x233c0021, 0x000000fc, 0x00000000 },
+   { 0x00000801, 0x23240121, 0x000000f8, 0x00000000 },
+   { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+   { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+   { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x00000190 },
+   { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+   { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+   { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+   { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+   { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+   { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+   { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+   { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+   { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+   { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+   { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+   { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+   { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+   { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+   { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+   { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+   { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+   { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+   { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+   { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+   { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+   { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+   { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+   { 0x02000031, 0x28000229, 0x00000200, 0x00000200 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x00000230 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x050eb400 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x00000000 },
+   { 0x00000040, 0x23240d21, 0x000000f8, 0x00000001 },
+   { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+   { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+   { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+   { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+   { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+   { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+   { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+   { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+   { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+   { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+   { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+   { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+   { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+   { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+   { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+   { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+   { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+   { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+   { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+   { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+   { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+   { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+   { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+   { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+   { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+   { 0x02000031, 0x2a000229, 0x00000200, 0x00000200 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x00000230 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x050eb400 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x00000000 },
+   { 0x00000040, 0x23240d21, 0x000000f8, 0x00000002 },
+   { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+   { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+   { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+   { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+   { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+   { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+   { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+   { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+   { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+   { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+   { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+   { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+   { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+   { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+   { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+   { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+   { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+   { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+   { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+   { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+   { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+   { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+   { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+   { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+   { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+   { 0x02000031, 0x2c000229, 0x00000200, 0x00000200 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x00000230 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x050eb400 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x00000000 },
+   { 0x00000040, 0x23240d21, 0x000000f8, 0x00000003 },
+   { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+   { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+   { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+   { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+   { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+   { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+   { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+   { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+   { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+   { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+   { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+   { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+   { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+   { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+   { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+   { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+   { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+   { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+   { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+   { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+   { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+   { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+   { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+   { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+   { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+   { 0x02000031, 0x2e000229, 0x00000200, 0x00000200 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00400040, 0x22002da8, 0x006902c0, 0x00000000 },
+   { 0x00400040, 0x22082da8, 0x006902c0, 0x02000200 },
+   { 0x00600001, 0x23800021, 0x008d0360, 0x00000000 },
+   { 0x00600001, 0x24a00021, 0x008d0360, 0x00000000 },
+   { 0x00600001, 0x25c00021, 0x008d0360, 0x00000000 },
+   { 0x00200401, 0x238001a5, 0x004500e0, 0x00000000 },
+   { 0x00200408, 0x24a03da5, 0x004500e0, 0x00010001 },
+   { 0x00200408, 0x25c03da5, 0x004500e0, 0x00010001 },
+   { 0x00000801, 0x23880061, 0x00000000, 0x000f000f },
+   { 0x00000801, 0x24a80061, 0x00000000, 0x00070007 },
+   { 0x00000801, 0x25c80061, 0x00000000, 0x00070007 },
+   { 0x80800040, 0xa4002d29, 0x00b18400, 0x00800080 },
+   { 0x80800040, 0xa4202d29, 0x00b18420, 0x00800080 },
+   { 0x80800040, 0xa4402d29, 0x00b18440, 0x00800080 },
+   { 0x80800040, 0xa4602d29, 0x00b18460, 0x00800080 },
+   { 0x80600040, 0xc8002d29, 0x00ae8800, 0x00800080 },
+   { 0x80600040, 0xc8402d29, 0x00ae8840, 0x00800080 },
+   { 0x80600040, 0xc0002d29, 0x00ae8000, 0x00800080 },
+   { 0x80600040, 0xc0402d29, 0x00ae8040, 0x00800080 },
+   { 0x00400040, 0x22002da8, 0x006902c0, 0x04000400 },
+   { 0x80800040, 0xb4002d29, 0x00b19400, 0x00800080 },
+   { 0x80800040, 0xb4202d29, 0x00b19420, 0x00800080 },
+   { 0x80800040, 0xb4402d29, 0x00b19440, 0x00800080 },
+   { 0x80800040, 0xb4602d29, 0x00b19460, 0x00800080 },
+   { 0x80600040, 0xd8002d29, 0x00ae9800, 0x00800080 },
+   { 0x80600040, 0xd8402d29, 0x00ae9840, 0x00800080 },
+   { 0x80600040, 0xd0002d29, 0x00ae9000, 0x00800080 },
+   { 0x80600040, 0xd0402d29, 0x00ae9040, 0x00800080 },
+   { 0x00400040, 0x22082da8, 0x006902c0, 0x06000600 },
+   { 0x80800040, 0xa4002d29, 0x00b18400, 0x00800080 },
+   { 0x80800040, 0xa4202d29, 0x00b18420, 0x00800080 },
+   { 0x80800040, 0xa4402d29, 0x00b18440, 0x00800080 },
+   { 0x80800040, 0xa4602d29, 0x00b18460, 0x00800080 },
+   { 0x80600040, 0xc8002d29, 0x00ae8800, 0x00800080 },
+   { 0x80600040, 0xc8402d29, 0x00ae8840, 0x00800080 },
+   { 0x80600040, 0xc0002d29, 0x00ae8000, 0x00800080 },
+   { 0x80600040, 0xc0402d29, 0x00ae8040, 0x00800080 },
+   { 0x80800040, 0xb4002d29, 0x00b19400, 0x00800080 },
+   { 0x80800040, 0xb4202d29, 0x00b19420, 0x00800080 },
+   { 0x80800040, 0xb4402d29, 0x00b19440, 0x00800080 },
+   { 0x80800040, 0xb4602d29, 0x00b19460, 0x00800080 },
+   { 0x80600040, 0xd8002d29, 0x00ae9800, 0x00800080 },
+   { 0x80600040, 0xd8402d29, 0x00ae9840, 0x00800080 },
+   { 0x80600040, 0xd0002d29, 0x00ae9000, 0x00800080 },
+   { 0x80600040, 0xd0402d29, 0x00ae9040, 0x00800080 },
+   { 0x00400040, 0x22002da8, 0x006902c0, 0x00000000 },
+   { 0x00400040, 0x22082da8, 0x006902c0, 0x02000200 },
+   { 0x00800401, 0x23a00231, 0x00d28401, 0x00000000 },
+   { 0x00800801, 0x23b00231, 0x00d28421, 0x00000000 },
+   { 0x00800401, 0x23c00231, 0x00d28441, 0x00000000 },
+   { 0x00800801, 0x23d00231, 0x00d28461, 0x00000000 },
+   { 0x00600401, 0x24c00231, 0x00cf8801, 0x00000000 },
+   { 0x00600c01, 0x24c80231, 0x00cf8841, 0x00000000 },
+   { 0x00600401, 0x25e00231, 0x00cf8001, 0x00000000 },
+   { 0x00600c01, 0x25e80231, 0x00cf8041, 0x00000000 },
+   { 0x00400040, 0x22002da8, 0x006902c0, 0x04000400 },
+   { 0x00800401, 0x23e00231, 0x00d29401, 0x00000000 },
+   { 0x00800801, 0x23f00231, 0x00d29421, 0x00000000 },
+   { 0x00800401, 0x24000231, 0x00d29441, 0x00000000 },
+   { 0x00800801, 0x24100231, 0x00d29461, 0x00000000 },
+   { 0x00600c01, 0x24d00231, 0x00cf9801, 0x00000000 },
+   { 0x00600801, 0x24d80231, 0x00cf9841, 0x00000000 },
+   { 0x00600c01, 0x25f00231, 0x00cf9001, 0x00000000 },
+   { 0x00600801, 0x25f80231, 0x00cf9041, 0x00000000 },
+   { 0x00400040, 0x22082da8, 0x006902c0, 0x06000600 },
+   { 0x00800401, 0x24200231, 0x00d28401, 0x00000000 },
+   { 0x00800801, 0x24300231, 0x00d28421, 0x00000000 },
+   { 0x00800401, 0x24400231, 0x00d28441, 0x00000000 },
+   { 0x00800801, 0x24500231, 0x00d28461, 0x00000000 },
+   { 0x00600401, 0x24e00231, 0x00cf8801, 0x00000000 },
+   { 0x00600c01, 0x24e80231, 0x00cf8841, 0x00000000 },
+   { 0x00600401, 0x26000231, 0x00cf8001, 0x00000000 },
+   { 0x00600c01, 0x26080231, 0x00cf8041, 0x00000000 },
+   { 0x00800401, 0x24600231, 0x00d29401, 0x00000000 },
+   { 0x00800801, 0x24700231, 0x00d29421, 0x00000000 },
+   { 0x00800401, 0x24800231, 0x00d29441, 0x00000000 },
+   { 0x00800801, 0x24900231, 0x00d29461, 0x00000000 },
+   { 0x00600c01, 0x24f00231, 0x00cf9801, 0x00000000 },
+   { 0x00600801, 0x24f80231, 0x00cf9841, 0x00000000 },
+   { 0x00600c01, 0x26100231, 0x00cf9001, 0x00000000 },
+   { 0x00600801, 0x26180231, 0x00cf9041, 0x00000000 },
+   { 0x05000031, 0x20000e24, 0x00000380, 0x120a8018 },
+   { 0x05000031, 0x20000e24, 0x000004a0, 0x060a8019 },
+   { 0x05000031, 0x20000e24, 0x000005c0, 0x060a801a },
+   { 0x00600001, 0x2fe00021, 0x008d0000, 0x00000000 },
+   { 0x07000031, 0x20001e24, 0x00000fe0, 0x82000010 },
diff --git a/src/shaders/post_processing/gen7/pa_to_pl3.g7b b/src/shaders/post_processing/gen7/pa_to_pl3.g7b
new file mode 100644
index 0000000..63c6c7a
--- /dev/null
+++ b/src/shaders/post_processing/gen7/pa_to_pl3.g7b
@@ -0,0 +1,586 @@
+   { 0x00600001, 0x23600021, 0x008d0000, 0x00000000 },
+   { 0x00600001, 0x23200021, 0x008d0000, 0x00000000 },
+   { 0x00600001, 0x23400021, 0x008d0000, 0x00000000 },
+   { 0x00600041, 0x206077bd, 0x008d0060, 0x000000f0 },
+   { 0x00200001, 0x211401bd, 0x004500e0, 0x00000000 },
+   { 0x01600010, 0x20002e24, 0x0000005a, 0x00010001 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x00000000 },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+   { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+   { 0x00110048, 0x20c077bd, 0x00000060, 0x00000114 },
+   { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+   { 0x00000048, 0x20a077bd, 0x00000080, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+   { 0x00110048, 0x20c077bd, 0x00000060, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+   { 0x00000048, 0x20a077bd, 0x00000080, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+   { 0x00110048, 0x20c077bd, 0x00000060, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+   { 0x00000048, 0x20a077bd, 0x00000080, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+   { 0x00110048, 0x20c077bd, 0x00000060, 0x00000220 },
+   { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+   { 0x00000048, 0x20a077bd, 0x00000080, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x00020002 },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+   { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+   { 0x00110048, 0x20c477bd, 0x00000064, 0x00000114 },
+   { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+   { 0x00000048, 0x20a477bd, 0x00000084, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+   { 0x00110048, 0x20c477bd, 0x00000064, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+   { 0x00000048, 0x20a477bd, 0x00000084, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+   { 0x00110048, 0x20c477bd, 0x00000064, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+   { 0x00000048, 0x20a477bd, 0x00000084, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+   { 0x00110048, 0x20c477bd, 0x00000064, 0x00000220 },
+   { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+   { 0x00000048, 0x20a477bd, 0x00000084, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x00040004 },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+   { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+   { 0x00110048, 0x20c877bd, 0x00000068, 0x00000114 },
+   { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+   { 0x00000048, 0x20a877bd, 0x00000088, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+   { 0x00110048, 0x20c877bd, 0x00000068, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+   { 0x00000048, 0x20a877bd, 0x00000088, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+   { 0x00110048, 0x20c877bd, 0x00000068, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+   { 0x00000048, 0x20a877bd, 0x00000088, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+   { 0x00110048, 0x20c877bd, 0x00000068, 0x00000220 },
+   { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+   { 0x00000048, 0x20a877bd, 0x00000088, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x00060006 },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+   { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+   { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000114 },
+   { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+   { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+   { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+   { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+   { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+   { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+   { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000220 },
+   { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+   { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x00080008 },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+   { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+   { 0x00110048, 0x20d077bd, 0x00000070, 0x00000114 },
+   { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+   { 0x00000048, 0x20b077bd, 0x00000090, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+   { 0x00110048, 0x20d077bd, 0x00000070, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+   { 0x00000048, 0x20b077bd, 0x00000090, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+   { 0x00110048, 0x20d077bd, 0x00000070, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+   { 0x00000048, 0x20b077bd, 0x00000090, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+   { 0x00110048, 0x20d077bd, 0x00000070, 0x00000220 },
+   { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+   { 0x00000048, 0x20b077bd, 0x00000090, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x000a000a },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+   { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+   { 0x00110048, 0x20d477bd, 0x00000074, 0x00000114 },
+   { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+   { 0x00000048, 0x20b477bd, 0x00000094, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+   { 0x00110048, 0x20d477bd, 0x00000074, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+   { 0x00000048, 0x20b477bd, 0x00000094, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+   { 0x00110048, 0x20d477bd, 0x00000074, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+   { 0x00000048, 0x20b477bd, 0x00000094, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+   { 0x00110048, 0x20d477bd, 0x00000074, 0x00000220 },
+   { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+   { 0x00000048, 0x20b477bd, 0x00000094, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x000c000c },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+   { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+   { 0x00110048, 0x20d877bd, 0x00000078, 0x00000114 },
+   { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+   { 0x00000048, 0x20b877bd, 0x00000098, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+   { 0x00110048, 0x20d877bd, 0x00000078, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+   { 0x00000048, 0x20b877bd, 0x00000098, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+   { 0x00110048, 0x20d877bd, 0x00000078, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+   { 0x00000048, 0x20b877bd, 0x00000098, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+   { 0x00110048, 0x20d877bd, 0x00000078, 0x00000220 },
+   { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+   { 0x00000048, 0x20b877bd, 0x00000098, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x000e000e },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+   { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+   { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000114 },
+   { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+   { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+   { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+   { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+   { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+   { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+   { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000220 },
+   { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+   { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00800001, 0x22a00169, 0x00000000, 0xffffffff },
+   { 0x02000010, 0x20002d24, 0x020000e4, 0x00000000 },
+   { 0x02010010, 0x20002d24, 0x020000e6, 0x00000000 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x0000001a },
+   { 0x00000005, 0x23022d31, 0x00000044, 0x00030003 },
+   { 0x00000401, 0x233803bd, 0x000000f4, 0x00000000 },
+   { 0x00000c01, 0x233003bd, 0x00000060, 0x00000000 },
+   { 0x00000c01, 0x233403bd, 0x00000080, 0x00000000 },
+   { 0x00000c01, 0x232803bd, 0x000000c0, 0x00000000 },
+   { 0x00000801, 0x232c03bd, 0x000000a0, 0x00000000 },
+   { 0x00000001, 0x240803bc, 0x000000c0, 0x00000000 },
+   { 0x00000048, 0x24087fbc, 0x00000060, 0x41000000 },
+   { 0x00000448, 0x22e87fbd, 0x000000f4, 0x41e00000 },
+   { 0x00000c41, 0x22e47fbd, 0x00000080, 0x40800000 },
+   { 0x00000001, 0x241003bc, 0x00000060, 0x00000000 },
+   { 0x00000c48, 0x22f07fbd, 0x000000f4, 0x41000000 },
+   { 0x00000801, 0x22f40061, 0x00000000, 0x00000000 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00400001, 0x2400036c, 0x00000000, 0x000062ea },
+   { 0x00400040, 0x24002d8c, 0x00690400, 0x00460046 },
+   { 0x00400009, 0x22c02d8d, 0x00690400, 0x00050005 },
+   { 0x00400001, 0x2400036c, 0x00000000, 0x00006420 },
+   { 0x00400040, 0x24002d8c, 0x00690400, 0x00400040 },
+   { 0x00400409, 0x22402d8d, 0x00690400, 0x00050005 },
+   { 0x00000801, 0x22500061, 0x00000000, 0x01000100 },
+   { 0x00000001, 0x22d00061, 0x00000000, 0x00400040 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x00000046 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x050eb400 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x00000000 },
+   { 0x00000401, 0x233c0021, 0x000000fc, 0x00000000 },
+   { 0x00000801, 0x23240121, 0x000000f8, 0x00000000 },
+   { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+   { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+   { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x00000032 },
+   { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+   { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+   { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+   { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+   { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+   { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+   { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+   { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+   { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+   { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+   { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+   { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+   { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+   { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+   { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+   { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+   { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+   { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+   { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+   { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+   { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+   { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+   { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+   { 0x02000031, 0x28000229, 0x00000200, 0x00000200 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x00000046 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x050eb400 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x00000000 },
+   { 0x00000040, 0x23240d21, 0x000000f8, 0x00000001 },
+   { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+   { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+   { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+   { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+   { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+   { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+   { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+   { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+   { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+   { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+   { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+   { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+   { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+   { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+   { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+   { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+   { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+   { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+   { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+   { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+   { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+   { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+   { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+   { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+   { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+   { 0x02000031, 0x2a000229, 0x00000200, 0x00000200 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x00000046 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x050eb400 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x00000000 },
+   { 0x00000040, 0x23240d21, 0x000000f8, 0x00000002 },
+   { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+   { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+   { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+   { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+   { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+   { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+   { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+   { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+   { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+   { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+   { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+   { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+   { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+   { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+   { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+   { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+   { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+   { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+   { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+   { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+   { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+   { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+   { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+   { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+   { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+   { 0x02000031, 0x2c000229, 0x00000200, 0x00000200 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x00000046 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x050eb400 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x00000000 },
+   { 0x00000040, 0x23240d21, 0x000000f8, 0x00000003 },
+   { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+   { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+   { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+   { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+   { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+   { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+   { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+   { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+   { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+   { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+   { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+   { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+   { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+   { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+   { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+   { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+   { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+   { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+   { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+   { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+   { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+   { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+   { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+   { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+   { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+   { 0x02000031, 0x2e000229, 0x00000200, 0x00000200 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00400040, 0x22002da8, 0x006902c0, 0x00000000 },
+   { 0x00400040, 0x22082da8, 0x006902c0, 0x02000200 },
+   { 0x00600001, 0x23800021, 0x008d0360, 0x00000000 },
+   { 0x00600001, 0x24a00021, 0x008d0360, 0x00000000 },
+   { 0x00600001, 0x25c00021, 0x008d0360, 0x00000000 },
+   { 0x00200401, 0x238001a5, 0x004500e0, 0x00000000 },
+   { 0x00200408, 0x24a03da5, 0x004500e0, 0x00010001 },
+   { 0x00200408, 0x25c03da5, 0x004500e0, 0x00010001 },
+   { 0x00000801, 0x23880061, 0x00000000, 0x000f000f },
+   { 0x00000801, 0x24a80061, 0x00000000, 0x00070007 },
+   { 0x00000801, 0x25c80061, 0x00000000, 0x00070007 },
+   { 0x80800040, 0xa4002d29, 0x00b18400, 0x00800080 },
+   { 0x80800040, 0xa4202d29, 0x00b18420, 0x00800080 },
+   { 0x80800040, 0xa4402d29, 0x00b18440, 0x00800080 },
+   { 0x80800040, 0xa4602d29, 0x00b18460, 0x00800080 },
+   { 0x80600040, 0xc8002d29, 0x00ae8800, 0x00800080 },
+   { 0x80600040, 0xc8402d29, 0x00ae8840, 0x00800080 },
+   { 0x80600040, 0xc0002d29, 0x00ae8000, 0x00800080 },
+   { 0x80600040, 0xc0402d29, 0x00ae8040, 0x00800080 },
+   { 0x00400040, 0x22002da8, 0x006902c0, 0x04000400 },
+   { 0x80800040, 0xb4002d29, 0x00b19400, 0x00800080 },
+   { 0x80800040, 0xb4202d29, 0x00b19420, 0x00800080 },
+   { 0x80800040, 0xb4402d29, 0x00b19440, 0x00800080 },
+   { 0x80800040, 0xb4602d29, 0x00b19460, 0x00800080 },
+   { 0x80600040, 0xd8002d29, 0x00ae9800, 0x00800080 },
+   { 0x80600040, 0xd8402d29, 0x00ae9840, 0x00800080 },
+   { 0x80600040, 0xd0002d29, 0x00ae9000, 0x00800080 },
+   { 0x80600040, 0xd0402d29, 0x00ae9040, 0x00800080 },
+   { 0x00400040, 0x22082da8, 0x006902c0, 0x06000600 },
+   { 0x80800040, 0xa4002d29, 0x00b18400, 0x00800080 },
+   { 0x80800040, 0xa4202d29, 0x00b18420, 0x00800080 },
+   { 0x80800040, 0xa4402d29, 0x00b18440, 0x00800080 },
+   { 0x80800040, 0xa4602d29, 0x00b18460, 0x00800080 },
+   { 0x80600040, 0xc8002d29, 0x00ae8800, 0x00800080 },
+   { 0x80600040, 0xc8402d29, 0x00ae8840, 0x00800080 },
+   { 0x80600040, 0xc0002d29, 0x00ae8000, 0x00800080 },
+   { 0x80600040, 0xc0402d29, 0x00ae8040, 0x00800080 },
+   { 0x80800040, 0xb4002d29, 0x00b19400, 0x00800080 },
+   { 0x80800040, 0xb4202d29, 0x00b19420, 0x00800080 },
+   { 0x80800040, 0xb4402d29, 0x00b19440, 0x00800080 },
+   { 0x80800040, 0xb4602d29, 0x00b19460, 0x00800080 },
+   { 0x80600040, 0xd8002d29, 0x00ae9800, 0x00800080 },
+   { 0x80600040, 0xd8402d29, 0x00ae9840, 0x00800080 },
+   { 0x80600040, 0xd0002d29, 0x00ae9000, 0x00800080 },
+   { 0x80600040, 0xd0402d29, 0x00ae9040, 0x00800080 },
+   { 0x00400040, 0x22002da8, 0x006902c0, 0x00000000 },
+   { 0x00400040, 0x22082da8, 0x006902c0, 0x02000200 },
+   { 0x00800401, 0x23a00231, 0x00d28401, 0x00000000 },
+   { 0x00800801, 0x23b00231, 0x00d28421, 0x00000000 },
+   { 0x00800401, 0x23c00231, 0x00d28441, 0x00000000 },
+   { 0x00800801, 0x23d00231, 0x00d28461, 0x00000000 },
+   { 0x00600401, 0x24c00231, 0x00cf8801, 0x00000000 },
+   { 0x00600c01, 0x24c80231, 0x00cf8841, 0x00000000 },
+   { 0x00600401, 0x25e00231, 0x00cf8001, 0x00000000 },
+   { 0x00600c01, 0x25e80231, 0x00cf8041, 0x00000000 },
+   { 0x00400040, 0x22002da8, 0x006902c0, 0x04000400 },
+   { 0x00800401, 0x23e00231, 0x00d29401, 0x00000000 },
+   { 0x00800801, 0x23f00231, 0x00d29421, 0x00000000 },
+   { 0x00800401, 0x24000231, 0x00d29441, 0x00000000 },
+   { 0x00800801, 0x24100231, 0x00d29461, 0x00000000 },
+   { 0x00600c01, 0x24d00231, 0x00cf9801, 0x00000000 },
+   { 0x00600801, 0x24d80231, 0x00cf9841, 0x00000000 },
+   { 0x00600c01, 0x25f00231, 0x00cf9001, 0x00000000 },
+   { 0x00600801, 0x25f80231, 0x00cf9041, 0x00000000 },
+   { 0x00400040, 0x22082da8, 0x006902c0, 0x06000600 },
+   { 0x00800401, 0x24200231, 0x00d28401, 0x00000000 },
+   { 0x00800801, 0x24300231, 0x00d28421, 0x00000000 },
+   { 0x00800401, 0x24400231, 0x00d28441, 0x00000000 },
+   { 0x00800801, 0x24500231, 0x00d28461, 0x00000000 },
+   { 0x00600401, 0x24e00231, 0x00cf8801, 0x00000000 },
+   { 0x00600c01, 0x24e80231, 0x00cf8841, 0x00000000 },
+   { 0x00600401, 0x26000231, 0x00cf8001, 0x00000000 },
+   { 0x00600c01, 0x26080231, 0x00cf8041, 0x00000000 },
+   { 0x00800401, 0x24600231, 0x00d29401, 0x00000000 },
+   { 0x00800801, 0x24700231, 0x00d29421, 0x00000000 },
+   { 0x00800401, 0x24800231, 0x00d29441, 0x00000000 },
+   { 0x00800801, 0x24900231, 0x00d29461, 0x00000000 },
+   { 0x00600c01, 0x24f00231, 0x00cf9801, 0x00000000 },
+   { 0x00600801, 0x24f80231, 0x00cf9841, 0x00000000 },
+   { 0x00600c01, 0x26100231, 0x00cf9001, 0x00000000 },
+   { 0x00600801, 0x26180231, 0x00cf9041, 0x00000000 },
+   { 0x05000031, 0x20000e24, 0x00000380, 0x120a8018 },
+   { 0x05000031, 0x20000e24, 0x000004a0, 0x060a8019 },
+   { 0x05000031, 0x20000e24, 0x000005c0, 0x060a801a },
+   { 0x00600001, 0x2fe00021, 0x008d0000, 0x00000000 },
+   { 0x07000031, 0x20001e24, 0x00000fe0, 0x82000010 },
diff --git a/src/shaders/post_processing/gen7/pl2_to_pa.asm b/src/shaders/post_processing/gen7/pl2_to_pa.asm
new file mode 100644
index 0000000..5f737ee
--- /dev/null
+++ b/src/shaders/post_processing/gen7/pl2_to_pa.asm
@@ -0,0 +1,17 @@
+// Module name: AVS
+.kernel PL2_TO_PA
+.code
+
+#include "VP_Setup.g4a"
+#include "Set_Layer_0.g4a"
+#include "Set_AVS_Buf_0123_PL2.g4a"
+#include "PL2_AVS_Buf_0.g4a"
+#include "PL2_AVS_Buf_1.g4a"
+#include "PL2_AVS_Buf_2.g4a"
+#include "PL2_AVS_Buf_3.g4a"
+#include "Save_AVS_PA.g4a"        
+#include "EOT.g4a"
+
+.end_code  
+
+.end_kernel
diff --git a/src/shaders/post_processing/gen7/pl2_to_pa.g75b b/src/shaders/post_processing/gen7/pl2_to_pa.g75b
new file mode 100644
index 0000000..5074bc2
--- /dev/null
+++ b/src/shaders/post_processing/gen7/pl2_to_pa.g75b
@@ -0,0 +1,697 @@
+   { 0x00600001, 0x23600021, 0x008d0000, 0x00000000 },
+   { 0x00600001, 0x23200021, 0x008d0000, 0x00000000 },
+   { 0x00600001, 0x23400021, 0x008d0000, 0x00000000 },
+   { 0x00600041, 0x206077bd, 0x008d0060, 0x000000f0 },
+   { 0x00200001, 0x211401bd, 0x004500e0, 0x00000000 },
+   { 0x01600010, 0x20002e24, 0x0000005a, 0x00010001 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x00000000 },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+   { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+   { 0x00110048, 0x20c077bd, 0x00000060, 0x00000114 },
+   { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+   { 0x00000048, 0x20a077bd, 0x00000080, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+   { 0x00110048, 0x20c077bd, 0x00000060, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+   { 0x00000048, 0x20a077bd, 0x00000080, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+   { 0x00110048, 0x20c077bd, 0x00000060, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+   { 0x00000048, 0x20a077bd, 0x00000080, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+   { 0x00110048, 0x20c077bd, 0x00000060, 0x00000220 },
+   { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+   { 0x00000048, 0x20a077bd, 0x00000080, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x00020002 },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+   { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+   { 0x00110048, 0x20c477bd, 0x00000064, 0x00000114 },
+   { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+   { 0x00000048, 0x20a477bd, 0x00000084, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+   { 0x00110048, 0x20c477bd, 0x00000064, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+   { 0x00000048, 0x20a477bd, 0x00000084, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+   { 0x00110048, 0x20c477bd, 0x00000064, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+   { 0x00000048, 0x20a477bd, 0x00000084, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+   { 0x00110048, 0x20c477bd, 0x00000064, 0x00000220 },
+   { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+   { 0x00000048, 0x20a477bd, 0x00000084, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x00040004 },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+   { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+   { 0x00110048, 0x20c877bd, 0x00000068, 0x00000114 },
+   { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+   { 0x00000048, 0x20a877bd, 0x00000088, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+   { 0x00110048, 0x20c877bd, 0x00000068, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+   { 0x00000048, 0x20a877bd, 0x00000088, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+   { 0x00110048, 0x20c877bd, 0x00000068, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+   { 0x00000048, 0x20a877bd, 0x00000088, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+   { 0x00110048, 0x20c877bd, 0x00000068, 0x00000220 },
+   { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+   { 0x00000048, 0x20a877bd, 0x00000088, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x00060006 },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+   { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+   { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000114 },
+   { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+   { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+   { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+   { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+   { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+   { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+   { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000220 },
+   { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+   { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x00080008 },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+   { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+   { 0x00110048, 0x20d077bd, 0x00000070, 0x00000114 },
+   { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+   { 0x00000048, 0x20b077bd, 0x00000090, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+   { 0x00110048, 0x20d077bd, 0x00000070, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+   { 0x00000048, 0x20b077bd, 0x00000090, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+   { 0x00110048, 0x20d077bd, 0x00000070, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+   { 0x00000048, 0x20b077bd, 0x00000090, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+   { 0x00110048, 0x20d077bd, 0x00000070, 0x00000220 },
+   { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+   { 0x00000048, 0x20b077bd, 0x00000090, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x000a000a },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+   { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+   { 0x00110048, 0x20d477bd, 0x00000074, 0x00000114 },
+   { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+   { 0x00000048, 0x20b477bd, 0x00000094, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+   { 0x00110048, 0x20d477bd, 0x00000074, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+   { 0x00000048, 0x20b477bd, 0x00000094, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+   { 0x00110048, 0x20d477bd, 0x00000074, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+   { 0x00000048, 0x20b477bd, 0x00000094, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+   { 0x00110048, 0x20d477bd, 0x00000074, 0x00000220 },
+   { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+   { 0x00000048, 0x20b477bd, 0x00000094, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x000c000c },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+   { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+   { 0x00110048, 0x20d877bd, 0x00000078, 0x00000114 },
+   { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+   { 0x00000048, 0x20b877bd, 0x00000098, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+   { 0x00110048, 0x20d877bd, 0x00000078, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+   { 0x00000048, 0x20b877bd, 0x00000098, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+   { 0x00110048, 0x20d877bd, 0x00000078, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+   { 0x00000048, 0x20b877bd, 0x00000098, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+   { 0x00110048, 0x20d877bd, 0x00000078, 0x00000220 },
+   { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+   { 0x00000048, 0x20b877bd, 0x00000098, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x000e000e },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+   { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+   { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000114 },
+   { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+   { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+   { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+   { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+   { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+   { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+   { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000220 },
+   { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+   { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00800001, 0x22a00169, 0x00000000, 0xffffffff },
+   { 0x02000010, 0x20002d24, 0x020000e4, 0x00000000 },
+   { 0x02010010, 0x20002d24, 0x020000e6, 0x00000000 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x000000d0 },
+   { 0x00000005, 0x23022d31, 0x00000044, 0x00030003 },
+   { 0x00000401, 0x233803bd, 0x000000f4, 0x00000000 },
+   { 0x00000c01, 0x233003bd, 0x00000060, 0x00000000 },
+   { 0x00000c01, 0x233403bd, 0x00000080, 0x00000000 },
+   { 0x00000c01, 0x232803bd, 0x000000c0, 0x00000000 },
+   { 0x00000801, 0x232c03bd, 0x000000a0, 0x00000000 },
+   { 0x00000001, 0x240803bc, 0x000000c0, 0x00000000 },
+   { 0x00000048, 0x24087fbc, 0x00000060, 0x41000000 },
+   { 0x00000448, 0x22e87fbd, 0x000000f4, 0x41e00000 },
+   { 0x00000c41, 0x22e47fbd, 0x00000080, 0x40800000 },
+   { 0x00000001, 0x241003bc, 0x00000060, 0x00000000 },
+   { 0x00000c48, 0x22f07fbd, 0x000000f4, 0x41000000 },
+   { 0x00000801, 0x22f40061, 0x00000000, 0x00000000 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00400001, 0x2400036c, 0x00000000, 0x00006ea2 },
+   { 0x00400040, 0x24002d8c, 0x00690400, 0x00460046 },
+   { 0x00400009, 0x22c02d8d, 0x00690400, 0x00050005 },
+   { 0x00400001, 0x2400036c, 0x00000000, 0x00006204 },
+   { 0x00400040, 0x24002d8c, 0x00690400, 0x00400040 },
+   { 0x00400409, 0x22402d8d, 0x00690400, 0x00050005 },
+   { 0x00000801, 0x22500061, 0x00000000, 0x01000100 },
+   { 0x00000001, 0x22d00061, 0x00000000, 0x00400040 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x00000290 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+   { 0x00000401, 0x233c0021, 0x000000fc, 0x00000000 },
+   { 0x00000801, 0x23240121, 0x000000f8, 0x00000000 },
+   { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+   { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+   { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+   { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+   { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+   { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+   { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+   { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+   { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+   { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+   { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+   { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+   { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+   { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+   { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+   { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+   { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+   { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+   { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+   { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+   { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+   { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+   { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+   { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+   { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+   { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+   { 0x02000031, 0x28000229, 0x00000200, 0x00000200 },
+   { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x048eb801 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000a000 },
+   { 0x02000031, 0x28800229, 0x00000200, 0x00000200 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x00000280 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+   { 0x00000040, 0x23240d21, 0x000000f8, 0x00000001 },
+   { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+   { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+   { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+   { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+   { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+   { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+   { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+   { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+   { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+   { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+   { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+   { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+   { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+   { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+   { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+   { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+   { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+   { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+   { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+   { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+   { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+   { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+   { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+   { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+   { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+   { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+   { 0x02000031, 0x2a000229, 0x00000200, 0x00000200 },
+   { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x048eb801 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000a000 },
+   { 0x02000031, 0x2a800229, 0x00000200, 0x00000200 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x00000280 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+   { 0x00000040, 0x23240d21, 0x000000f8, 0x00000002 },
+   { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+   { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+   { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+   { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+   { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+   { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+   { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+   { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+   { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+   { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+   { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+   { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+   { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+   { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+   { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+   { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+   { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+   { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+   { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+   { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+   { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+   { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+   { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+   { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+   { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+   { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+   { 0x02000031, 0x2c000229, 0x00000200, 0x00000200 },
+   { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x048eb801 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000a000 },
+   { 0x02000031, 0x2c800229, 0x00000200, 0x00000200 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x00000280 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+   { 0x00000040, 0x23240d21, 0x000000f8, 0x00000003 },
+   { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+   { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+   { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+   { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+   { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+   { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+   { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+   { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+   { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+   { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+   { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+   { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+   { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+   { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+   { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+   { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+   { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+   { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+   { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+   { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+   { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+   { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+   { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+   { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+   { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+   { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+   { 0x02000031, 0x2e000229, 0x00000200, 0x00000200 },
+   { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x048eb801 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000a000 },
+   { 0x02000031, 0x2e800229, 0x00000200, 0x00000200 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00400040, 0x22002da8, 0x006902c0, 0x00000000 },
+   { 0x00400040, 0x22082e2c, 0x0069005c, 0x03a003a0 },
+   { 0x00000409, 0x23603da5, 0x000000e0, 0x00010001 },
+   { 0x00000c01, 0x236401a5, 0x000000e2, 0x00000000 },
+   { 0x00000801, 0x23680061, 0x00000000, 0x0001001f },
+   { 0x80400040, 0xc0002d29, 0x008a8000, 0x00800080 },
+   { 0x80400040, 0xc0202d29, 0x008a8020, 0x00800080 },
+   { 0x80400040, 0xc0402d29, 0x008a8040, 0x00800080 },
+   { 0x80400040, 0xc0602d29, 0x008a8060, 0x00800080 },
+   { 0x80600040, 0xa4002d29, 0x008d8400, 0x00800080 },
+   { 0x80600040, 0xa4202d29, 0x008d8420, 0x00800080 },
+   { 0x80600040, 0xa4402d29, 0x008d8440, 0x00800080 },
+   { 0x80600040, 0xa4602d29, 0x008d8460, 0x00800080 },
+   { 0x80400040, 0xc8002d29, 0x008a8800, 0x00800080 },
+   { 0x80400040, 0xc8202d29, 0x008a8820, 0x00800080 },
+   { 0x80400040, 0xc8402d29, 0x008a8840, 0x00800080 },
+   { 0x80400040, 0xc8602d29, 0x008a8860, 0x00800080 },
+   { 0x80400040, 0xc0102d29, 0x008a8010, 0x00800080 },
+   { 0x80400040, 0xc0302d29, 0x008a8030, 0x00800080 },
+   { 0x80400040, 0xc0502d29, 0x008a8050, 0x00800080 },
+   { 0x80400040, 0xc0702d29, 0x008a8070, 0x00800080 },
+   { 0x80600040, 0xa4102d29, 0x008d8410, 0x00800080 },
+   { 0x80600040, 0xa4302d29, 0x008d8430, 0x00800080 },
+   { 0x80600040, 0xa4502d29, 0x008d8450, 0x00800080 },
+   { 0x80600040, 0xa4702d29, 0x008d8470, 0x00800080 },
+   { 0x80400040, 0xc8102d29, 0x008a8810, 0x00800080 },
+   { 0x80400040, 0xc8302d29, 0x008a8830, 0x00800080 },
+   { 0x80400040, 0xc8502d29, 0x008a8850, 0x00800080 },
+   { 0x80400040, 0xc8702d29, 0x008a8870, 0x00800080 },
+   { 0x00400040, 0x22002da8, 0x006902c0, 0x02000200 },
+   { 0x80400040, 0xc0002d29, 0x008a8000, 0x00800080 },
+   { 0x80400040, 0xc0202d29, 0x008a8020, 0x00800080 },
+   { 0x80400040, 0xc0402d29, 0x008a8040, 0x00800080 },
+   { 0x80400040, 0xc0602d29, 0x008a8060, 0x00800080 },
+   { 0x80600040, 0xa4002d29, 0x008d8400, 0x00800080 },
+   { 0x80600040, 0xa4202d29, 0x008d8420, 0x00800080 },
+   { 0x80600040, 0xa4402d29, 0x008d8440, 0x00800080 },
+   { 0x80600040, 0xa4602d29, 0x008d8460, 0x00800080 },
+   { 0x80400040, 0xc8002d29, 0x008a8800, 0x00800080 },
+   { 0x80400040, 0xc8202d29, 0x008a8820, 0x00800080 },
+   { 0x80400040, 0xc8402d29, 0x008a8840, 0x00800080 },
+   { 0x80400040, 0xc8602d29, 0x008a8860, 0x00800080 },
+   { 0x80400040, 0xc0102d29, 0x008a8010, 0x00800080 },
+   { 0x80400040, 0xc0302d29, 0x008a8030, 0x00800080 },
+   { 0x80400040, 0xc0502d29, 0x008a8050, 0x00800080 },
+   { 0x80400040, 0xc0702d29, 0x008a8070, 0x00800080 },
+   { 0x80600040, 0xa4102d29, 0x008d8410, 0x00800080 },
+   { 0x80600040, 0xa4302d29, 0x008d8430, 0x00800080 },
+   { 0x80600040, 0xa4502d29, 0x008d8450, 0x00800080 },
+   { 0x80600040, 0xa4702d29, 0x008d8470, 0x00800080 },
+   { 0x80400040, 0xc8102d29, 0x008a8810, 0x00800080 },
+   { 0x80400040, 0xc8302d29, 0x008a8830, 0x00800080 },
+   { 0x80400040, 0xc8502d29, 0x008a8850, 0x00800080 },
+   { 0x80400040, 0xc8702d29, 0x008a8870, 0x00800080 },
+   { 0x00400040, 0x22002da8, 0x006902c0, 0x04000400 },
+   { 0x80400040, 0xc0002d29, 0x008a8000, 0x00800080 },
+   { 0x80400040, 0xc0202d29, 0x008a8020, 0x00800080 },
+   { 0x80400040, 0xc0402d29, 0x008a8040, 0x00800080 },
+   { 0x80400040, 0xc0602d29, 0x008a8060, 0x00800080 },
+   { 0x80600040, 0xa4002d29, 0x008d8400, 0x00800080 },
+   { 0x80600040, 0xa4202d29, 0x008d8420, 0x00800080 },
+   { 0x80600040, 0xa4402d29, 0x008d8440, 0x00800080 },
+   { 0x80600040, 0xa4602d29, 0x008d8460, 0x00800080 },
+   { 0x80400040, 0xc8002d29, 0x008a8800, 0x00800080 },
+   { 0x80400040, 0xc8202d29, 0x008a8820, 0x00800080 },
+   { 0x80400040, 0xc8402d29, 0x008a8840, 0x00800080 },
+   { 0x80400040, 0xc8602d29, 0x008a8860, 0x00800080 },
+   { 0x80400040, 0xc0102d29, 0x008a8010, 0x00800080 },
+   { 0x80400040, 0xc0302d29, 0x008a8030, 0x00800080 },
+   { 0x80400040, 0xc0502d29, 0x008a8050, 0x00800080 },
+   { 0x80400040, 0xc0702d29, 0x008a8070, 0x00800080 },
+   { 0x80600040, 0xa4102d29, 0x008d8410, 0x00800080 },
+   { 0x80600040, 0xa4302d29, 0x008d8430, 0x00800080 },
+   { 0x80600040, 0xa4502d29, 0x008d8450, 0x00800080 },
+   { 0x80600040, 0xa4702d29, 0x008d8470, 0x00800080 },
+   { 0x80400040, 0xc8102d29, 0x008a8810, 0x00800080 },
+   { 0x80400040, 0xc8302d29, 0x008a8830, 0x00800080 },
+   { 0x80400040, 0xc8502d29, 0x008a8850, 0x00800080 },
+   { 0x80400040, 0xc8702d29, 0x008a8870, 0x00800080 },
+   { 0x00400040, 0x22002da8, 0x006902c0, 0x06000600 },
+   { 0x80400040, 0xc0002d29, 0x008a8000, 0x00800080 },
+   { 0x80400040, 0xc0202d29, 0x008a8020, 0x00800080 },
+   { 0x80400040, 0xc0402d29, 0x008a8040, 0x00800080 },
+   { 0x80400040, 0xc0602d29, 0x008a8060, 0x00800080 },
+   { 0x80600040, 0xa4002d29, 0x008d8400, 0x00800080 },
+   { 0x80600040, 0xa4202d29, 0x008d8420, 0x00800080 },
+   { 0x80600040, 0xa4402d29, 0x008d8440, 0x00800080 },
+   { 0x80600040, 0xa4602d29, 0x008d8460, 0x00800080 },
+   { 0x80400040, 0xc8002d29, 0x008a8800, 0x00800080 },
+   { 0x80400040, 0xc8202d29, 0x008a8820, 0x00800080 },
+   { 0x80400040, 0xc8402d29, 0x008a8840, 0x00800080 },
+   { 0x80400040, 0xc8602d29, 0x008a8860, 0x00800080 },
+   { 0x80400040, 0xc0102d29, 0x008a8010, 0x00800080 },
+   { 0x80400040, 0xc0302d29, 0x008a8030, 0x00800080 },
+   { 0x80400040, 0xc0502d29, 0x008a8050, 0x00800080 },
+   { 0x80400040, 0xc0702d29, 0x008a8070, 0x00800080 },
+   { 0x80600040, 0xa4102d29, 0x008d8410, 0x00800080 },
+   { 0x80600040, 0xa4302d29, 0x008d8430, 0x00800080 },
+   { 0x80600040, 0xa4502d29, 0x008d8450, 0x00800080 },
+   { 0x80600040, 0xa4702d29, 0x008d8470, 0x00800080 },
+   { 0x80400040, 0xc8102d29, 0x008a8810, 0x00800080 },
+   { 0x80400040, 0xc8302d29, 0x008a8830, 0x00800080 },
+   { 0x80400040, 0xc8502d29, 0x008a8850, 0x00800080 },
+   { 0x80400040, 0xc8702d29, 0x008a8870, 0x00800080 },
+   { 0x00400040, 0x22002da8, 0x006902c0, 0x08000800 },
+   { 0x00400040, 0x22002da8, 0x006902c0, 0x00000000 },
+   { 0x00600001, 0x23800021, 0x008d0360, 0x00000000 },
+   { 0x00600001, 0x24a00021, 0x008d0360, 0x00000000 },
+   { 0x00000040, 0x24a41ca5, 0x00000364, 0x00000002 },
+   { 0x00600401, 0xf8000231, 0x00cf8001, 0x00000000 },
+   { 0x00600401, 0xf8200231, 0x00cf8021, 0x00000000 },
+   { 0x00800c01, 0xd0000231, 0x00d28401, 0x00000000 },
+   { 0x00800c01, 0xd0200231, 0x00d28421, 0x00000000 },
+   { 0x00600801, 0xf4000231, 0x00cf8801, 0x00000000 },
+   { 0x00600801, 0xf4200231, 0x00cf8821, 0x00000000 },
+   { 0x00600401, 0xf9200231, 0x00cf8041, 0x00000000 },
+   { 0x00600401, 0xf9400231, 0x00cf8061, 0x00000000 },
+   { 0x00800c01, 0xd1200231, 0x00d28441, 0x00000000 },
+   { 0x00800c01, 0xd1400231, 0x00d28461, 0x00000000 },
+   { 0x00600801, 0xf5200231, 0x00cf8841, 0x00000000 },
+   { 0x00600801, 0xf5400231, 0x00cf8861, 0x00000000 },
+   { 0x05000031, 0x20000e24, 0x00000380, 0x060a8018 },
+   { 0x05000031, 0x20000e24, 0x000004a0, 0x060a8018 },
+   { 0x00400040, 0x22002da8, 0x006902c0, 0x02000200 },
+   { 0x00000040, 0x23841ca5, 0x00000364, 0x00000004 },
+   { 0x00000040, 0x24a41ca5, 0x00000364, 0x00000006 },
+   { 0x00600401, 0xf8000231, 0x00cf8001, 0x00000000 },
+   { 0x00600401, 0xf8200231, 0x00cf8021, 0x00000000 },
+   { 0x00800c01, 0xd0000231, 0x00d28401, 0x00000000 },
+   { 0x00800c01, 0xd0200231, 0x00d28421, 0x00000000 },
+   { 0x00600801, 0xf4000231, 0x00cf8801, 0x00000000 },
+   { 0x00600801, 0xf4200231, 0x00cf8821, 0x00000000 },
+   { 0x00600401, 0xf9200231, 0x00cf8041, 0x00000000 },
+   { 0x00600401, 0xf9400231, 0x00cf8061, 0x00000000 },
+   { 0x00800c01, 0xd1200231, 0x00d28441, 0x00000000 },
+   { 0x00800c01, 0xd1400231, 0x00d28461, 0x00000000 },
+   { 0x00600801, 0xf5200231, 0x00cf8841, 0x00000000 },
+   { 0x00600801, 0xf5400231, 0x00cf8861, 0x00000000 },
+   { 0x05000031, 0x20000e24, 0x00000380, 0x060a8018 },
+   { 0x05000031, 0x20000e24, 0x000004a0, 0x060a8018 },
+   { 0x00400040, 0x22002da8, 0x006902c0, 0x04000400 },
+   { 0x00000040, 0x23841ca5, 0x00000364, 0x00000008 },
+   { 0x00000040, 0x24a41ca5, 0x00000364, 0x0000000a },
+   { 0x00600401, 0xf8000231, 0x00cf8001, 0x00000000 },
+   { 0x00600401, 0xf8200231, 0x00cf8021, 0x00000000 },
+   { 0x00800c01, 0xd0000231, 0x00d28401, 0x00000000 },
+   { 0x00800c01, 0xd0200231, 0x00d28421, 0x00000000 },
+   { 0x00600801, 0xf4000231, 0x00cf8801, 0x00000000 },
+   { 0x00600801, 0xf4200231, 0x00cf8821, 0x00000000 },
+   { 0x00600401, 0xf9200231, 0x00cf8041, 0x00000000 },
+   { 0x00600401, 0xf9400231, 0x00cf8061, 0x00000000 },
+   { 0x00800c01, 0xd1200231, 0x00d28441, 0x00000000 },
+   { 0x00800c01, 0xd1400231, 0x00d28461, 0x00000000 },
+   { 0x00600801, 0xf5200231, 0x00cf8841, 0x00000000 },
+   { 0x00600801, 0xf5400231, 0x00cf8861, 0x00000000 },
+   { 0x05000031, 0x20000e24, 0x00000380, 0x060a8018 },
+   { 0x05000031, 0x20000e24, 0x000004a0, 0x060a8018 },
+   { 0x00400040, 0x22002da8, 0x006902c0, 0x06000600 },
+   { 0x00000040, 0x23841ca5, 0x00000364, 0x0000000c },
+   { 0x00000040, 0x24a41ca5, 0x00000364, 0x0000000e },
+   { 0x00600401, 0xf8000231, 0x00cf8001, 0x00000000 },
+   { 0x00600401, 0xf8200231, 0x00cf8021, 0x00000000 },
+   { 0x00800c01, 0xd0000231, 0x00d28401, 0x00000000 },
+   { 0x00800c01, 0xd0200231, 0x00d28421, 0x00000000 },
+   { 0x00600801, 0xf4000231, 0x00cf8801, 0x00000000 },
+   { 0x00600801, 0xf4200231, 0x00cf8821, 0x00000000 },
+   { 0x00600401, 0xf9200231, 0x00cf8041, 0x00000000 },
+   { 0x00600401, 0xf9400231, 0x00cf8061, 0x00000000 },
+   { 0x00800c01, 0xd1200231, 0x00d28441, 0x00000000 },
+   { 0x00800c01, 0xd1400231, 0x00d28461, 0x00000000 },
+   { 0x00600801, 0xf5200231, 0x00cf8841, 0x00000000 },
+   { 0x00600801, 0xf5400231, 0x00cf8861, 0x00000000 },
+   { 0x05000031, 0x20000e24, 0x00000380, 0x060a8018 },
+   { 0x05000031, 0x20000e24, 0x000004a0, 0x060a8018 },
+   { 0x00600001, 0x2fe00021, 0x008d0000, 0x00000000 },
+   { 0x07000031, 0x20001e24, 0x00000fe0, 0x82000010 },
diff --git a/src/shaders/post_processing/gen7/pl2_to_pa.g7b b/src/shaders/post_processing/gen7/pl2_to_pa.g7b
new file mode 100644
index 0000000..053b837
--- /dev/null
+++ b/src/shaders/post_processing/gen7/pl2_to_pa.g7b
@@ -0,0 +1,697 @@
+   { 0x00600001, 0x23600021, 0x008d0000, 0x00000000 },
+   { 0x00600001, 0x23200021, 0x008d0000, 0x00000000 },
+   { 0x00600001, 0x23400021, 0x008d0000, 0x00000000 },
+   { 0x00600041, 0x206077bd, 0x008d0060, 0x000000f0 },
+   { 0x00200001, 0x211401bd, 0x004500e0, 0x00000000 },
+   { 0x01600010, 0x20002e24, 0x0000005a, 0x00010001 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x00000000 },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+   { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+   { 0x00110048, 0x20c077bd, 0x00000060, 0x00000114 },
+   { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+   { 0x00000048, 0x20a077bd, 0x00000080, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+   { 0x00110048, 0x20c077bd, 0x00000060, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+   { 0x00000048, 0x20a077bd, 0x00000080, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+   { 0x00110048, 0x20c077bd, 0x00000060, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+   { 0x00000048, 0x20a077bd, 0x00000080, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+   { 0x00110048, 0x20c077bd, 0x00000060, 0x00000220 },
+   { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+   { 0x00000048, 0x20a077bd, 0x00000080, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x00020002 },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+   { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+   { 0x00110048, 0x20c477bd, 0x00000064, 0x00000114 },
+   { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+   { 0x00000048, 0x20a477bd, 0x00000084, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+   { 0x00110048, 0x20c477bd, 0x00000064, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+   { 0x00000048, 0x20a477bd, 0x00000084, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+   { 0x00110048, 0x20c477bd, 0x00000064, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+   { 0x00000048, 0x20a477bd, 0x00000084, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+   { 0x00110048, 0x20c477bd, 0x00000064, 0x00000220 },
+   { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+   { 0x00000048, 0x20a477bd, 0x00000084, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x00040004 },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+   { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+   { 0x00110048, 0x20c877bd, 0x00000068, 0x00000114 },
+   { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+   { 0x00000048, 0x20a877bd, 0x00000088, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+   { 0x00110048, 0x20c877bd, 0x00000068, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+   { 0x00000048, 0x20a877bd, 0x00000088, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+   { 0x00110048, 0x20c877bd, 0x00000068, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+   { 0x00000048, 0x20a877bd, 0x00000088, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+   { 0x00110048, 0x20c877bd, 0x00000068, 0x00000220 },
+   { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+   { 0x00000048, 0x20a877bd, 0x00000088, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x00060006 },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+   { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+   { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000114 },
+   { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+   { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+   { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+   { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+   { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+   { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+   { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000220 },
+   { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+   { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x00080008 },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+   { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+   { 0x00110048, 0x20d077bd, 0x00000070, 0x00000114 },
+   { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+   { 0x00000048, 0x20b077bd, 0x00000090, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+   { 0x00110048, 0x20d077bd, 0x00000070, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+   { 0x00000048, 0x20b077bd, 0x00000090, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+   { 0x00110048, 0x20d077bd, 0x00000070, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+   { 0x00000048, 0x20b077bd, 0x00000090, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+   { 0x00110048, 0x20d077bd, 0x00000070, 0x00000220 },
+   { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+   { 0x00000048, 0x20b077bd, 0x00000090, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x000a000a },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+   { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+   { 0x00110048, 0x20d477bd, 0x00000074, 0x00000114 },
+   { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+   { 0x00000048, 0x20b477bd, 0x00000094, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+   { 0x00110048, 0x20d477bd, 0x00000074, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+   { 0x00000048, 0x20b477bd, 0x00000094, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+   { 0x00110048, 0x20d477bd, 0x00000074, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+   { 0x00000048, 0x20b477bd, 0x00000094, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+   { 0x00110048, 0x20d477bd, 0x00000074, 0x00000220 },
+   { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+   { 0x00000048, 0x20b477bd, 0x00000094, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x000c000c },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+   { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+   { 0x00110048, 0x20d877bd, 0x00000078, 0x00000114 },
+   { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+   { 0x00000048, 0x20b877bd, 0x00000098, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+   { 0x00110048, 0x20d877bd, 0x00000078, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+   { 0x00000048, 0x20b877bd, 0x00000098, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+   { 0x00110048, 0x20d877bd, 0x00000078, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+   { 0x00000048, 0x20b877bd, 0x00000098, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+   { 0x00110048, 0x20d877bd, 0x00000078, 0x00000220 },
+   { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+   { 0x00000048, 0x20b877bd, 0x00000098, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x000e000e },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+   { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+   { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000114 },
+   { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+   { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+   { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+   { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+   { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+   { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+   { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000220 },
+   { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+   { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00800001, 0x22a00169, 0x00000000, 0xffffffff },
+   { 0x02000010, 0x20002d24, 0x020000e4, 0x00000000 },
+   { 0x02010010, 0x20002d24, 0x020000e6, 0x00000000 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x0000001a },
+   { 0x00000005, 0x23022d31, 0x00000044, 0x00030003 },
+   { 0x00000401, 0x233803bd, 0x000000f4, 0x00000000 },
+   { 0x00000c01, 0x233003bd, 0x00000060, 0x00000000 },
+   { 0x00000c01, 0x233403bd, 0x00000080, 0x00000000 },
+   { 0x00000c01, 0x232803bd, 0x000000c0, 0x00000000 },
+   { 0x00000801, 0x232c03bd, 0x000000a0, 0x00000000 },
+   { 0x00000001, 0x240803bc, 0x000000c0, 0x00000000 },
+   { 0x00000048, 0x24087fbc, 0x00000060, 0x41000000 },
+   { 0x00000448, 0x22e87fbd, 0x000000f4, 0x41e00000 },
+   { 0x00000c41, 0x22e47fbd, 0x00000080, 0x40800000 },
+   { 0x00000001, 0x241003bc, 0x00000060, 0x00000000 },
+   { 0x00000c48, 0x22f07fbd, 0x000000f4, 0x41000000 },
+   { 0x00000801, 0x22f40061, 0x00000000, 0x00000000 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00400001, 0x2400036c, 0x00000000, 0x00006ea2 },
+   { 0x00400040, 0x24002d8c, 0x00690400, 0x00460046 },
+   { 0x00400009, 0x22c02d8d, 0x00690400, 0x00050005 },
+   { 0x00400001, 0x2400036c, 0x00000000, 0x00006204 },
+   { 0x00400040, 0x24002d8c, 0x00690400, 0x00400040 },
+   { 0x00400409, 0x22402d8d, 0x00690400, 0x00050005 },
+   { 0x00000801, 0x22500061, 0x00000000, 0x01000100 },
+   { 0x00000001, 0x22d00061, 0x00000000, 0x00400040 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x00000052 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+   { 0x00000401, 0x233c0021, 0x000000fc, 0x00000000 },
+   { 0x00000801, 0x23240121, 0x000000f8, 0x00000000 },
+   { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+   { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+   { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+   { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+   { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+   { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+   { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+   { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+   { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+   { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+   { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+   { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+   { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+   { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+   { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+   { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+   { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+   { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+   { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+   { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+   { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+   { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+   { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+   { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+   { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+   { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+   { 0x02000031, 0x28000229, 0x00000200, 0x00000200 },
+   { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x048eb801 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000a000 },
+   { 0x02000031, 0x28800229, 0x00000200, 0x00000200 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x00000050 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+   { 0x00000040, 0x23240d21, 0x000000f8, 0x00000001 },
+   { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+   { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+   { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+   { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+   { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+   { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+   { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+   { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+   { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+   { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+   { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+   { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+   { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+   { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+   { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+   { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+   { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+   { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+   { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+   { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+   { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+   { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+   { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+   { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+   { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+   { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+   { 0x02000031, 0x2a000229, 0x00000200, 0x00000200 },
+   { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x048eb801 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000a000 },
+   { 0x02000031, 0x2a800229, 0x00000200, 0x00000200 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x00000050 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+   { 0x00000040, 0x23240d21, 0x000000f8, 0x00000002 },
+   { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+   { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+   { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+   { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+   { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+   { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+   { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+   { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+   { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+   { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+   { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+   { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+   { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+   { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+   { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+   { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+   { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+   { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+   { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+   { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+   { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+   { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+   { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+   { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+   { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+   { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+   { 0x02000031, 0x2c000229, 0x00000200, 0x00000200 },
+   { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x048eb801 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000a000 },
+   { 0x02000031, 0x2c800229, 0x00000200, 0x00000200 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x00000050 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+   { 0x00000040, 0x23240d21, 0x000000f8, 0x00000003 },
+   { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+   { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+   { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+   { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+   { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+   { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+   { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+   { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+   { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+   { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+   { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+   { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+   { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+   { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+   { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+   { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+   { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+   { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+   { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+   { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+   { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+   { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+   { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+   { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+   { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+   { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+   { 0x02000031, 0x2e000229, 0x00000200, 0x00000200 },
+   { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x048eb801 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000a000 },
+   { 0x02000031, 0x2e800229, 0x00000200, 0x00000200 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00400040, 0x22002da8, 0x006902c0, 0x00000000 },
+   { 0x00400040, 0x22082e2c, 0x0069005c, 0x03a003a0 },
+   { 0x00000409, 0x23603da5, 0x000000e0, 0x00010001 },
+   { 0x00000c01, 0x236401a5, 0x000000e2, 0x00000000 },
+   { 0x00000801, 0x23680061, 0x00000000, 0x0001001f },
+   { 0x80400040, 0xc0002d29, 0x008a8000, 0x00800080 },
+   { 0x80400040, 0xc0202d29, 0x008a8020, 0x00800080 },
+   { 0x80400040, 0xc0402d29, 0x008a8040, 0x00800080 },
+   { 0x80400040, 0xc0602d29, 0x008a8060, 0x00800080 },
+   { 0x80600040, 0xa4002d29, 0x008d8400, 0x00800080 },
+   { 0x80600040, 0xa4202d29, 0x008d8420, 0x00800080 },
+   { 0x80600040, 0xa4402d29, 0x008d8440, 0x00800080 },
+   { 0x80600040, 0xa4602d29, 0x008d8460, 0x00800080 },
+   { 0x80400040, 0xc8002d29, 0x008a8800, 0x00800080 },
+   { 0x80400040, 0xc8202d29, 0x008a8820, 0x00800080 },
+   { 0x80400040, 0xc8402d29, 0x008a8840, 0x00800080 },
+   { 0x80400040, 0xc8602d29, 0x008a8860, 0x00800080 },
+   { 0x80400040, 0xc0102d29, 0x008a8010, 0x00800080 },
+   { 0x80400040, 0xc0302d29, 0x008a8030, 0x00800080 },
+   { 0x80400040, 0xc0502d29, 0x008a8050, 0x00800080 },
+   { 0x80400040, 0xc0702d29, 0x008a8070, 0x00800080 },
+   { 0x80600040, 0xa4102d29, 0x008d8410, 0x00800080 },
+   { 0x80600040, 0xa4302d29, 0x008d8430, 0x00800080 },
+   { 0x80600040, 0xa4502d29, 0x008d8450, 0x00800080 },
+   { 0x80600040, 0xa4702d29, 0x008d8470, 0x00800080 },
+   { 0x80400040, 0xc8102d29, 0x008a8810, 0x00800080 },
+   { 0x80400040, 0xc8302d29, 0x008a8830, 0x00800080 },
+   { 0x80400040, 0xc8502d29, 0x008a8850, 0x00800080 },
+   { 0x80400040, 0xc8702d29, 0x008a8870, 0x00800080 },
+   { 0x00400040, 0x22002da8, 0x006902c0, 0x02000200 },
+   { 0x80400040, 0xc0002d29, 0x008a8000, 0x00800080 },
+   { 0x80400040, 0xc0202d29, 0x008a8020, 0x00800080 },
+   { 0x80400040, 0xc0402d29, 0x008a8040, 0x00800080 },
+   { 0x80400040, 0xc0602d29, 0x008a8060, 0x00800080 },
+   { 0x80600040, 0xa4002d29, 0x008d8400, 0x00800080 },
+   { 0x80600040, 0xa4202d29, 0x008d8420, 0x00800080 },
+   { 0x80600040, 0xa4402d29, 0x008d8440, 0x00800080 },
+   { 0x80600040, 0xa4602d29, 0x008d8460, 0x00800080 },
+   { 0x80400040, 0xc8002d29, 0x008a8800, 0x00800080 },
+   { 0x80400040, 0xc8202d29, 0x008a8820, 0x00800080 },
+   { 0x80400040, 0xc8402d29, 0x008a8840, 0x00800080 },
+   { 0x80400040, 0xc8602d29, 0x008a8860, 0x00800080 },
+   { 0x80400040, 0xc0102d29, 0x008a8010, 0x00800080 },
+   { 0x80400040, 0xc0302d29, 0x008a8030, 0x00800080 },
+   { 0x80400040, 0xc0502d29, 0x008a8050, 0x00800080 },
+   { 0x80400040, 0xc0702d29, 0x008a8070, 0x00800080 },
+   { 0x80600040, 0xa4102d29, 0x008d8410, 0x00800080 },
+   { 0x80600040, 0xa4302d29, 0x008d8430, 0x00800080 },
+   { 0x80600040, 0xa4502d29, 0x008d8450, 0x00800080 },
+   { 0x80600040, 0xa4702d29, 0x008d8470, 0x00800080 },
+   { 0x80400040, 0xc8102d29, 0x008a8810, 0x00800080 },
+   { 0x80400040, 0xc8302d29, 0x008a8830, 0x00800080 },
+   { 0x80400040, 0xc8502d29, 0x008a8850, 0x00800080 },
+   { 0x80400040, 0xc8702d29, 0x008a8870, 0x00800080 },
+   { 0x00400040, 0x22002da8, 0x006902c0, 0x04000400 },
+   { 0x80400040, 0xc0002d29, 0x008a8000, 0x00800080 },
+   { 0x80400040, 0xc0202d29, 0x008a8020, 0x00800080 },
+   { 0x80400040, 0xc0402d29, 0x008a8040, 0x00800080 },
+   { 0x80400040, 0xc0602d29, 0x008a8060, 0x00800080 },
+   { 0x80600040, 0xa4002d29, 0x008d8400, 0x00800080 },
+   { 0x80600040, 0xa4202d29, 0x008d8420, 0x00800080 },
+   { 0x80600040, 0xa4402d29, 0x008d8440, 0x00800080 },
+   { 0x80600040, 0xa4602d29, 0x008d8460, 0x00800080 },
+   { 0x80400040, 0xc8002d29, 0x008a8800, 0x00800080 },
+   { 0x80400040, 0xc8202d29, 0x008a8820, 0x00800080 },
+   { 0x80400040, 0xc8402d29, 0x008a8840, 0x00800080 },
+   { 0x80400040, 0xc8602d29, 0x008a8860, 0x00800080 },
+   { 0x80400040, 0xc0102d29, 0x008a8010, 0x00800080 },
+   { 0x80400040, 0xc0302d29, 0x008a8030, 0x00800080 },
+   { 0x80400040, 0xc0502d29, 0x008a8050, 0x00800080 },
+   { 0x80400040, 0xc0702d29, 0x008a8070, 0x00800080 },
+   { 0x80600040, 0xa4102d29, 0x008d8410, 0x00800080 },
+   { 0x80600040, 0xa4302d29, 0x008d8430, 0x00800080 },
+   { 0x80600040, 0xa4502d29, 0x008d8450, 0x00800080 },
+   { 0x80600040, 0xa4702d29, 0x008d8470, 0x00800080 },
+   { 0x80400040, 0xc8102d29, 0x008a8810, 0x00800080 },
+   { 0x80400040, 0xc8302d29, 0x008a8830, 0x00800080 },
+   { 0x80400040, 0xc8502d29, 0x008a8850, 0x00800080 },
+   { 0x80400040, 0xc8702d29, 0x008a8870, 0x00800080 },
+   { 0x00400040, 0x22002da8, 0x006902c0, 0x06000600 },
+   { 0x80400040, 0xc0002d29, 0x008a8000, 0x00800080 },
+   { 0x80400040, 0xc0202d29, 0x008a8020, 0x00800080 },
+   { 0x80400040, 0xc0402d29, 0x008a8040, 0x00800080 },
+   { 0x80400040, 0xc0602d29, 0x008a8060, 0x00800080 },
+   { 0x80600040, 0xa4002d29, 0x008d8400, 0x00800080 },
+   { 0x80600040, 0xa4202d29, 0x008d8420, 0x00800080 },
+   { 0x80600040, 0xa4402d29, 0x008d8440, 0x00800080 },
+   { 0x80600040, 0xa4602d29, 0x008d8460, 0x00800080 },
+   { 0x80400040, 0xc8002d29, 0x008a8800, 0x00800080 },
+   { 0x80400040, 0xc8202d29, 0x008a8820, 0x00800080 },
+   { 0x80400040, 0xc8402d29, 0x008a8840, 0x00800080 },
+   { 0x80400040, 0xc8602d29, 0x008a8860, 0x00800080 },
+   { 0x80400040, 0xc0102d29, 0x008a8010, 0x00800080 },
+   { 0x80400040, 0xc0302d29, 0x008a8030, 0x00800080 },
+   { 0x80400040, 0xc0502d29, 0x008a8050, 0x00800080 },
+   { 0x80400040, 0xc0702d29, 0x008a8070, 0x00800080 },
+   { 0x80600040, 0xa4102d29, 0x008d8410, 0x00800080 },
+   { 0x80600040, 0xa4302d29, 0x008d8430, 0x00800080 },
+   { 0x80600040, 0xa4502d29, 0x008d8450, 0x00800080 },
+   { 0x80600040, 0xa4702d29, 0x008d8470, 0x00800080 },
+   { 0x80400040, 0xc8102d29, 0x008a8810, 0x00800080 },
+   { 0x80400040, 0xc8302d29, 0x008a8830, 0x00800080 },
+   { 0x80400040, 0xc8502d29, 0x008a8850, 0x00800080 },
+   { 0x80400040, 0xc8702d29, 0x008a8870, 0x00800080 },
+   { 0x00400040, 0x22002da8, 0x006902c0, 0x08000800 },
+   { 0x00400040, 0x22002da8, 0x006902c0, 0x00000000 },
+   { 0x00600001, 0x23800021, 0x008d0360, 0x00000000 },
+   { 0x00600001, 0x24a00021, 0x008d0360, 0x00000000 },
+   { 0x00000040, 0x24a41ca5, 0x00000364, 0x00000002 },
+   { 0x00600401, 0xf8000231, 0x00cf8001, 0x00000000 },
+   { 0x00600401, 0xf8200231, 0x00cf8021, 0x00000000 },
+   { 0x00800c01, 0xd0000231, 0x00d28401, 0x00000000 },
+   { 0x00800c01, 0xd0200231, 0x00d28421, 0x00000000 },
+   { 0x00600801, 0xf4000231, 0x00cf8801, 0x00000000 },
+   { 0x00600801, 0xf4200231, 0x00cf8821, 0x00000000 },
+   { 0x00600401, 0xf9200231, 0x00cf8041, 0x00000000 },
+   { 0x00600401, 0xf9400231, 0x00cf8061, 0x00000000 },
+   { 0x00800c01, 0xd1200231, 0x00d28441, 0x00000000 },
+   { 0x00800c01, 0xd1400231, 0x00d28461, 0x00000000 },
+   { 0x00600801, 0xf5200231, 0x00cf8841, 0x00000000 },
+   { 0x00600801, 0xf5400231, 0x00cf8861, 0x00000000 },
+   { 0x05000031, 0x20000e24, 0x00000380, 0x060a8018 },
+   { 0x05000031, 0x20000e24, 0x000004a0, 0x060a8018 },
+   { 0x00400040, 0x22002da8, 0x006902c0, 0x02000200 },
+   { 0x00000040, 0x23841ca5, 0x00000364, 0x00000004 },
+   { 0x00000040, 0x24a41ca5, 0x00000364, 0x00000006 },
+   { 0x00600401, 0xf8000231, 0x00cf8001, 0x00000000 },
+   { 0x00600401, 0xf8200231, 0x00cf8021, 0x00000000 },
+   { 0x00800c01, 0xd0000231, 0x00d28401, 0x00000000 },
+   { 0x00800c01, 0xd0200231, 0x00d28421, 0x00000000 },
+   { 0x00600801, 0xf4000231, 0x00cf8801, 0x00000000 },
+   { 0x00600801, 0xf4200231, 0x00cf8821, 0x00000000 },
+   { 0x00600401, 0xf9200231, 0x00cf8041, 0x00000000 },
+   { 0x00600401, 0xf9400231, 0x00cf8061, 0x00000000 },
+   { 0x00800c01, 0xd1200231, 0x00d28441, 0x00000000 },
+   { 0x00800c01, 0xd1400231, 0x00d28461, 0x00000000 },
+   { 0x00600801, 0xf5200231, 0x00cf8841, 0x00000000 },
+   { 0x00600801, 0xf5400231, 0x00cf8861, 0x00000000 },
+   { 0x05000031, 0x20000e24, 0x00000380, 0x060a8018 },
+   { 0x05000031, 0x20000e24, 0x000004a0, 0x060a8018 },
+   { 0x00400040, 0x22002da8, 0x006902c0, 0x04000400 },
+   { 0x00000040, 0x23841ca5, 0x00000364, 0x00000008 },
+   { 0x00000040, 0x24a41ca5, 0x00000364, 0x0000000a },
+   { 0x00600401, 0xf8000231, 0x00cf8001, 0x00000000 },
+   { 0x00600401, 0xf8200231, 0x00cf8021, 0x00000000 },
+   { 0x00800c01, 0xd0000231, 0x00d28401, 0x00000000 },
+   { 0x00800c01, 0xd0200231, 0x00d28421, 0x00000000 },
+   { 0x00600801, 0xf4000231, 0x00cf8801, 0x00000000 },
+   { 0x00600801, 0xf4200231, 0x00cf8821, 0x00000000 },
+   { 0x00600401, 0xf9200231, 0x00cf8041, 0x00000000 },
+   { 0x00600401, 0xf9400231, 0x00cf8061, 0x00000000 },
+   { 0x00800c01, 0xd1200231, 0x00d28441, 0x00000000 },
+   { 0x00800c01, 0xd1400231, 0x00d28461, 0x00000000 },
+   { 0x00600801, 0xf5200231, 0x00cf8841, 0x00000000 },
+   { 0x00600801, 0xf5400231, 0x00cf8861, 0x00000000 },
+   { 0x05000031, 0x20000e24, 0x00000380, 0x060a8018 },
+   { 0x05000031, 0x20000e24, 0x000004a0, 0x060a8018 },
+   { 0x00400040, 0x22002da8, 0x006902c0, 0x06000600 },
+   { 0x00000040, 0x23841ca5, 0x00000364, 0x0000000c },
+   { 0x00000040, 0x24a41ca5, 0x00000364, 0x0000000e },
+   { 0x00600401, 0xf8000231, 0x00cf8001, 0x00000000 },
+   { 0x00600401, 0xf8200231, 0x00cf8021, 0x00000000 },
+   { 0x00800c01, 0xd0000231, 0x00d28401, 0x00000000 },
+   { 0x00800c01, 0xd0200231, 0x00d28421, 0x00000000 },
+   { 0x00600801, 0xf4000231, 0x00cf8801, 0x00000000 },
+   { 0x00600801, 0xf4200231, 0x00cf8821, 0x00000000 },
+   { 0x00600401, 0xf9200231, 0x00cf8041, 0x00000000 },
+   { 0x00600401, 0xf9400231, 0x00cf8061, 0x00000000 },
+   { 0x00800c01, 0xd1200231, 0x00d28441, 0x00000000 },
+   { 0x00800c01, 0xd1400231, 0x00d28461, 0x00000000 },
+   { 0x00600801, 0xf5200231, 0x00cf8841, 0x00000000 },
+   { 0x00600801, 0xf5400231, 0x00cf8861, 0x00000000 },
+   { 0x05000031, 0x20000e24, 0x00000380, 0x060a8018 },
+   { 0x05000031, 0x20000e24, 0x000004a0, 0x060a8018 },
+   { 0x00600001, 0x2fe00021, 0x008d0000, 0x00000000 },
+   { 0x07000031, 0x20001e24, 0x00000fe0, 0x82000010 },
diff --git a/src/shaders/post_processing/gen7/pl2_to_pl2.asm b/src/shaders/post_processing/gen7/pl2_to_pl2.asm
new file mode 100644
index 0000000..6e840d5
--- /dev/null
+++ b/src/shaders/post_processing/gen7/pl2_to_pl2.asm
@@ -0,0 +1,17 @@
+// Module name: AVS
+.kernel PL2_TO_PL2
+.code
+
+#include "VP_Setup.g4a"
+#include "Set_Layer_0.g4a"
+#include "Set_AVS_Buf_0123_PL2.g4a"
+#include "PL2_AVS_Buf_0.g4a"
+#include "PL2_AVS_Buf_1.g4a"
+#include "PL2_AVS_Buf_2.g4a"
+#include "PL2_AVS_Buf_3.g4a"
+#include "Save_AVS_NV12.g4a"        
+#include "EOT.g4a"
+
+.end_code  
+
+.end_kernel
diff --git a/src/shaders/post_processing/gen7/pl2_to_pl2.g75b b/src/shaders/post_processing/gen7/pl2_to_pl2.g75b
new file mode 100644
index 0000000..c25432e
--- /dev/null
+++ b/src/shaders/post_processing/gen7/pl2_to_pl2.g75b
@@ -0,0 +1,654 @@
+   { 0x00600001, 0x23600021, 0x008d0000, 0x00000000 },
+   { 0x00600001, 0x23200021, 0x008d0000, 0x00000000 },
+   { 0x00600001, 0x23400021, 0x008d0000, 0x00000000 },
+   { 0x00600041, 0x206077bd, 0x008d0060, 0x000000f0 },
+   { 0x00200001, 0x211401bd, 0x004500e0, 0x00000000 },
+   { 0x01600010, 0x20002e24, 0x0000005a, 0x00010001 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x00000000 },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+   { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+   { 0x00110048, 0x20c077bd, 0x00000060, 0x00000114 },
+   { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+   { 0x00000048, 0x20a077bd, 0x00000080, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+   { 0x00110048, 0x20c077bd, 0x00000060, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+   { 0x00000048, 0x20a077bd, 0x00000080, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+   { 0x00110048, 0x20c077bd, 0x00000060, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+   { 0x00000048, 0x20a077bd, 0x00000080, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+   { 0x00110048, 0x20c077bd, 0x00000060, 0x00000220 },
+   { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+   { 0x00000048, 0x20a077bd, 0x00000080, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x00020002 },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+   { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+   { 0x00110048, 0x20c477bd, 0x00000064, 0x00000114 },
+   { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+   { 0x00000048, 0x20a477bd, 0x00000084, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+   { 0x00110048, 0x20c477bd, 0x00000064, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+   { 0x00000048, 0x20a477bd, 0x00000084, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+   { 0x00110048, 0x20c477bd, 0x00000064, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+   { 0x00000048, 0x20a477bd, 0x00000084, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+   { 0x00110048, 0x20c477bd, 0x00000064, 0x00000220 },
+   { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+   { 0x00000048, 0x20a477bd, 0x00000084, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x00040004 },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+   { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+   { 0x00110048, 0x20c877bd, 0x00000068, 0x00000114 },
+   { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+   { 0x00000048, 0x20a877bd, 0x00000088, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+   { 0x00110048, 0x20c877bd, 0x00000068, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+   { 0x00000048, 0x20a877bd, 0x00000088, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+   { 0x00110048, 0x20c877bd, 0x00000068, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+   { 0x00000048, 0x20a877bd, 0x00000088, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+   { 0x00110048, 0x20c877bd, 0x00000068, 0x00000220 },
+   { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+   { 0x00000048, 0x20a877bd, 0x00000088, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x00060006 },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+   { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+   { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000114 },
+   { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+   { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+   { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+   { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+   { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+   { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+   { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000220 },
+   { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+   { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x00080008 },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+   { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+   { 0x00110048, 0x20d077bd, 0x00000070, 0x00000114 },
+   { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+   { 0x00000048, 0x20b077bd, 0x00000090, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+   { 0x00110048, 0x20d077bd, 0x00000070, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+   { 0x00000048, 0x20b077bd, 0x00000090, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+   { 0x00110048, 0x20d077bd, 0x00000070, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+   { 0x00000048, 0x20b077bd, 0x00000090, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+   { 0x00110048, 0x20d077bd, 0x00000070, 0x00000220 },
+   { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+   { 0x00000048, 0x20b077bd, 0x00000090, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x000a000a },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+   { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+   { 0x00110048, 0x20d477bd, 0x00000074, 0x00000114 },
+   { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+   { 0x00000048, 0x20b477bd, 0x00000094, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+   { 0x00110048, 0x20d477bd, 0x00000074, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+   { 0x00000048, 0x20b477bd, 0x00000094, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+   { 0x00110048, 0x20d477bd, 0x00000074, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+   { 0x00000048, 0x20b477bd, 0x00000094, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+   { 0x00110048, 0x20d477bd, 0x00000074, 0x00000220 },
+   { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+   { 0x00000048, 0x20b477bd, 0x00000094, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x000c000c },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+   { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+   { 0x00110048, 0x20d877bd, 0x00000078, 0x00000114 },
+   { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+   { 0x00000048, 0x20b877bd, 0x00000098, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+   { 0x00110048, 0x20d877bd, 0x00000078, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+   { 0x00000048, 0x20b877bd, 0x00000098, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+   { 0x00110048, 0x20d877bd, 0x00000078, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+   { 0x00000048, 0x20b877bd, 0x00000098, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+   { 0x00110048, 0x20d877bd, 0x00000078, 0x00000220 },
+   { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+   { 0x00000048, 0x20b877bd, 0x00000098, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x000e000e },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+   { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+   { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000114 },
+   { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+   { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+   { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+   { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+   { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+   { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+   { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000220 },
+   { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+   { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00800001, 0x22a00169, 0x00000000, 0xffffffff },
+   { 0x02000010, 0x20002d24, 0x020000e4, 0x00000000 },
+   { 0x02010010, 0x20002d24, 0x020000e6, 0x00000000 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x000000d0 },
+   { 0x00000005, 0x23022d31, 0x00000044, 0x00030003 },
+   { 0x00000401, 0x233803bd, 0x000000f4, 0x00000000 },
+   { 0x00000c01, 0x233003bd, 0x00000060, 0x00000000 },
+   { 0x00000c01, 0x233403bd, 0x00000080, 0x00000000 },
+   { 0x00000c01, 0x232803bd, 0x000000c0, 0x00000000 },
+   { 0x00000801, 0x232c03bd, 0x000000a0, 0x00000000 },
+   { 0x00000001, 0x240803bc, 0x000000c0, 0x00000000 },
+   { 0x00000048, 0x24087fbc, 0x00000060, 0x41000000 },
+   { 0x00000448, 0x22e87fbd, 0x000000f4, 0x41e00000 },
+   { 0x00000c41, 0x22e47fbd, 0x00000080, 0x40800000 },
+   { 0x00000001, 0x241003bc, 0x00000060, 0x00000000 },
+   { 0x00000c48, 0x22f07fbd, 0x000000f4, 0x41000000 },
+   { 0x00000801, 0x22f40061, 0x00000000, 0x00000000 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00400001, 0x2400036c, 0x00000000, 0x00006ea2 },
+   { 0x00400040, 0x24002d8c, 0x00690400, 0x00460046 },
+   { 0x00400009, 0x22c02d8d, 0x00690400, 0x00050005 },
+   { 0x00400001, 0x2400036c, 0x00000000, 0x00006204 },
+   { 0x00400040, 0x24002d8c, 0x00690400, 0x00400040 },
+   { 0x00400409, 0x22402d8d, 0x00690400, 0x00050005 },
+   { 0x00000801, 0x22500061, 0x00000000, 0x01000100 },
+   { 0x00000001, 0x22d00061, 0x00000000, 0x00400040 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x00000290 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+   { 0x00000401, 0x233c0021, 0x000000fc, 0x00000000 },
+   { 0x00000801, 0x23240121, 0x000000f8, 0x00000000 },
+   { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+   { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+   { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+   { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+   { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+   { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+   { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+   { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+   { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+   { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+   { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+   { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+   { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+   { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+   { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+   { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+   { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+   { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+   { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+   { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+   { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+   { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+   { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+   { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+   { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+   { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+   { 0x02000031, 0x28000229, 0x00000200, 0x00000200 },
+   { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x048eb801 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000a000 },
+   { 0x02000031, 0x28800229, 0x00000200, 0x00000200 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x00000280 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+   { 0x00000040, 0x23240d21, 0x000000f8, 0x00000001 },
+   { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+   { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+   { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+   { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+   { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+   { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+   { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+   { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+   { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+   { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+   { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+   { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+   { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+   { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+   { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+   { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+   { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+   { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+   { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+   { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+   { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+   { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+   { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+   { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+   { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+   { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+   { 0x02000031, 0x2a000229, 0x00000200, 0x00000200 },
+   { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x048eb801 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000a000 },
+   { 0x02000031, 0x2a800229, 0x00000200, 0x00000200 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x00000280 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+   { 0x00000040, 0x23240d21, 0x000000f8, 0x00000002 },
+   { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+   { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+   { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+   { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+   { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+   { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+   { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+   { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+   { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+   { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+   { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+   { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+   { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+   { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+   { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+   { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+   { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+   { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+   { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+   { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+   { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+   { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+   { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+   { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+   { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+   { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+   { 0x02000031, 0x2c000229, 0x00000200, 0x00000200 },
+   { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x048eb801 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000a000 },
+   { 0x02000031, 0x2c800229, 0x00000200, 0x00000200 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x00000280 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+   { 0x00000040, 0x23240d21, 0x000000f8, 0x00000003 },
+   { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+   { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+   { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+   { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+   { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+   { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+   { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+   { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+   { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+   { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+   { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+   { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+   { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+   { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+   { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+   { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+   { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+   { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+   { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+   { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+   { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+   { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+   { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+   { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+   { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+   { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+   { 0x02000031, 0x2e000229, 0x00000200, 0x00000200 },
+   { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x048eb801 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000a000 },
+   { 0x02000031, 0x2e800229, 0x00000200, 0x00000200 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00400040, 0x22002da8, 0x006902c0, 0x00000000 },
+   { 0x00400040, 0x22082da8, 0x006902c0, 0x02000200 },
+   { 0x00600001, 0x23800021, 0x008d0360, 0x00000000 },
+   { 0x00600001, 0x24a00021, 0x008d0360, 0x00000000 },
+   { 0x00200401, 0x238001a5, 0x004500e0, 0x00000000 },
+   { 0x00000401, 0x24a001a5, 0x000000e0, 0x00000000 },
+   { 0x00000c08, 0x24a43da5, 0x000000e2, 0x00010001 },
+   { 0x00000801, 0x23880061, 0x00000000, 0x000f000f },
+   { 0x00000801, 0x24a80061, 0x00000000, 0x0007000f },
+   { 0x80800040, 0xa4002d29, 0x00b18400, 0x00800080 },
+   { 0x80800040, 0xa4202d29, 0x00b18420, 0x00800080 },
+   { 0x80800040, 0xa4402d29, 0x00b18440, 0x00800080 },
+   { 0x80800040, 0xa4602d29, 0x00b18460, 0x00800080 },
+   { 0x00600008, 0x45c03d29, 0x00ae8800, 0x00010001 },
+   { 0x00600008, 0x45e03d29, 0x00ae8820, 0x00010001 },
+   { 0x00600008, 0x46003d29, 0x00ae8840, 0x00010001 },
+   { 0x00600008, 0x46203d29, 0x00ae8860, 0x00010001 },
+   { 0x00600040, 0x45c02529, 0x00ae05c0, 0x00ae05e0 },
+   { 0x80600040, 0xc8002d29, 0x00ae05c0, 0x00800080 },
+   { 0x00600040, 0x46002529, 0x00ae0600, 0x00ae0620 },
+   { 0x80600040, 0xc8402d29, 0x00ae0600, 0x00800080 },
+   { 0x00600008, 0x46403d29, 0x00ae8000, 0x00010001 },
+   { 0x00600008, 0x46603d29, 0x00ae8020, 0x00010001 },
+   { 0x00600008, 0x46803d29, 0x00ae8040, 0x00010001 },
+   { 0x00600008, 0x46a03d29, 0x00ae8060, 0x00010001 },
+   { 0x00600040, 0x46402529, 0x00ae0640, 0x00ae0660 },
+   { 0x80600040, 0xc0002d29, 0x00ae0640, 0x00800080 },
+   { 0x00600040, 0x46802529, 0x00ae0680, 0x00ae06a0 },
+   { 0x80600040, 0xc0402d29, 0x00ae0680, 0x00800080 },
+   { 0x00400040, 0x22002da8, 0x006902c0, 0x04000400 },
+   { 0x80800040, 0xb4002d29, 0x00b19400, 0x00800080 },
+   { 0x80800040, 0xb4202d29, 0x00b19420, 0x00800080 },
+   { 0x80800040, 0xb4402d29, 0x00b19440, 0x00800080 },
+   { 0x80800040, 0xb4602d29, 0x00b19460, 0x00800080 },
+   { 0x00600008, 0x45c03d29, 0x00ae9800, 0x00010001 },
+   { 0x00600008, 0x45e03d29, 0x00ae9820, 0x00010001 },
+   { 0x00600008, 0x46003d29, 0x00ae9840, 0x00010001 },
+   { 0x00600008, 0x46203d29, 0x00ae9860, 0x00010001 },
+   { 0x00600040, 0x45c02529, 0x00ae05c0, 0x00ae05e0 },
+   { 0x80600040, 0xd8002d29, 0x00ae05c0, 0x00800080 },
+   { 0x00600040, 0x46002529, 0x00ae0600, 0x00ae0620 },
+   { 0x80600040, 0xd8402d29, 0x00ae0600, 0x00800080 },
+   { 0x00600008, 0x46403d29, 0x00ae9000, 0x00010001 },
+   { 0x00600008, 0x46603d29, 0x00ae9020, 0x00010001 },
+   { 0x00600008, 0x46803d29, 0x00ae9040, 0x00010001 },
+   { 0x00600008, 0x46a03d29, 0x00ae9060, 0x00010001 },
+   { 0x00600040, 0x46402529, 0x00ae0640, 0x00ae0660 },
+   { 0x80600040, 0xd0002d29, 0x00ae0640, 0x00800080 },
+   { 0x00600040, 0x46802529, 0x00ae0680, 0x00ae06a0 },
+   { 0x80600040, 0xd0402d29, 0x00ae0680, 0x00800080 },
+   { 0x00400040, 0x22082da8, 0x006902c0, 0x06000600 },
+   { 0x80800040, 0xa4002d29, 0x00b18400, 0x00800080 },
+   { 0x80800040, 0xa4202d29, 0x00b18420, 0x00800080 },
+   { 0x80800040, 0xa4402d29, 0x00b18440, 0x00800080 },
+   { 0x80800040, 0xa4602d29, 0x00b18460, 0x00800080 },
+   { 0x00600008, 0x45c03d29, 0x00ae8800, 0x00010001 },
+   { 0x00600008, 0x45e03d29, 0x00ae8820, 0x00010001 },
+   { 0x00600008, 0x46003d29, 0x00ae8840, 0x00010001 },
+   { 0x00600008, 0x46203d29, 0x00ae8860, 0x00010001 },
+   { 0x00600040, 0x45c02529, 0x00ae05c0, 0x00ae05e0 },
+   { 0x80600040, 0xc8002d29, 0x00ae05c0, 0x00800080 },
+   { 0x00600040, 0x46002529, 0x00ae0600, 0x00ae0620 },
+   { 0x80600040, 0xc8402d29, 0x00ae0600, 0x00800080 },
+   { 0x00600008, 0x46403d29, 0x00ae8000, 0x00010001 },
+   { 0x00600008, 0x46603d29, 0x00ae8020, 0x00010001 },
+   { 0x00600008, 0x46803d29, 0x00ae8040, 0x00010001 },
+   { 0x00600008, 0x46a03d29, 0x00ae8060, 0x00010001 },
+   { 0x00600040, 0x46402529, 0x00ae0640, 0x00ae0660 },
+   { 0x80600040, 0xc0002d29, 0x00ae0640, 0x00800080 },
+   { 0x00600040, 0x46802529, 0x00ae0680, 0x00ae06a0 },
+   { 0x80600040, 0xc0402d29, 0x00ae0680, 0x00800080 },
+   { 0x00400040, 0x22002da8, 0x006902c0, 0x04000400 },
+   { 0x80800040, 0xb4002d29, 0x00b19400, 0x00800080 },
+   { 0x80800040, 0xb4202d29, 0x00b19420, 0x00800080 },
+   { 0x80800040, 0xb4402d29, 0x00b19440, 0x00800080 },
+   { 0x80800040, 0xb4602d29, 0x00b19460, 0x00800080 },
+   { 0x00600008, 0x45c03d29, 0x00ae9800, 0x00010001 },
+   { 0x00600008, 0x45e03d29, 0x00ae9820, 0x00010001 },
+   { 0x00600008, 0x46003d29, 0x00ae9840, 0x00010001 },
+   { 0x00600008, 0x46203d29, 0x00ae9860, 0x00010001 },
+   { 0x00600040, 0x45c02529, 0x00ae05c0, 0x00ae05e0 },
+   { 0x80600040, 0xd8002d29, 0x00ae05c0, 0x00800080 },
+   { 0x00600040, 0x46002529, 0x00ae0600, 0x00ae0620 },
+   { 0x80600040, 0xd8402d29, 0x00ae0600, 0x00800080 },
+   { 0x00600008, 0x46403d29, 0x00ae9000, 0x00010001 },
+   { 0x00600008, 0x46603d29, 0x00ae9020, 0x00010001 },
+   { 0x00600008, 0x46803d29, 0x00ae9040, 0x00010001 },
+   { 0x00600008, 0x46a03d29, 0x00ae9060, 0x00010001 },
+   { 0x00600040, 0x46402529, 0x00ae0640, 0x00ae0660 },
+   { 0x80600040, 0xd0002d29, 0x00ae0640, 0x00800080 },
+   { 0x00600040, 0x46802529, 0x00ae0680, 0x00ae06a0 },
+   { 0x80600040, 0xd0402d29, 0x00ae0680, 0x00800080 },
+   { 0x00400040, 0x22082da8, 0x006902c0, 0x06000600 },
+   { 0x00400040, 0x22002da8, 0x006902c0, 0x00000000 },
+   { 0x00400040, 0x22082da8, 0x006902c0, 0x02000200 },
+   { 0x00800401, 0x23a00231, 0x00d28401, 0x00000000 },
+   { 0x00800801, 0x23b00231, 0x00d28421, 0x00000000 },
+   { 0x00800401, 0x23c00231, 0x00d28441, 0x00000000 },
+   { 0x00800801, 0x23d00231, 0x00d28461, 0x00000000 },
+   { 0x00600401, 0x44c00231, 0x00cf8801, 0x00000000 },
+   { 0x00600c01, 0x44d00231, 0x00cf8841, 0x00000000 },
+   { 0x00600c01, 0x44c10231, 0x00cf8001, 0x00000000 },
+   { 0x00600801, 0x44d10231, 0x00cf8041, 0x00000000 },
+   { 0x00400040, 0x22002da8, 0x006902c0, 0x04000400 },
+   { 0x00800401, 0x23e00231, 0x00d29401, 0x00000000 },
+   { 0x00800801, 0x23f00231, 0x00d29421, 0x00000000 },
+   { 0x00800401, 0x24000231, 0x00d29441, 0x00000000 },
+   { 0x00800801, 0x24100231, 0x00d29461, 0x00000000 },
+   { 0x00600401, 0x44e00231, 0x00cf9801, 0x00000000 },
+   { 0x00600c01, 0x44f00231, 0x00cf9841, 0x00000000 },
+   { 0x00600c01, 0x44e10231, 0x00cf9001, 0x00000000 },
+   { 0x00600801, 0x44f10231, 0x00cf9041, 0x00000000 },
+   { 0x00400040, 0x22082da8, 0x006902c0, 0x06000600 },
+   { 0x00800401, 0x24200231, 0x00d28401, 0x00000000 },
+   { 0x00800801, 0x24300231, 0x00d28421, 0x00000000 },
+   { 0x00800401, 0x24400231, 0x00d28441, 0x00000000 },
+   { 0x00800801, 0x24500231, 0x00d28461, 0x00000000 },
+   { 0x00600401, 0x45000231, 0x00cf8801, 0x00000000 },
+   { 0x00600c01, 0x45100231, 0x00cf8841, 0x00000000 },
+   { 0x00600c01, 0x45010231, 0x00cf8001, 0x00000000 },
+   { 0x00600801, 0x45110231, 0x00cf8041, 0x00000000 },
+   { 0x00800401, 0x24600231, 0x00d29401, 0x00000000 },
+   { 0x00800801, 0x24700231, 0x00d29421, 0x00000000 },
+   { 0x00800401, 0x24800231, 0x00d29441, 0x00000000 },
+   { 0x00800801, 0x24900231, 0x00d29461, 0x00000000 },
+   { 0x00600401, 0x45200231, 0x00cf9801, 0x00000000 },
+   { 0x00600c01, 0x45300231, 0x00cf9841, 0x00000000 },
+   { 0x00600c01, 0x45210231, 0x00cf9001, 0x00000000 },
+   { 0x00600801, 0x45310231, 0x00cf9041, 0x00000000 },
+   { 0x05000031, 0x20000e24, 0x00000380, 0x120a8018 },
+   { 0x05000031, 0x20000e24, 0x000004a0, 0x0a0a8019 },
+   { 0x00600001, 0x2fe00021, 0x008d0000, 0x00000000 },
+   { 0x07000031, 0x20001e24, 0x00000fe0, 0x82000010 },
diff --git a/src/shaders/post_processing/gen7/pl2_to_pl2.g7b b/src/shaders/post_processing/gen7/pl2_to_pl2.g7b
new file mode 100644
index 0000000..445ae01
--- /dev/null
+++ b/src/shaders/post_processing/gen7/pl2_to_pl2.g7b
@@ -0,0 +1,654 @@
+   { 0x00600001, 0x23600021, 0x008d0000, 0x00000000 },
+   { 0x00600001, 0x23200021, 0x008d0000, 0x00000000 },
+   { 0x00600001, 0x23400021, 0x008d0000, 0x00000000 },
+   { 0x00600041, 0x206077bd, 0x008d0060, 0x000000f0 },
+   { 0x00200001, 0x211401bd, 0x004500e0, 0x00000000 },
+   { 0x01600010, 0x20002e24, 0x0000005a, 0x00010001 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x00000000 },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+   { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+   { 0x00110048, 0x20c077bd, 0x00000060, 0x00000114 },
+   { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+   { 0x00000048, 0x20a077bd, 0x00000080, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+   { 0x00110048, 0x20c077bd, 0x00000060, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+   { 0x00000048, 0x20a077bd, 0x00000080, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+   { 0x00110048, 0x20c077bd, 0x00000060, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+   { 0x00000048, 0x20a077bd, 0x00000080, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+   { 0x00110048, 0x20c077bd, 0x00000060, 0x00000220 },
+   { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+   { 0x00000048, 0x20a077bd, 0x00000080, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x00020002 },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+   { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+   { 0x00110048, 0x20c477bd, 0x00000064, 0x00000114 },
+   { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+   { 0x00000048, 0x20a477bd, 0x00000084, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+   { 0x00110048, 0x20c477bd, 0x00000064, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+   { 0x00000048, 0x20a477bd, 0x00000084, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+   { 0x00110048, 0x20c477bd, 0x00000064, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+   { 0x00000048, 0x20a477bd, 0x00000084, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+   { 0x00110048, 0x20c477bd, 0x00000064, 0x00000220 },
+   { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+   { 0x00000048, 0x20a477bd, 0x00000084, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x00040004 },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+   { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+   { 0x00110048, 0x20c877bd, 0x00000068, 0x00000114 },
+   { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+   { 0x00000048, 0x20a877bd, 0x00000088, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+   { 0x00110048, 0x20c877bd, 0x00000068, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+   { 0x00000048, 0x20a877bd, 0x00000088, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+   { 0x00110048, 0x20c877bd, 0x00000068, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+   { 0x00000048, 0x20a877bd, 0x00000088, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+   { 0x00110048, 0x20c877bd, 0x00000068, 0x00000220 },
+   { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+   { 0x00000048, 0x20a877bd, 0x00000088, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x00060006 },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+   { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+   { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000114 },
+   { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+   { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+   { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+   { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+   { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+   { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+   { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000220 },
+   { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+   { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x00080008 },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+   { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+   { 0x00110048, 0x20d077bd, 0x00000070, 0x00000114 },
+   { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+   { 0x00000048, 0x20b077bd, 0x00000090, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+   { 0x00110048, 0x20d077bd, 0x00000070, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+   { 0x00000048, 0x20b077bd, 0x00000090, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+   { 0x00110048, 0x20d077bd, 0x00000070, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+   { 0x00000048, 0x20b077bd, 0x00000090, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+   { 0x00110048, 0x20d077bd, 0x00000070, 0x00000220 },
+   { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+   { 0x00000048, 0x20b077bd, 0x00000090, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x000a000a },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+   { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+   { 0x00110048, 0x20d477bd, 0x00000074, 0x00000114 },
+   { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+   { 0x00000048, 0x20b477bd, 0x00000094, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+   { 0x00110048, 0x20d477bd, 0x00000074, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+   { 0x00000048, 0x20b477bd, 0x00000094, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+   { 0x00110048, 0x20d477bd, 0x00000074, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+   { 0x00000048, 0x20b477bd, 0x00000094, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+   { 0x00110048, 0x20d477bd, 0x00000074, 0x00000220 },
+   { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+   { 0x00000048, 0x20b477bd, 0x00000094, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x000c000c },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+   { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+   { 0x00110048, 0x20d877bd, 0x00000078, 0x00000114 },
+   { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+   { 0x00000048, 0x20b877bd, 0x00000098, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+   { 0x00110048, 0x20d877bd, 0x00000078, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+   { 0x00000048, 0x20b877bd, 0x00000098, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+   { 0x00110048, 0x20d877bd, 0x00000078, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+   { 0x00000048, 0x20b877bd, 0x00000098, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+   { 0x00110048, 0x20d877bd, 0x00000078, 0x00000220 },
+   { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+   { 0x00000048, 0x20b877bd, 0x00000098, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x000e000e },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+   { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+   { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000114 },
+   { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+   { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+   { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+   { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+   { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+   { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+   { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000220 },
+   { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+   { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00800001, 0x22a00169, 0x00000000, 0xffffffff },
+   { 0x02000010, 0x20002d24, 0x020000e4, 0x00000000 },
+   { 0x02010010, 0x20002d24, 0x020000e6, 0x00000000 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x0000001a },
+   { 0x00000005, 0x23022d31, 0x00000044, 0x00030003 },
+   { 0x00000401, 0x233803bd, 0x000000f4, 0x00000000 },
+   { 0x00000c01, 0x233003bd, 0x00000060, 0x00000000 },
+   { 0x00000c01, 0x233403bd, 0x00000080, 0x00000000 },
+   { 0x00000c01, 0x232803bd, 0x000000c0, 0x00000000 },
+   { 0x00000801, 0x232c03bd, 0x000000a0, 0x00000000 },
+   { 0x00000001, 0x240803bc, 0x000000c0, 0x00000000 },
+   { 0x00000048, 0x24087fbc, 0x00000060, 0x41000000 },
+   { 0x00000448, 0x22e87fbd, 0x000000f4, 0x41e00000 },
+   { 0x00000c41, 0x22e47fbd, 0x00000080, 0x40800000 },
+   { 0x00000001, 0x241003bc, 0x00000060, 0x00000000 },
+   { 0x00000c48, 0x22f07fbd, 0x000000f4, 0x41000000 },
+   { 0x00000801, 0x22f40061, 0x00000000, 0x00000000 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00400001, 0x2400036c, 0x00000000, 0x00006ea2 },
+   { 0x00400040, 0x24002d8c, 0x00690400, 0x00460046 },
+   { 0x00400009, 0x22c02d8d, 0x00690400, 0x00050005 },
+   { 0x00400001, 0x2400036c, 0x00000000, 0x00006204 },
+   { 0x00400040, 0x24002d8c, 0x00690400, 0x00400040 },
+   { 0x00400409, 0x22402d8d, 0x00690400, 0x00050005 },
+   { 0x00000801, 0x22500061, 0x00000000, 0x01000100 },
+   { 0x00000001, 0x22d00061, 0x00000000, 0x00400040 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x00000052 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+   { 0x00000401, 0x233c0021, 0x000000fc, 0x00000000 },
+   { 0x00000801, 0x23240121, 0x000000f8, 0x00000000 },
+   { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+   { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+   { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+   { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+   { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+   { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+   { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+   { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+   { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+   { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+   { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+   { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+   { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+   { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+   { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+   { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+   { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+   { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+   { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+   { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+   { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+   { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+   { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+   { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+   { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+   { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+   { 0x02000031, 0x28000229, 0x00000200, 0x00000200 },
+   { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x048eb801 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000a000 },
+   { 0x02000031, 0x28800229, 0x00000200, 0x00000200 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x00000050 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+   { 0x00000040, 0x23240d21, 0x000000f8, 0x00000001 },
+   { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+   { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+   { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+   { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+   { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+   { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+   { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+   { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+   { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+   { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+   { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+   { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+   { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+   { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+   { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+   { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+   { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+   { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+   { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+   { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+   { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+   { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+   { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+   { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+   { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+   { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+   { 0x02000031, 0x2a000229, 0x00000200, 0x00000200 },
+   { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x048eb801 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000a000 },
+   { 0x02000031, 0x2a800229, 0x00000200, 0x00000200 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x00000050 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+   { 0x00000040, 0x23240d21, 0x000000f8, 0x00000002 },
+   { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+   { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+   { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+   { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+   { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+   { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+   { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+   { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+   { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+   { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+   { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+   { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+   { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+   { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+   { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+   { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+   { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+   { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+   { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+   { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+   { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+   { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+   { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+   { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+   { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+   { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+   { 0x02000031, 0x2c000229, 0x00000200, 0x00000200 },
+   { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x048eb801 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000a000 },
+   { 0x02000031, 0x2c800229, 0x00000200, 0x00000200 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x00000050 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+   { 0x00000040, 0x23240d21, 0x000000f8, 0x00000003 },
+   { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+   { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+   { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+   { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+   { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+   { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+   { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+   { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+   { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+   { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+   { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+   { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+   { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+   { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+   { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+   { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+   { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+   { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+   { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+   { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+   { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+   { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+   { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+   { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+   { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+   { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+   { 0x02000031, 0x2e000229, 0x00000200, 0x00000200 },
+   { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x048eb801 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000a000 },
+   { 0x02000031, 0x2e800229, 0x00000200, 0x00000200 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00400040, 0x22002da8, 0x006902c0, 0x00000000 },
+   { 0x00400040, 0x22082da8, 0x006902c0, 0x02000200 },
+   { 0x00600001, 0x23800021, 0x008d0360, 0x00000000 },
+   { 0x00600001, 0x24a00021, 0x008d0360, 0x00000000 },
+   { 0x00200401, 0x238001a5, 0x004500e0, 0x00000000 },
+   { 0x00000401, 0x24a001a5, 0x000000e0, 0x00000000 },
+   { 0x00000c08, 0x24a43da5, 0x000000e2, 0x00010001 },
+   { 0x00000801, 0x23880061, 0x00000000, 0x000f000f },
+   { 0x00000801, 0x24a80061, 0x00000000, 0x0007000f },
+   { 0x80800040, 0xa4002d29, 0x00b18400, 0x00800080 },
+   { 0x80800040, 0xa4202d29, 0x00b18420, 0x00800080 },
+   { 0x80800040, 0xa4402d29, 0x00b18440, 0x00800080 },
+   { 0x80800040, 0xa4602d29, 0x00b18460, 0x00800080 },
+   { 0x00600008, 0x45c03d29, 0x00ae8800, 0x00010001 },
+   { 0x00600008, 0x45e03d29, 0x00ae8820, 0x00010001 },
+   { 0x00600008, 0x46003d29, 0x00ae8840, 0x00010001 },
+   { 0x00600008, 0x46203d29, 0x00ae8860, 0x00010001 },
+   { 0x00600040, 0x45c02529, 0x00ae05c0, 0x00ae05e0 },
+   { 0x80600040, 0xc8002d29, 0x00ae05c0, 0x00800080 },
+   { 0x00600040, 0x46002529, 0x00ae0600, 0x00ae0620 },
+   { 0x80600040, 0xc8402d29, 0x00ae0600, 0x00800080 },
+   { 0x00600008, 0x46403d29, 0x00ae8000, 0x00010001 },
+   { 0x00600008, 0x46603d29, 0x00ae8020, 0x00010001 },
+   { 0x00600008, 0x46803d29, 0x00ae8040, 0x00010001 },
+   { 0x00600008, 0x46a03d29, 0x00ae8060, 0x00010001 },
+   { 0x00600040, 0x46402529, 0x00ae0640, 0x00ae0660 },
+   { 0x80600040, 0xc0002d29, 0x00ae0640, 0x00800080 },
+   { 0x00600040, 0x46802529, 0x00ae0680, 0x00ae06a0 },
+   { 0x80600040, 0xc0402d29, 0x00ae0680, 0x00800080 },
+   { 0x00400040, 0x22002da8, 0x006902c0, 0x04000400 },
+   { 0x80800040, 0xb4002d29, 0x00b19400, 0x00800080 },
+   { 0x80800040, 0xb4202d29, 0x00b19420, 0x00800080 },
+   { 0x80800040, 0xb4402d29, 0x00b19440, 0x00800080 },
+   { 0x80800040, 0xb4602d29, 0x00b19460, 0x00800080 },
+   { 0x00600008, 0x45c03d29, 0x00ae9800, 0x00010001 },
+   { 0x00600008, 0x45e03d29, 0x00ae9820, 0x00010001 },
+   { 0x00600008, 0x46003d29, 0x00ae9840, 0x00010001 },
+   { 0x00600008, 0x46203d29, 0x00ae9860, 0x00010001 },
+   { 0x00600040, 0x45c02529, 0x00ae05c0, 0x00ae05e0 },
+   { 0x80600040, 0xd8002d29, 0x00ae05c0, 0x00800080 },
+   { 0x00600040, 0x46002529, 0x00ae0600, 0x00ae0620 },
+   { 0x80600040, 0xd8402d29, 0x00ae0600, 0x00800080 },
+   { 0x00600008, 0x46403d29, 0x00ae9000, 0x00010001 },
+   { 0x00600008, 0x46603d29, 0x00ae9020, 0x00010001 },
+   { 0x00600008, 0x46803d29, 0x00ae9040, 0x00010001 },
+   { 0x00600008, 0x46a03d29, 0x00ae9060, 0x00010001 },
+   { 0x00600040, 0x46402529, 0x00ae0640, 0x00ae0660 },
+   { 0x80600040, 0xd0002d29, 0x00ae0640, 0x00800080 },
+   { 0x00600040, 0x46802529, 0x00ae0680, 0x00ae06a0 },
+   { 0x80600040, 0xd0402d29, 0x00ae0680, 0x00800080 },
+   { 0x00400040, 0x22082da8, 0x006902c0, 0x06000600 },
+   { 0x80800040, 0xa4002d29, 0x00b18400, 0x00800080 },
+   { 0x80800040, 0xa4202d29, 0x00b18420, 0x00800080 },
+   { 0x80800040, 0xa4402d29, 0x00b18440, 0x00800080 },
+   { 0x80800040, 0xa4602d29, 0x00b18460, 0x00800080 },
+   { 0x00600008, 0x45c03d29, 0x00ae8800, 0x00010001 },
+   { 0x00600008, 0x45e03d29, 0x00ae8820, 0x00010001 },
+   { 0x00600008, 0x46003d29, 0x00ae8840, 0x00010001 },
+   { 0x00600008, 0x46203d29, 0x00ae8860, 0x00010001 },
+   { 0x00600040, 0x45c02529, 0x00ae05c0, 0x00ae05e0 },
+   { 0x80600040, 0xc8002d29, 0x00ae05c0, 0x00800080 },
+   { 0x00600040, 0x46002529, 0x00ae0600, 0x00ae0620 },
+   { 0x80600040, 0xc8402d29, 0x00ae0600, 0x00800080 },
+   { 0x00600008, 0x46403d29, 0x00ae8000, 0x00010001 },
+   { 0x00600008, 0x46603d29, 0x00ae8020, 0x00010001 },
+   { 0x00600008, 0x46803d29, 0x00ae8040, 0x00010001 },
+   { 0x00600008, 0x46a03d29, 0x00ae8060, 0x00010001 },
+   { 0x00600040, 0x46402529, 0x00ae0640, 0x00ae0660 },
+   { 0x80600040, 0xc0002d29, 0x00ae0640, 0x00800080 },
+   { 0x00600040, 0x46802529, 0x00ae0680, 0x00ae06a0 },
+   { 0x80600040, 0xc0402d29, 0x00ae0680, 0x00800080 },
+   { 0x00400040, 0x22002da8, 0x006902c0, 0x04000400 },
+   { 0x80800040, 0xb4002d29, 0x00b19400, 0x00800080 },
+   { 0x80800040, 0xb4202d29, 0x00b19420, 0x00800080 },
+   { 0x80800040, 0xb4402d29, 0x00b19440, 0x00800080 },
+   { 0x80800040, 0xb4602d29, 0x00b19460, 0x00800080 },
+   { 0x00600008, 0x45c03d29, 0x00ae9800, 0x00010001 },
+   { 0x00600008, 0x45e03d29, 0x00ae9820, 0x00010001 },
+   { 0x00600008, 0x46003d29, 0x00ae9840, 0x00010001 },
+   { 0x00600008, 0x46203d29, 0x00ae9860, 0x00010001 },
+   { 0x00600040, 0x45c02529, 0x00ae05c0, 0x00ae05e0 },
+   { 0x80600040, 0xd8002d29, 0x00ae05c0, 0x00800080 },
+   { 0x00600040, 0x46002529, 0x00ae0600, 0x00ae0620 },
+   { 0x80600040, 0xd8402d29, 0x00ae0600, 0x00800080 },
+   { 0x00600008, 0x46403d29, 0x00ae9000, 0x00010001 },
+   { 0x00600008, 0x46603d29, 0x00ae9020, 0x00010001 },
+   { 0x00600008, 0x46803d29, 0x00ae9040, 0x00010001 },
+   { 0x00600008, 0x46a03d29, 0x00ae9060, 0x00010001 },
+   { 0x00600040, 0x46402529, 0x00ae0640, 0x00ae0660 },
+   { 0x80600040, 0xd0002d29, 0x00ae0640, 0x00800080 },
+   { 0x00600040, 0x46802529, 0x00ae0680, 0x00ae06a0 },
+   { 0x80600040, 0xd0402d29, 0x00ae0680, 0x00800080 },
+   { 0x00400040, 0x22082da8, 0x006902c0, 0x06000600 },
+   { 0x00400040, 0x22002da8, 0x006902c0, 0x00000000 },
+   { 0x00400040, 0x22082da8, 0x006902c0, 0x02000200 },
+   { 0x00800401, 0x23a00231, 0x00d28401, 0x00000000 },
+   { 0x00800801, 0x23b00231, 0x00d28421, 0x00000000 },
+   { 0x00800401, 0x23c00231, 0x00d28441, 0x00000000 },
+   { 0x00800801, 0x23d00231, 0x00d28461, 0x00000000 },
+   { 0x00600401, 0x44c00231, 0x00cf8801, 0x00000000 },
+   { 0x00600c01, 0x44d00231, 0x00cf8841, 0x00000000 },
+   { 0x00600c01, 0x44c10231, 0x00cf8001, 0x00000000 },
+   { 0x00600801, 0x44d10231, 0x00cf8041, 0x00000000 },
+   { 0x00400040, 0x22002da8, 0x006902c0, 0x04000400 },
+   { 0x00800401, 0x23e00231, 0x00d29401, 0x00000000 },
+   { 0x00800801, 0x23f00231, 0x00d29421, 0x00000000 },
+   { 0x00800401, 0x24000231, 0x00d29441, 0x00000000 },
+   { 0x00800801, 0x24100231, 0x00d29461, 0x00000000 },
+   { 0x00600401, 0x44e00231, 0x00cf9801, 0x00000000 },
+   { 0x00600c01, 0x44f00231, 0x00cf9841, 0x00000000 },
+   { 0x00600c01, 0x44e10231, 0x00cf9001, 0x00000000 },
+   { 0x00600801, 0x44f10231, 0x00cf9041, 0x00000000 },
+   { 0x00400040, 0x22082da8, 0x006902c0, 0x06000600 },
+   { 0x00800401, 0x24200231, 0x00d28401, 0x00000000 },
+   { 0x00800801, 0x24300231, 0x00d28421, 0x00000000 },
+   { 0x00800401, 0x24400231, 0x00d28441, 0x00000000 },
+   { 0x00800801, 0x24500231, 0x00d28461, 0x00000000 },
+   { 0x00600401, 0x45000231, 0x00cf8801, 0x00000000 },
+   { 0x00600c01, 0x45100231, 0x00cf8841, 0x00000000 },
+   { 0x00600c01, 0x45010231, 0x00cf8001, 0x00000000 },
+   { 0x00600801, 0x45110231, 0x00cf8041, 0x00000000 },
+   { 0x00800401, 0x24600231, 0x00d29401, 0x00000000 },
+   { 0x00800801, 0x24700231, 0x00d29421, 0x00000000 },
+   { 0x00800401, 0x24800231, 0x00d29441, 0x00000000 },
+   { 0x00800801, 0x24900231, 0x00d29461, 0x00000000 },
+   { 0x00600401, 0x45200231, 0x00cf9801, 0x00000000 },
+   { 0x00600c01, 0x45300231, 0x00cf9841, 0x00000000 },
+   { 0x00600c01, 0x45210231, 0x00cf9001, 0x00000000 },
+   { 0x00600801, 0x45310231, 0x00cf9041, 0x00000000 },
+   { 0x05000031, 0x20000e24, 0x00000380, 0x120a8018 },
+   { 0x05000031, 0x20000e24, 0x000004a0, 0x0a0a8019 },
+   { 0x00600001, 0x2fe00021, 0x008d0000, 0x00000000 },
+   { 0x07000031, 0x20001e24, 0x00000fe0, 0x82000010 },
diff --git a/src/shaders/post_processing/gen7/pl2_to_pl3.asm b/src/shaders/post_processing/gen7/pl2_to_pl3.asm
new file mode 100644
index 0000000..d48071b
--- /dev/null
+++ b/src/shaders/post_processing/gen7/pl2_to_pl3.asm
@@ -0,0 +1,17 @@
+// Module name: AVS
+.kernel PL2_TO_PL3
+.code
+
+#include "VP_Setup.g4a"
+#include "Set_Layer_0.g4a"
+#include "Set_AVS_Buf_0123_PL2.g4a"
+#include "PL2_AVS_Buf_0.g4a"
+#include "PL2_AVS_Buf_1.g4a"
+#include "PL2_AVS_Buf_2.g4a"
+#include "PL2_AVS_Buf_3.g4a"
+#include "Save_AVS_PL3.g4a"        
+#include "EOT.g4a"
+
+.end_code  
+
+.end_kernel
diff --git a/src/shaders/post_processing/gen7/pl2_to_pl3.g75b b/src/shaders/post_processing/gen7/pl2_to_pl3.g75b
new file mode 100644
index 0000000..77bcdcb
--- /dev/null
+++ b/src/shaders/post_processing/gen7/pl2_to_pl3.g75b
@@ -0,0 +1,607 @@
+   { 0x00600001, 0x23600021, 0x008d0000, 0x00000000 },
+   { 0x00600001, 0x23200021, 0x008d0000, 0x00000000 },
+   { 0x00600001, 0x23400021, 0x008d0000, 0x00000000 },
+   { 0x00600041, 0x206077bd, 0x008d0060, 0x000000f0 },
+   { 0x00200001, 0x211401bd, 0x004500e0, 0x00000000 },
+   { 0x01600010, 0x20002e24, 0x0000005a, 0x00010001 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x00000000 },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+   { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+   { 0x00110048, 0x20c077bd, 0x00000060, 0x00000114 },
+   { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+   { 0x00000048, 0x20a077bd, 0x00000080, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+   { 0x00110048, 0x20c077bd, 0x00000060, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+   { 0x00000048, 0x20a077bd, 0x00000080, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+   { 0x00110048, 0x20c077bd, 0x00000060, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+   { 0x00000048, 0x20a077bd, 0x00000080, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+   { 0x00110048, 0x20c077bd, 0x00000060, 0x00000220 },
+   { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+   { 0x00000048, 0x20a077bd, 0x00000080, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x00020002 },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+   { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+   { 0x00110048, 0x20c477bd, 0x00000064, 0x00000114 },
+   { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+   { 0x00000048, 0x20a477bd, 0x00000084, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+   { 0x00110048, 0x20c477bd, 0x00000064, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+   { 0x00000048, 0x20a477bd, 0x00000084, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+   { 0x00110048, 0x20c477bd, 0x00000064, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+   { 0x00000048, 0x20a477bd, 0x00000084, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+   { 0x00110048, 0x20c477bd, 0x00000064, 0x00000220 },
+   { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+   { 0x00000048, 0x20a477bd, 0x00000084, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x00040004 },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+   { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+   { 0x00110048, 0x20c877bd, 0x00000068, 0x00000114 },
+   { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+   { 0x00000048, 0x20a877bd, 0x00000088, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+   { 0x00110048, 0x20c877bd, 0x00000068, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+   { 0x00000048, 0x20a877bd, 0x00000088, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+   { 0x00110048, 0x20c877bd, 0x00000068, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+   { 0x00000048, 0x20a877bd, 0x00000088, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+   { 0x00110048, 0x20c877bd, 0x00000068, 0x00000220 },
+   { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+   { 0x00000048, 0x20a877bd, 0x00000088, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x00060006 },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+   { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+   { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000114 },
+   { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+   { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+   { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+   { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+   { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+   { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+   { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000220 },
+   { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+   { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x00080008 },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+   { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+   { 0x00110048, 0x20d077bd, 0x00000070, 0x00000114 },
+   { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+   { 0x00000048, 0x20b077bd, 0x00000090, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+   { 0x00110048, 0x20d077bd, 0x00000070, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+   { 0x00000048, 0x20b077bd, 0x00000090, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+   { 0x00110048, 0x20d077bd, 0x00000070, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+   { 0x00000048, 0x20b077bd, 0x00000090, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+   { 0x00110048, 0x20d077bd, 0x00000070, 0x00000220 },
+   { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+   { 0x00000048, 0x20b077bd, 0x00000090, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x000a000a },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+   { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+   { 0x00110048, 0x20d477bd, 0x00000074, 0x00000114 },
+   { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+   { 0x00000048, 0x20b477bd, 0x00000094, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+   { 0x00110048, 0x20d477bd, 0x00000074, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+   { 0x00000048, 0x20b477bd, 0x00000094, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+   { 0x00110048, 0x20d477bd, 0x00000074, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+   { 0x00000048, 0x20b477bd, 0x00000094, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+   { 0x00110048, 0x20d477bd, 0x00000074, 0x00000220 },
+   { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+   { 0x00000048, 0x20b477bd, 0x00000094, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x000c000c },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+   { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+   { 0x00110048, 0x20d877bd, 0x00000078, 0x00000114 },
+   { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+   { 0x00000048, 0x20b877bd, 0x00000098, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+   { 0x00110048, 0x20d877bd, 0x00000078, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+   { 0x00000048, 0x20b877bd, 0x00000098, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+   { 0x00110048, 0x20d877bd, 0x00000078, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+   { 0x00000048, 0x20b877bd, 0x00000098, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+   { 0x00110048, 0x20d877bd, 0x00000078, 0x00000220 },
+   { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+   { 0x00000048, 0x20b877bd, 0x00000098, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x000e000e },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+   { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+   { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000114 },
+   { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+   { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+   { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+   { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+   { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+   { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+   { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000220 },
+   { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+   { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00800001, 0x22a00169, 0x00000000, 0xffffffff },
+   { 0x02000010, 0x20002d24, 0x020000e4, 0x00000000 },
+   { 0x02010010, 0x20002d24, 0x020000e6, 0x00000000 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x000000d0 },
+   { 0x00000005, 0x23022d31, 0x00000044, 0x00030003 },
+   { 0x00000401, 0x233803bd, 0x000000f4, 0x00000000 },
+   { 0x00000c01, 0x233003bd, 0x00000060, 0x00000000 },
+   { 0x00000c01, 0x233403bd, 0x00000080, 0x00000000 },
+   { 0x00000c01, 0x232803bd, 0x000000c0, 0x00000000 },
+   { 0x00000801, 0x232c03bd, 0x000000a0, 0x00000000 },
+   { 0x00000001, 0x240803bc, 0x000000c0, 0x00000000 },
+   { 0x00000048, 0x24087fbc, 0x00000060, 0x41000000 },
+   { 0x00000448, 0x22e87fbd, 0x000000f4, 0x41e00000 },
+   { 0x00000c41, 0x22e47fbd, 0x00000080, 0x40800000 },
+   { 0x00000001, 0x241003bc, 0x00000060, 0x00000000 },
+   { 0x00000c48, 0x22f07fbd, 0x000000f4, 0x41000000 },
+   { 0x00000801, 0x22f40061, 0x00000000, 0x00000000 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00400001, 0x2400036c, 0x00000000, 0x00006ea2 },
+   { 0x00400040, 0x24002d8c, 0x00690400, 0x00460046 },
+   { 0x00400009, 0x22c02d8d, 0x00690400, 0x00050005 },
+   { 0x00400001, 0x2400036c, 0x00000000, 0x00006204 },
+   { 0x00400040, 0x24002d8c, 0x00690400, 0x00400040 },
+   { 0x00400409, 0x22402d8d, 0x00690400, 0x00050005 },
+   { 0x00000801, 0x22500061, 0x00000000, 0x01000100 },
+   { 0x00000001, 0x22d00061, 0x00000000, 0x00400040 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x00000290 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+   { 0x00000401, 0x233c0021, 0x000000fc, 0x00000000 },
+   { 0x00000801, 0x23240121, 0x000000f8, 0x00000000 },
+   { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+   { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+   { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+   { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+   { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+   { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+   { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+   { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+   { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+   { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+   { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+   { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+   { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+   { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+   { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+   { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+   { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+   { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+   { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+   { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+   { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+   { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+   { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+   { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+   { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+   { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+   { 0x02000031, 0x28000229, 0x00000200, 0x00000200 },
+   { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x048eb801 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000a000 },
+   { 0x02000031, 0x28800229, 0x00000200, 0x00000200 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x00000280 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+   { 0x00000040, 0x23240d21, 0x000000f8, 0x00000001 },
+   { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+   { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+   { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+   { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+   { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+   { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+   { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+   { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+   { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+   { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+   { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+   { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+   { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+   { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+   { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+   { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+   { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+   { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+   { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+   { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+   { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+   { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+   { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+   { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+   { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+   { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+   { 0x02000031, 0x2a000229, 0x00000200, 0x00000200 },
+   { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x048eb801 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000a000 },
+   { 0x02000031, 0x2a800229, 0x00000200, 0x00000200 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x00000280 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+   { 0x00000040, 0x23240d21, 0x000000f8, 0x00000002 },
+   { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+   { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+   { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+   { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+   { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+   { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+   { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+   { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+   { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+   { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+   { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+   { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+   { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+   { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+   { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+   { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+   { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+   { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+   { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+   { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+   { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+   { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+   { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+   { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+   { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+   { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+   { 0x02000031, 0x2c000229, 0x00000200, 0x00000200 },
+   { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x048eb801 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000a000 },
+   { 0x02000031, 0x2c800229, 0x00000200, 0x00000200 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x00000280 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+   { 0x00000040, 0x23240d21, 0x000000f8, 0x00000003 },
+   { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+   { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+   { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+   { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+   { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+   { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+   { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+   { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+   { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+   { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+   { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+   { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+   { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+   { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+   { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+   { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+   { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+   { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+   { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+   { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+   { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+   { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+   { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+   { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+   { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+   { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+   { 0x02000031, 0x2e000229, 0x00000200, 0x00000200 },
+   { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x048eb801 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000a000 },
+   { 0x02000031, 0x2e800229, 0x00000200, 0x00000200 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00400040, 0x22002da8, 0x006902c0, 0x00000000 },
+   { 0x00400040, 0x22082da8, 0x006902c0, 0x02000200 },
+   { 0x00600001, 0x23800021, 0x008d0360, 0x00000000 },
+   { 0x00600001, 0x24a00021, 0x008d0360, 0x00000000 },
+   { 0x00600001, 0x25c00021, 0x008d0360, 0x00000000 },
+   { 0x00200401, 0x238001a5, 0x004500e0, 0x00000000 },
+   { 0x00200408, 0x24a03da5, 0x004500e0, 0x00010001 },
+   { 0x00200408, 0x25c03da5, 0x004500e0, 0x00010001 },
+   { 0x00000801, 0x23880061, 0x00000000, 0x000f000f },
+   { 0x00000801, 0x24a80061, 0x00000000, 0x00070007 },
+   { 0x00000801, 0x25c80061, 0x00000000, 0x00070007 },
+   { 0x80800040, 0xa4002d29, 0x00b18400, 0x00800080 },
+   { 0x80800040, 0xa4202d29, 0x00b18420, 0x00800080 },
+   { 0x80800040, 0xa4402d29, 0x00b18440, 0x00800080 },
+   { 0x80800040, 0xa4602d29, 0x00b18460, 0x00800080 },
+   { 0x80600040, 0xc8002d29, 0x00ae8800, 0x00800080 },
+   { 0x80600040, 0xc8402d29, 0x00ae8840, 0x00800080 },
+   { 0x80600040, 0xc0002d29, 0x00ae8000, 0x00800080 },
+   { 0x80600040, 0xc0402d29, 0x00ae8040, 0x00800080 },
+   { 0x00400040, 0x22002da8, 0x006902c0, 0x04000400 },
+   { 0x80800040, 0xb4002d29, 0x00b19400, 0x00800080 },
+   { 0x80800040, 0xb4202d29, 0x00b19420, 0x00800080 },
+   { 0x80800040, 0xb4402d29, 0x00b19440, 0x00800080 },
+   { 0x80800040, 0xb4602d29, 0x00b19460, 0x00800080 },
+   { 0x80600040, 0xd8002d29, 0x00ae9800, 0x00800080 },
+   { 0x80600040, 0xd8402d29, 0x00ae9840, 0x00800080 },
+   { 0x80600040, 0xd0002d29, 0x00ae9000, 0x00800080 },
+   { 0x80600040, 0xd0402d29, 0x00ae9040, 0x00800080 },
+   { 0x00400040, 0x22082da8, 0x006902c0, 0x06000600 },
+   { 0x80800040, 0xa4002d29, 0x00b18400, 0x00800080 },
+   { 0x80800040, 0xa4202d29, 0x00b18420, 0x00800080 },
+   { 0x80800040, 0xa4402d29, 0x00b18440, 0x00800080 },
+   { 0x80800040, 0xa4602d29, 0x00b18460, 0x00800080 },
+   { 0x80600040, 0xc8002d29, 0x00ae8800, 0x00800080 },
+   { 0x80600040, 0xc8402d29, 0x00ae8840, 0x00800080 },
+   { 0x80600040, 0xc0002d29, 0x00ae8000, 0x00800080 },
+   { 0x80600040, 0xc0402d29, 0x00ae8040, 0x00800080 },
+   { 0x80800040, 0xb4002d29, 0x00b19400, 0x00800080 },
+   { 0x80800040, 0xb4202d29, 0x00b19420, 0x00800080 },
+   { 0x80800040, 0xb4402d29, 0x00b19440, 0x00800080 },
+   { 0x80800040, 0xb4602d29, 0x00b19460, 0x00800080 },
+   { 0x80600040, 0xd8002d29, 0x00ae9800, 0x00800080 },
+   { 0x80600040, 0xd8402d29, 0x00ae9840, 0x00800080 },
+   { 0x80600040, 0xd0002d29, 0x00ae9000, 0x00800080 },
+   { 0x80600040, 0xd0402d29, 0x00ae9040, 0x00800080 },
+   { 0x00400040, 0x22002da8, 0x006902c0, 0x00000000 },
+   { 0x00400040, 0x22082da8, 0x006902c0, 0x02000200 },
+   { 0x00800401, 0x23a00231, 0x00d28401, 0x00000000 },
+   { 0x00800801, 0x23b00231, 0x00d28421, 0x00000000 },
+   { 0x00800401, 0x23c00231, 0x00d28441, 0x00000000 },
+   { 0x00800801, 0x23d00231, 0x00d28461, 0x00000000 },
+   { 0x00600401, 0x24c00231, 0x00cf8801, 0x00000000 },
+   { 0x00600c01, 0x24c80231, 0x00cf8841, 0x00000000 },
+   { 0x00600401, 0x25e00231, 0x00cf8001, 0x00000000 },
+   { 0x00600c01, 0x25e80231, 0x00cf8041, 0x00000000 },
+   { 0x00400040, 0x22002da8, 0x006902c0, 0x04000400 },
+   { 0x00800401, 0x23e00231, 0x00d29401, 0x00000000 },
+   { 0x00800801, 0x23f00231, 0x00d29421, 0x00000000 },
+   { 0x00800401, 0x24000231, 0x00d29441, 0x00000000 },
+   { 0x00800801, 0x24100231, 0x00d29461, 0x00000000 },
+   { 0x00600c01, 0x24d00231, 0x00cf9801, 0x00000000 },
+   { 0x00600801, 0x24d80231, 0x00cf9841, 0x00000000 },
+   { 0x00600c01, 0x25f00231, 0x00cf9001, 0x00000000 },
+   { 0x00600801, 0x25f80231, 0x00cf9041, 0x00000000 },
+   { 0x00400040, 0x22082da8, 0x006902c0, 0x06000600 },
+   { 0x00800401, 0x24200231, 0x00d28401, 0x00000000 },
+   { 0x00800801, 0x24300231, 0x00d28421, 0x00000000 },
+   { 0x00800401, 0x24400231, 0x00d28441, 0x00000000 },
+   { 0x00800801, 0x24500231, 0x00d28461, 0x00000000 },
+   { 0x00600401, 0x24e00231, 0x00cf8801, 0x00000000 },
+   { 0x00600c01, 0x24e80231, 0x00cf8841, 0x00000000 },
+   { 0x00600401, 0x26000231, 0x00cf8001, 0x00000000 },
+   { 0x00600c01, 0x26080231, 0x00cf8041, 0x00000000 },
+   { 0x00800401, 0x24600231, 0x00d29401, 0x00000000 },
+   { 0x00800801, 0x24700231, 0x00d29421, 0x00000000 },
+   { 0x00800401, 0x24800231, 0x00d29441, 0x00000000 },
+   { 0x00800801, 0x24900231, 0x00d29461, 0x00000000 },
+   { 0x00600c01, 0x24f00231, 0x00cf9801, 0x00000000 },
+   { 0x00600801, 0x24f80231, 0x00cf9841, 0x00000000 },
+   { 0x00600c01, 0x26100231, 0x00cf9001, 0x00000000 },
+   { 0x00600801, 0x26180231, 0x00cf9041, 0x00000000 },
+   { 0x05000031, 0x20000e24, 0x00000380, 0x120a8018 },
+   { 0x05000031, 0x20000e24, 0x000004a0, 0x060a8019 },
+   { 0x05000031, 0x20000e24, 0x000005c0, 0x060a801a },
+   { 0x00600001, 0x2fe00021, 0x008d0000, 0x00000000 },
+   { 0x07000031, 0x20001e24, 0x00000fe0, 0x82000010 },
diff --git a/src/shaders/post_processing/gen7/pl2_to_pl3.g7b b/src/shaders/post_processing/gen7/pl2_to_pl3.g7b
new file mode 100644
index 0000000..bec05ac
--- /dev/null
+++ b/src/shaders/post_processing/gen7/pl2_to_pl3.g7b
@@ -0,0 +1,607 @@
+   { 0x00600001, 0x23600021, 0x008d0000, 0x00000000 },
+   { 0x00600001, 0x23200021, 0x008d0000, 0x00000000 },
+   { 0x00600001, 0x23400021, 0x008d0000, 0x00000000 },
+   { 0x00600041, 0x206077bd, 0x008d0060, 0x000000f0 },
+   { 0x00200001, 0x211401bd, 0x004500e0, 0x00000000 },
+   { 0x01600010, 0x20002e24, 0x0000005a, 0x00010001 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x00000000 },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+   { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+   { 0x00110048, 0x20c077bd, 0x00000060, 0x00000114 },
+   { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+   { 0x00000048, 0x20a077bd, 0x00000080, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+   { 0x00110048, 0x20c077bd, 0x00000060, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+   { 0x00000048, 0x20a077bd, 0x00000080, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+   { 0x00110048, 0x20c077bd, 0x00000060, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+   { 0x00000048, 0x20a077bd, 0x00000080, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+   { 0x00110048, 0x20c077bd, 0x00000060, 0x00000220 },
+   { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+   { 0x00000048, 0x20a077bd, 0x00000080, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x00020002 },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+   { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+   { 0x00110048, 0x20c477bd, 0x00000064, 0x00000114 },
+   { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+   { 0x00000048, 0x20a477bd, 0x00000084, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+   { 0x00110048, 0x20c477bd, 0x00000064, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+   { 0x00000048, 0x20a477bd, 0x00000084, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+   { 0x00110048, 0x20c477bd, 0x00000064, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+   { 0x00000048, 0x20a477bd, 0x00000084, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+   { 0x00110048, 0x20c477bd, 0x00000064, 0x00000220 },
+   { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+   { 0x00000048, 0x20a477bd, 0x00000084, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x00040004 },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+   { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+   { 0x00110048, 0x20c877bd, 0x00000068, 0x00000114 },
+   { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+   { 0x00000048, 0x20a877bd, 0x00000088, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+   { 0x00110048, 0x20c877bd, 0x00000068, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+   { 0x00000048, 0x20a877bd, 0x00000088, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+   { 0x00110048, 0x20c877bd, 0x00000068, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+   { 0x00000048, 0x20a877bd, 0x00000088, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+   { 0x00110048, 0x20c877bd, 0x00000068, 0x00000220 },
+   { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+   { 0x00000048, 0x20a877bd, 0x00000088, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x00060006 },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+   { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+   { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000114 },
+   { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+   { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+   { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+   { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+   { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+   { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+   { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000220 },
+   { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+   { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x00080008 },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+   { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+   { 0x00110048, 0x20d077bd, 0x00000070, 0x00000114 },
+   { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+   { 0x00000048, 0x20b077bd, 0x00000090, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+   { 0x00110048, 0x20d077bd, 0x00000070, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+   { 0x00000048, 0x20b077bd, 0x00000090, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+   { 0x00110048, 0x20d077bd, 0x00000070, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+   { 0x00000048, 0x20b077bd, 0x00000090, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+   { 0x00110048, 0x20d077bd, 0x00000070, 0x00000220 },
+   { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+   { 0x00000048, 0x20b077bd, 0x00000090, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x000a000a },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+   { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+   { 0x00110048, 0x20d477bd, 0x00000074, 0x00000114 },
+   { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+   { 0x00000048, 0x20b477bd, 0x00000094, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+   { 0x00110048, 0x20d477bd, 0x00000074, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+   { 0x00000048, 0x20b477bd, 0x00000094, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+   { 0x00110048, 0x20d477bd, 0x00000074, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+   { 0x00000048, 0x20b477bd, 0x00000094, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+   { 0x00110048, 0x20d477bd, 0x00000074, 0x00000220 },
+   { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+   { 0x00000048, 0x20b477bd, 0x00000094, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x000c000c },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+   { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+   { 0x00110048, 0x20d877bd, 0x00000078, 0x00000114 },
+   { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+   { 0x00000048, 0x20b877bd, 0x00000098, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+   { 0x00110048, 0x20d877bd, 0x00000078, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+   { 0x00000048, 0x20b877bd, 0x00000098, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+   { 0x00110048, 0x20d877bd, 0x00000078, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+   { 0x00000048, 0x20b877bd, 0x00000098, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+   { 0x00110048, 0x20d877bd, 0x00000078, 0x00000220 },
+   { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+   { 0x00000048, 0x20b877bd, 0x00000098, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x000e000e },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+   { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+   { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000114 },
+   { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+   { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+   { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+   { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+   { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+   { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+   { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000220 },
+   { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+   { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00800001, 0x22a00169, 0x00000000, 0xffffffff },
+   { 0x02000010, 0x20002d24, 0x020000e4, 0x00000000 },
+   { 0x02010010, 0x20002d24, 0x020000e6, 0x00000000 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x0000001a },
+   { 0x00000005, 0x23022d31, 0x00000044, 0x00030003 },
+   { 0x00000401, 0x233803bd, 0x000000f4, 0x00000000 },
+   { 0x00000c01, 0x233003bd, 0x00000060, 0x00000000 },
+   { 0x00000c01, 0x233403bd, 0x00000080, 0x00000000 },
+   { 0x00000c01, 0x232803bd, 0x000000c0, 0x00000000 },
+   { 0x00000801, 0x232c03bd, 0x000000a0, 0x00000000 },
+   { 0x00000001, 0x240803bc, 0x000000c0, 0x00000000 },
+   { 0x00000048, 0x24087fbc, 0x00000060, 0x41000000 },
+   { 0x00000448, 0x22e87fbd, 0x000000f4, 0x41e00000 },
+   { 0x00000c41, 0x22e47fbd, 0x00000080, 0x40800000 },
+   { 0x00000001, 0x241003bc, 0x00000060, 0x00000000 },
+   { 0x00000c48, 0x22f07fbd, 0x000000f4, 0x41000000 },
+   { 0x00000801, 0x22f40061, 0x00000000, 0x00000000 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00400001, 0x2400036c, 0x00000000, 0x00006ea2 },
+   { 0x00400040, 0x24002d8c, 0x00690400, 0x00460046 },
+   { 0x00400009, 0x22c02d8d, 0x00690400, 0x00050005 },
+   { 0x00400001, 0x2400036c, 0x00000000, 0x00006204 },
+   { 0x00400040, 0x24002d8c, 0x00690400, 0x00400040 },
+   { 0x00400409, 0x22402d8d, 0x00690400, 0x00050005 },
+   { 0x00000801, 0x22500061, 0x00000000, 0x01000100 },
+   { 0x00000001, 0x22d00061, 0x00000000, 0x00400040 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x00000052 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+   { 0x00000401, 0x233c0021, 0x000000fc, 0x00000000 },
+   { 0x00000801, 0x23240121, 0x000000f8, 0x00000000 },
+   { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+   { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+   { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+   { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+   { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+   { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+   { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+   { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+   { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+   { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+   { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+   { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+   { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+   { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+   { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+   { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+   { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+   { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+   { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+   { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+   { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+   { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+   { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+   { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+   { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+   { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+   { 0x02000031, 0x28000229, 0x00000200, 0x00000200 },
+   { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x048eb801 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000a000 },
+   { 0x02000031, 0x28800229, 0x00000200, 0x00000200 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x00000050 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+   { 0x00000040, 0x23240d21, 0x000000f8, 0x00000001 },
+   { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+   { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+   { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+   { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+   { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+   { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+   { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+   { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+   { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+   { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+   { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+   { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+   { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+   { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+   { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+   { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+   { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+   { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+   { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+   { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+   { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+   { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+   { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+   { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+   { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+   { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+   { 0x02000031, 0x2a000229, 0x00000200, 0x00000200 },
+   { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x048eb801 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000a000 },
+   { 0x02000031, 0x2a800229, 0x00000200, 0x00000200 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x00000050 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+   { 0x00000040, 0x23240d21, 0x000000f8, 0x00000002 },
+   { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+   { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+   { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+   { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+   { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+   { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+   { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+   { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+   { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+   { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+   { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+   { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+   { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+   { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+   { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+   { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+   { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+   { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+   { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+   { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+   { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+   { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+   { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+   { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+   { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+   { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+   { 0x02000031, 0x2c000229, 0x00000200, 0x00000200 },
+   { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x048eb801 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000a000 },
+   { 0x02000031, 0x2c800229, 0x00000200, 0x00000200 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x00000050 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+   { 0x00000040, 0x23240d21, 0x000000f8, 0x00000003 },
+   { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+   { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+   { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+   { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+   { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+   { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+   { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+   { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+   { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+   { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+   { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+   { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+   { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+   { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+   { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+   { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+   { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+   { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+   { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+   { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+   { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+   { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+   { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+   { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+   { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+   { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+   { 0x02000031, 0x2e000229, 0x00000200, 0x00000200 },
+   { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x048eb801 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000a000 },
+   { 0x02000031, 0x2e800229, 0x00000200, 0x00000200 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00400040, 0x22002da8, 0x006902c0, 0x00000000 },
+   { 0x00400040, 0x22082da8, 0x006902c0, 0x02000200 },
+   { 0x00600001, 0x23800021, 0x008d0360, 0x00000000 },
+   { 0x00600001, 0x24a00021, 0x008d0360, 0x00000000 },
+   { 0x00600001, 0x25c00021, 0x008d0360, 0x00000000 },
+   { 0x00200401, 0x238001a5, 0x004500e0, 0x00000000 },
+   { 0x00200408, 0x24a03da5, 0x004500e0, 0x00010001 },
+   { 0x00200408, 0x25c03da5, 0x004500e0, 0x00010001 },
+   { 0x00000801, 0x23880061, 0x00000000, 0x000f000f },
+   { 0x00000801, 0x24a80061, 0x00000000, 0x00070007 },
+   { 0x00000801, 0x25c80061, 0x00000000, 0x00070007 },
+   { 0x80800040, 0xa4002d29, 0x00b18400, 0x00800080 },
+   { 0x80800040, 0xa4202d29, 0x00b18420, 0x00800080 },
+   { 0x80800040, 0xa4402d29, 0x00b18440, 0x00800080 },
+   { 0x80800040, 0xa4602d29, 0x00b18460, 0x00800080 },
+   { 0x80600040, 0xc8002d29, 0x00ae8800, 0x00800080 },
+   { 0x80600040, 0xc8402d29, 0x00ae8840, 0x00800080 },
+   { 0x80600040, 0xc0002d29, 0x00ae8000, 0x00800080 },
+   { 0x80600040, 0xc0402d29, 0x00ae8040, 0x00800080 },
+   { 0x00400040, 0x22002da8, 0x006902c0, 0x04000400 },
+   { 0x80800040, 0xb4002d29, 0x00b19400, 0x00800080 },
+   { 0x80800040, 0xb4202d29, 0x00b19420, 0x00800080 },
+   { 0x80800040, 0xb4402d29, 0x00b19440, 0x00800080 },
+   { 0x80800040, 0xb4602d29, 0x00b19460, 0x00800080 },
+   { 0x80600040, 0xd8002d29, 0x00ae9800, 0x00800080 },
+   { 0x80600040, 0xd8402d29, 0x00ae9840, 0x00800080 },
+   { 0x80600040, 0xd0002d29, 0x00ae9000, 0x00800080 },
+   { 0x80600040, 0xd0402d29, 0x00ae9040, 0x00800080 },
+   { 0x00400040, 0x22082da8, 0x006902c0, 0x06000600 },
+   { 0x80800040, 0xa4002d29, 0x00b18400, 0x00800080 },
+   { 0x80800040, 0xa4202d29, 0x00b18420, 0x00800080 },
+   { 0x80800040, 0xa4402d29, 0x00b18440, 0x00800080 },
+   { 0x80800040, 0xa4602d29, 0x00b18460, 0x00800080 },
+   { 0x80600040, 0xc8002d29, 0x00ae8800, 0x00800080 },
+   { 0x80600040, 0xc8402d29, 0x00ae8840, 0x00800080 },
+   { 0x80600040, 0xc0002d29, 0x00ae8000, 0x00800080 },
+   { 0x80600040, 0xc0402d29, 0x00ae8040, 0x00800080 },
+   { 0x80800040, 0xb4002d29, 0x00b19400, 0x00800080 },
+   { 0x80800040, 0xb4202d29, 0x00b19420, 0x00800080 },
+   { 0x80800040, 0xb4402d29, 0x00b19440, 0x00800080 },
+   { 0x80800040, 0xb4602d29, 0x00b19460, 0x00800080 },
+   { 0x80600040, 0xd8002d29, 0x00ae9800, 0x00800080 },
+   { 0x80600040, 0xd8402d29, 0x00ae9840, 0x00800080 },
+   { 0x80600040, 0xd0002d29, 0x00ae9000, 0x00800080 },
+   { 0x80600040, 0xd0402d29, 0x00ae9040, 0x00800080 },
+   { 0x00400040, 0x22002da8, 0x006902c0, 0x00000000 },
+   { 0x00400040, 0x22082da8, 0x006902c0, 0x02000200 },
+   { 0x00800401, 0x23a00231, 0x00d28401, 0x00000000 },
+   { 0x00800801, 0x23b00231, 0x00d28421, 0x00000000 },
+   { 0x00800401, 0x23c00231, 0x00d28441, 0x00000000 },
+   { 0x00800801, 0x23d00231, 0x00d28461, 0x00000000 },
+   { 0x00600401, 0x24c00231, 0x00cf8801, 0x00000000 },
+   { 0x00600c01, 0x24c80231, 0x00cf8841, 0x00000000 },
+   { 0x00600401, 0x25e00231, 0x00cf8001, 0x00000000 },
+   { 0x00600c01, 0x25e80231, 0x00cf8041, 0x00000000 },
+   { 0x00400040, 0x22002da8, 0x006902c0, 0x04000400 },
+   { 0x00800401, 0x23e00231, 0x00d29401, 0x00000000 },
+   { 0x00800801, 0x23f00231, 0x00d29421, 0x00000000 },
+   { 0x00800401, 0x24000231, 0x00d29441, 0x00000000 },
+   { 0x00800801, 0x24100231, 0x00d29461, 0x00000000 },
+   { 0x00600c01, 0x24d00231, 0x00cf9801, 0x00000000 },
+   { 0x00600801, 0x24d80231, 0x00cf9841, 0x00000000 },
+   { 0x00600c01, 0x25f00231, 0x00cf9001, 0x00000000 },
+   { 0x00600801, 0x25f80231, 0x00cf9041, 0x00000000 },
+   { 0x00400040, 0x22082da8, 0x006902c0, 0x06000600 },
+   { 0x00800401, 0x24200231, 0x00d28401, 0x00000000 },
+   { 0x00800801, 0x24300231, 0x00d28421, 0x00000000 },
+   { 0x00800401, 0x24400231, 0x00d28441, 0x00000000 },
+   { 0x00800801, 0x24500231, 0x00d28461, 0x00000000 },
+   { 0x00600401, 0x24e00231, 0x00cf8801, 0x00000000 },
+   { 0x00600c01, 0x24e80231, 0x00cf8841, 0x00000000 },
+   { 0x00600401, 0x26000231, 0x00cf8001, 0x00000000 },
+   { 0x00600c01, 0x26080231, 0x00cf8041, 0x00000000 },
+   { 0x00800401, 0x24600231, 0x00d29401, 0x00000000 },
+   { 0x00800801, 0x24700231, 0x00d29421, 0x00000000 },
+   { 0x00800401, 0x24800231, 0x00d29441, 0x00000000 },
+   { 0x00800801, 0x24900231, 0x00d29461, 0x00000000 },
+   { 0x00600c01, 0x24f00231, 0x00cf9801, 0x00000000 },
+   { 0x00600801, 0x24f80231, 0x00cf9841, 0x00000000 },
+   { 0x00600c01, 0x26100231, 0x00cf9001, 0x00000000 },
+   { 0x00600801, 0x26180231, 0x00cf9041, 0x00000000 },
+   { 0x05000031, 0x20000e24, 0x00000380, 0x120a8018 },
+   { 0x05000031, 0x20000e24, 0x000004a0, 0x060a8019 },
+   { 0x05000031, 0x20000e24, 0x000005c0, 0x060a801a },
+   { 0x00600001, 0x2fe00021, 0x008d0000, 0x00000000 },
+   { 0x07000031, 0x20001e24, 0x00000fe0, 0x82000010 },
diff --git a/src/shaders/post_processing/gen7/pl3_to_pa.asm b/src/shaders/post_processing/gen7/pl3_to_pa.asm
new file mode 100644
index 0000000..b3be8f9
--- /dev/null
+++ b/src/shaders/post_processing/gen7/pl3_to_pa.asm
@@ -0,0 +1,17 @@
+// Module name: AVS
+.kernel PL3_TO_PA
+.code
+
+#include "VP_Setup.g4a"
+#include "Set_Layer_0.g4a"
+#include "Set_AVS_Buf_0123_PL3.g4a"
+#include "PL3_AVS_Buf_0.g4a"
+#include "PL3_AVS_Buf_1.g4a"
+#include "PL3_AVS_Buf_2.g4a"
+#include "PL3_AVS_Buf_3.g4a"
+#include "Save_AVS_PA.g4a"        
+#include "EOT.g4a"
+
+.end_code  
+
+.end_kernel
diff --git a/src/shaders/post_processing/gen7/pl3_to_pa.g75b b/src/shaders/post_processing/gen7/pl3_to_pa.g75b
new file mode 100644
index 0000000..d11ab6d
--- /dev/null
+++ b/src/shaders/post_processing/gen7/pl3_to_pa.g75b
@@ -0,0 +1,709 @@
+   { 0x00600001, 0x23600021, 0x008d0000, 0x00000000 },
+   { 0x00600001, 0x23200021, 0x008d0000, 0x00000000 },
+   { 0x00600001, 0x23400021, 0x008d0000, 0x00000000 },
+   { 0x00600041, 0x206077bd, 0x008d0060, 0x000000f0 },
+   { 0x00200001, 0x211401bd, 0x004500e0, 0x00000000 },
+   { 0x01600010, 0x20002e24, 0x0000005a, 0x00010001 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x00000000 },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+   { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+   { 0x00110048, 0x20c077bd, 0x00000060, 0x00000114 },
+   { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+   { 0x00000048, 0x20a077bd, 0x00000080, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+   { 0x00110048, 0x20c077bd, 0x00000060, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+   { 0x00000048, 0x20a077bd, 0x00000080, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+   { 0x00110048, 0x20c077bd, 0x00000060, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+   { 0x00000048, 0x20a077bd, 0x00000080, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+   { 0x00110048, 0x20c077bd, 0x00000060, 0x00000220 },
+   { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+   { 0x00000048, 0x20a077bd, 0x00000080, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x00020002 },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+   { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+   { 0x00110048, 0x20c477bd, 0x00000064, 0x00000114 },
+   { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+   { 0x00000048, 0x20a477bd, 0x00000084, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+   { 0x00110048, 0x20c477bd, 0x00000064, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+   { 0x00000048, 0x20a477bd, 0x00000084, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+   { 0x00110048, 0x20c477bd, 0x00000064, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+   { 0x00000048, 0x20a477bd, 0x00000084, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+   { 0x00110048, 0x20c477bd, 0x00000064, 0x00000220 },
+   { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+   { 0x00000048, 0x20a477bd, 0x00000084, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x00040004 },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+   { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+   { 0x00110048, 0x20c877bd, 0x00000068, 0x00000114 },
+   { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+   { 0x00000048, 0x20a877bd, 0x00000088, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+   { 0x00110048, 0x20c877bd, 0x00000068, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+   { 0x00000048, 0x20a877bd, 0x00000088, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+   { 0x00110048, 0x20c877bd, 0x00000068, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+   { 0x00000048, 0x20a877bd, 0x00000088, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+   { 0x00110048, 0x20c877bd, 0x00000068, 0x00000220 },
+   { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+   { 0x00000048, 0x20a877bd, 0x00000088, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x00060006 },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+   { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+   { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000114 },
+   { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+   { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+   { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+   { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+   { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+   { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+   { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000220 },
+   { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+   { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x00080008 },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+   { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+   { 0x00110048, 0x20d077bd, 0x00000070, 0x00000114 },
+   { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+   { 0x00000048, 0x20b077bd, 0x00000090, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+   { 0x00110048, 0x20d077bd, 0x00000070, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+   { 0x00000048, 0x20b077bd, 0x00000090, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+   { 0x00110048, 0x20d077bd, 0x00000070, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+   { 0x00000048, 0x20b077bd, 0x00000090, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+   { 0x00110048, 0x20d077bd, 0x00000070, 0x00000220 },
+   { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+   { 0x00000048, 0x20b077bd, 0x00000090, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x000a000a },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+   { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+   { 0x00110048, 0x20d477bd, 0x00000074, 0x00000114 },
+   { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+   { 0x00000048, 0x20b477bd, 0x00000094, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+   { 0x00110048, 0x20d477bd, 0x00000074, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+   { 0x00000048, 0x20b477bd, 0x00000094, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+   { 0x00110048, 0x20d477bd, 0x00000074, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+   { 0x00000048, 0x20b477bd, 0x00000094, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+   { 0x00110048, 0x20d477bd, 0x00000074, 0x00000220 },
+   { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+   { 0x00000048, 0x20b477bd, 0x00000094, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x000c000c },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+   { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+   { 0x00110048, 0x20d877bd, 0x00000078, 0x00000114 },
+   { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+   { 0x00000048, 0x20b877bd, 0x00000098, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+   { 0x00110048, 0x20d877bd, 0x00000078, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+   { 0x00000048, 0x20b877bd, 0x00000098, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+   { 0x00110048, 0x20d877bd, 0x00000078, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+   { 0x00000048, 0x20b877bd, 0x00000098, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+   { 0x00110048, 0x20d877bd, 0x00000078, 0x00000220 },
+   { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+   { 0x00000048, 0x20b877bd, 0x00000098, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x000e000e },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+   { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+   { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000114 },
+   { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+   { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+   { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+   { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+   { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+   { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+   { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000220 },
+   { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+   { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00800001, 0x22a00169, 0x00000000, 0xffffffff },
+   { 0x02000010, 0x20002d24, 0x020000e4, 0x00000000 },
+   { 0x02010010, 0x20002d24, 0x020000e6, 0x00000000 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x000000d0 },
+   { 0x00000005, 0x23022d31, 0x00000044, 0x00030003 },
+   { 0x00000401, 0x233803bd, 0x000000f4, 0x00000000 },
+   { 0x00000c01, 0x233003bd, 0x00000060, 0x00000000 },
+   { 0x00000c01, 0x233403bd, 0x00000080, 0x00000000 },
+   { 0x00000c01, 0x232803bd, 0x000000c0, 0x00000000 },
+   { 0x00000801, 0x232c03bd, 0x000000a0, 0x00000000 },
+   { 0x00000001, 0x240803bc, 0x000000c0, 0x00000000 },
+   { 0x00000048, 0x24087fbc, 0x00000060, 0x41000000 },
+   { 0x00000448, 0x22e87fbd, 0x000000f4, 0x41e00000 },
+   { 0x00000c41, 0x22e47fbd, 0x00000080, 0x40800000 },
+   { 0x00000001, 0x241003bc, 0x00000060, 0x00000000 },
+   { 0x00000c48, 0x22f07fbd, 0x000000f4, 0x41000000 },
+   { 0x00000801, 0x22f40061, 0x00000000, 0x00000000 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00400001, 0x2400036c, 0x00000000, 0x00006ea2 },
+   { 0x00400040, 0x24002d8c, 0x00690400, 0x00460046 },
+   { 0x00400009, 0x22c02d8d, 0x00690400, 0x00050005 },
+   { 0x00400001, 0x2400036c, 0x00000000, 0x00006204 },
+   { 0x00400040, 0x24002d8c, 0x00690400, 0x00400040 },
+   { 0x00400409, 0x22402d8d, 0x00690400, 0x00050005 },
+   { 0x00000801, 0x22500061, 0x00000000, 0x01000100 },
+   { 0x00000001, 0x22d00061, 0x00000000, 0x00400040 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x000002c0 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+   { 0x00000401, 0x233c0021, 0x000000fc, 0x00000000 },
+   { 0x00000801, 0x23240121, 0x000000f8, 0x00000000 },
+   { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+   { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+   { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+   { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+   { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+   { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+   { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+   { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+   { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+   { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+   { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+   { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+   { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+   { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+   { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+   { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+   { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+   { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+   { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+   { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+   { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+   { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+   { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+   { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+   { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+   { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+   { 0x02000031, 0x28000229, 0x00000200, 0x00000200 },
+   { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb801 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000e000 },
+   { 0x02000031, 0x28800229, 0x00000200, 0x00000200 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044ebc02 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000e000 },
+   { 0x02000031, 0x29000229, 0x00000200, 0x00000200 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x000002b0 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+   { 0x00000040, 0x23240d21, 0x000000f8, 0x00000001 },
+   { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+   { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+   { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+   { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+   { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+   { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+   { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+   { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+   { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+   { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+   { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+   { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+   { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+   { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+   { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+   { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+   { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+   { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+   { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+   { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+   { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+   { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+   { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+   { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+   { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+   { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+   { 0x02000031, 0x2a000229, 0x00000200, 0x00000200 },
+   { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb801 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000e000 },
+   { 0x02000031, 0x2a800229, 0x00000200, 0x00000200 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044ebc02 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000e000 },
+   { 0x02000031, 0x2b000229, 0x00000200, 0x00000200 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x000002b0 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+   { 0x00000040, 0x23240d21, 0x000000f8, 0x00000002 },
+   { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+   { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+   { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+   { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+   { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+   { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+   { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+   { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+   { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+   { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+   { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+   { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+   { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+   { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+   { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+   { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+   { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+   { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+   { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+   { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+   { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+   { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+   { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+   { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+   { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+   { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+   { 0x02000031, 0x2c000229, 0x00000200, 0x00000200 },
+   { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb801 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000e000 },
+   { 0x02000031, 0x2c800229, 0x00000200, 0x00000200 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044ebc02 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000e000 },
+   { 0x02000031, 0x2d000229, 0x00000200, 0x00000200 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x000002b0 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+   { 0x00000040, 0x23240d21, 0x000000f8, 0x00000003 },
+   { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+   { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+   { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+   { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+   { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+   { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+   { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+   { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+   { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+   { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+   { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+   { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+   { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+   { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+   { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+   { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+   { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+   { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+   { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+   { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+   { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+   { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+   { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+   { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+   { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+   { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+   { 0x02000031, 0x2e000229, 0x00000200, 0x00000200 },
+   { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb801 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000e000 },
+   { 0x02000031, 0x2e800229, 0x00000200, 0x00000200 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044ebc02 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000e000 },
+   { 0x02000031, 0x2f000229, 0x00000200, 0x00000200 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00400040, 0x22002da8, 0x006902c0, 0x00000000 },
+   { 0x00400040, 0x22082e2c, 0x0069005c, 0x03a003a0 },
+   { 0x00000409, 0x23603da5, 0x000000e0, 0x00010001 },
+   { 0x00000c01, 0x236401a5, 0x000000e2, 0x00000000 },
+   { 0x00000801, 0x23680061, 0x00000000, 0x0001001f },
+   { 0x80400040, 0xc0002d29, 0x008a8000, 0x00800080 },
+   { 0x80400040, 0xc0202d29, 0x008a8020, 0x00800080 },
+   { 0x80400040, 0xc0402d29, 0x008a8040, 0x00800080 },
+   { 0x80400040, 0xc0602d29, 0x008a8060, 0x00800080 },
+   { 0x80600040, 0xa4002d29, 0x008d8400, 0x00800080 },
+   { 0x80600040, 0xa4202d29, 0x008d8420, 0x00800080 },
+   { 0x80600040, 0xa4402d29, 0x008d8440, 0x00800080 },
+   { 0x80600040, 0xa4602d29, 0x008d8460, 0x00800080 },
+   { 0x80400040, 0xc8002d29, 0x008a8800, 0x00800080 },
+   { 0x80400040, 0xc8202d29, 0x008a8820, 0x00800080 },
+   { 0x80400040, 0xc8402d29, 0x008a8840, 0x00800080 },
+   { 0x80400040, 0xc8602d29, 0x008a8860, 0x00800080 },
+   { 0x80400040, 0xc0102d29, 0x008a8010, 0x00800080 },
+   { 0x80400040, 0xc0302d29, 0x008a8030, 0x00800080 },
+   { 0x80400040, 0xc0502d29, 0x008a8050, 0x00800080 },
+   { 0x80400040, 0xc0702d29, 0x008a8070, 0x00800080 },
+   { 0x80600040, 0xa4102d29, 0x008d8410, 0x00800080 },
+   { 0x80600040, 0xa4302d29, 0x008d8430, 0x00800080 },
+   { 0x80600040, 0xa4502d29, 0x008d8450, 0x00800080 },
+   { 0x80600040, 0xa4702d29, 0x008d8470, 0x00800080 },
+   { 0x80400040, 0xc8102d29, 0x008a8810, 0x00800080 },
+   { 0x80400040, 0xc8302d29, 0x008a8830, 0x00800080 },
+   { 0x80400040, 0xc8502d29, 0x008a8850, 0x00800080 },
+   { 0x80400040, 0xc8702d29, 0x008a8870, 0x00800080 },
+   { 0x00400040, 0x22002da8, 0x006902c0, 0x02000200 },
+   { 0x80400040, 0xc0002d29, 0x008a8000, 0x00800080 },
+   { 0x80400040, 0xc0202d29, 0x008a8020, 0x00800080 },
+   { 0x80400040, 0xc0402d29, 0x008a8040, 0x00800080 },
+   { 0x80400040, 0xc0602d29, 0x008a8060, 0x00800080 },
+   { 0x80600040, 0xa4002d29, 0x008d8400, 0x00800080 },
+   { 0x80600040, 0xa4202d29, 0x008d8420, 0x00800080 },
+   { 0x80600040, 0xa4402d29, 0x008d8440, 0x00800080 },
+   { 0x80600040, 0xa4602d29, 0x008d8460, 0x00800080 },
+   { 0x80400040, 0xc8002d29, 0x008a8800, 0x00800080 },
+   { 0x80400040, 0xc8202d29, 0x008a8820, 0x00800080 },
+   { 0x80400040, 0xc8402d29, 0x008a8840, 0x00800080 },
+   { 0x80400040, 0xc8602d29, 0x008a8860, 0x00800080 },
+   { 0x80400040, 0xc0102d29, 0x008a8010, 0x00800080 },
+   { 0x80400040, 0xc0302d29, 0x008a8030, 0x00800080 },
+   { 0x80400040, 0xc0502d29, 0x008a8050, 0x00800080 },
+   { 0x80400040, 0xc0702d29, 0x008a8070, 0x00800080 },
+   { 0x80600040, 0xa4102d29, 0x008d8410, 0x00800080 },
+   { 0x80600040, 0xa4302d29, 0x008d8430, 0x00800080 },
+   { 0x80600040, 0xa4502d29, 0x008d8450, 0x00800080 },
+   { 0x80600040, 0xa4702d29, 0x008d8470, 0x00800080 },
+   { 0x80400040, 0xc8102d29, 0x008a8810, 0x00800080 },
+   { 0x80400040, 0xc8302d29, 0x008a8830, 0x00800080 },
+   { 0x80400040, 0xc8502d29, 0x008a8850, 0x00800080 },
+   { 0x80400040, 0xc8702d29, 0x008a8870, 0x00800080 },
+   { 0x00400040, 0x22002da8, 0x006902c0, 0x04000400 },
+   { 0x80400040, 0xc0002d29, 0x008a8000, 0x00800080 },
+   { 0x80400040, 0xc0202d29, 0x008a8020, 0x00800080 },
+   { 0x80400040, 0xc0402d29, 0x008a8040, 0x00800080 },
+   { 0x80400040, 0xc0602d29, 0x008a8060, 0x00800080 },
+   { 0x80600040, 0xa4002d29, 0x008d8400, 0x00800080 },
+   { 0x80600040, 0xa4202d29, 0x008d8420, 0x00800080 },
+   { 0x80600040, 0xa4402d29, 0x008d8440, 0x00800080 },
+   { 0x80600040, 0xa4602d29, 0x008d8460, 0x00800080 },
+   { 0x80400040, 0xc8002d29, 0x008a8800, 0x00800080 },
+   { 0x80400040, 0xc8202d29, 0x008a8820, 0x00800080 },
+   { 0x80400040, 0xc8402d29, 0x008a8840, 0x00800080 },
+   { 0x80400040, 0xc8602d29, 0x008a8860, 0x00800080 },
+   { 0x80400040, 0xc0102d29, 0x008a8010, 0x00800080 },
+   { 0x80400040, 0xc0302d29, 0x008a8030, 0x00800080 },
+   { 0x80400040, 0xc0502d29, 0x008a8050, 0x00800080 },
+   { 0x80400040, 0xc0702d29, 0x008a8070, 0x00800080 },
+   { 0x80600040, 0xa4102d29, 0x008d8410, 0x00800080 },
+   { 0x80600040, 0xa4302d29, 0x008d8430, 0x00800080 },
+   { 0x80600040, 0xa4502d29, 0x008d8450, 0x00800080 },
+   { 0x80600040, 0xa4702d29, 0x008d8470, 0x00800080 },
+   { 0x80400040, 0xc8102d29, 0x008a8810, 0x00800080 },
+   { 0x80400040, 0xc8302d29, 0x008a8830, 0x00800080 },
+   { 0x80400040, 0xc8502d29, 0x008a8850, 0x00800080 },
+   { 0x80400040, 0xc8702d29, 0x008a8870, 0x00800080 },
+   { 0x00400040, 0x22002da8, 0x006902c0, 0x06000600 },
+   { 0x80400040, 0xc0002d29, 0x008a8000, 0x00800080 },
+   { 0x80400040, 0xc0202d29, 0x008a8020, 0x00800080 },
+   { 0x80400040, 0xc0402d29, 0x008a8040, 0x00800080 },
+   { 0x80400040, 0xc0602d29, 0x008a8060, 0x00800080 },
+   { 0x80600040, 0xa4002d29, 0x008d8400, 0x00800080 },
+   { 0x80600040, 0xa4202d29, 0x008d8420, 0x00800080 },
+   { 0x80600040, 0xa4402d29, 0x008d8440, 0x00800080 },
+   { 0x80600040, 0xa4602d29, 0x008d8460, 0x00800080 },
+   { 0x80400040, 0xc8002d29, 0x008a8800, 0x00800080 },
+   { 0x80400040, 0xc8202d29, 0x008a8820, 0x00800080 },
+   { 0x80400040, 0xc8402d29, 0x008a8840, 0x00800080 },
+   { 0x80400040, 0xc8602d29, 0x008a8860, 0x00800080 },
+   { 0x80400040, 0xc0102d29, 0x008a8010, 0x00800080 },
+   { 0x80400040, 0xc0302d29, 0x008a8030, 0x00800080 },
+   { 0x80400040, 0xc0502d29, 0x008a8050, 0x00800080 },
+   { 0x80400040, 0xc0702d29, 0x008a8070, 0x00800080 },
+   { 0x80600040, 0xa4102d29, 0x008d8410, 0x00800080 },
+   { 0x80600040, 0xa4302d29, 0x008d8430, 0x00800080 },
+   { 0x80600040, 0xa4502d29, 0x008d8450, 0x00800080 },
+   { 0x80600040, 0xa4702d29, 0x008d8470, 0x00800080 },
+   { 0x80400040, 0xc8102d29, 0x008a8810, 0x00800080 },
+   { 0x80400040, 0xc8302d29, 0x008a8830, 0x00800080 },
+   { 0x80400040, 0xc8502d29, 0x008a8850, 0x00800080 },
+   { 0x80400040, 0xc8702d29, 0x008a8870, 0x00800080 },
+   { 0x00400040, 0x22002da8, 0x006902c0, 0x08000800 },
+   { 0x00400040, 0x22002da8, 0x006902c0, 0x00000000 },
+   { 0x00600001, 0x23800021, 0x008d0360, 0x00000000 },
+   { 0x00600001, 0x24a00021, 0x008d0360, 0x00000000 },
+   { 0x00000040, 0x24a41ca5, 0x00000364, 0x00000002 },
+   { 0x00600401, 0xf8000231, 0x00cf8001, 0x00000000 },
+   { 0x00600401, 0xf8200231, 0x00cf8021, 0x00000000 },
+   { 0x00800c01, 0xd0000231, 0x00d28401, 0x00000000 },
+   { 0x00800c01, 0xd0200231, 0x00d28421, 0x00000000 },
+   { 0x00600801, 0xf4000231, 0x00cf8801, 0x00000000 },
+   { 0x00600801, 0xf4200231, 0x00cf8821, 0x00000000 },
+   { 0x00600401, 0xf9200231, 0x00cf8041, 0x00000000 },
+   { 0x00600401, 0xf9400231, 0x00cf8061, 0x00000000 },
+   { 0x00800c01, 0xd1200231, 0x00d28441, 0x00000000 },
+   { 0x00800c01, 0xd1400231, 0x00d28461, 0x00000000 },
+   { 0x00600801, 0xf5200231, 0x00cf8841, 0x00000000 },
+   { 0x00600801, 0xf5400231, 0x00cf8861, 0x00000000 },
+   { 0x05000031, 0x20000e24, 0x00000380, 0x060a8018 },
+   { 0x05000031, 0x20000e24, 0x000004a0, 0x060a8018 },
+   { 0x00400040, 0x22002da8, 0x006902c0, 0x02000200 },
+   { 0x00000040, 0x23841ca5, 0x00000364, 0x00000004 },
+   { 0x00000040, 0x24a41ca5, 0x00000364, 0x00000006 },
+   { 0x00600401, 0xf8000231, 0x00cf8001, 0x00000000 },
+   { 0x00600401, 0xf8200231, 0x00cf8021, 0x00000000 },
+   { 0x00800c01, 0xd0000231, 0x00d28401, 0x00000000 },
+   { 0x00800c01, 0xd0200231, 0x00d28421, 0x00000000 },
+   { 0x00600801, 0xf4000231, 0x00cf8801, 0x00000000 },
+   { 0x00600801, 0xf4200231, 0x00cf8821, 0x00000000 },
+   { 0x00600401, 0xf9200231, 0x00cf8041, 0x00000000 },
+   { 0x00600401, 0xf9400231, 0x00cf8061, 0x00000000 },
+   { 0x00800c01, 0xd1200231, 0x00d28441, 0x00000000 },
+   { 0x00800c01, 0xd1400231, 0x00d28461, 0x00000000 },
+   { 0x00600801, 0xf5200231, 0x00cf8841, 0x00000000 },
+   { 0x00600801, 0xf5400231, 0x00cf8861, 0x00000000 },
+   { 0x05000031, 0x20000e24, 0x00000380, 0x060a8018 },
+   { 0x05000031, 0x20000e24, 0x000004a0, 0x060a8018 },
+   { 0x00400040, 0x22002da8, 0x006902c0, 0x04000400 },
+   { 0x00000040, 0x23841ca5, 0x00000364, 0x00000008 },
+   { 0x00000040, 0x24a41ca5, 0x00000364, 0x0000000a },
+   { 0x00600401, 0xf8000231, 0x00cf8001, 0x00000000 },
+   { 0x00600401, 0xf8200231, 0x00cf8021, 0x00000000 },
+   { 0x00800c01, 0xd0000231, 0x00d28401, 0x00000000 },
+   { 0x00800c01, 0xd0200231, 0x00d28421, 0x00000000 },
+   { 0x00600801, 0xf4000231, 0x00cf8801, 0x00000000 },
+   { 0x00600801, 0xf4200231, 0x00cf8821, 0x00000000 },
+   { 0x00600401, 0xf9200231, 0x00cf8041, 0x00000000 },
+   { 0x00600401, 0xf9400231, 0x00cf8061, 0x00000000 },
+   { 0x00800c01, 0xd1200231, 0x00d28441, 0x00000000 },
+   { 0x00800c01, 0xd1400231, 0x00d28461, 0x00000000 },
+   { 0x00600801, 0xf5200231, 0x00cf8841, 0x00000000 },
+   { 0x00600801, 0xf5400231, 0x00cf8861, 0x00000000 },
+   { 0x05000031, 0x20000e24, 0x00000380, 0x060a8018 },
+   { 0x05000031, 0x20000e24, 0x000004a0, 0x060a8018 },
+   { 0x00400040, 0x22002da8, 0x006902c0, 0x06000600 },
+   { 0x00000040, 0x23841ca5, 0x00000364, 0x0000000c },
+   { 0x00000040, 0x24a41ca5, 0x00000364, 0x0000000e },
+   { 0x00600401, 0xf8000231, 0x00cf8001, 0x00000000 },
+   { 0x00600401, 0xf8200231, 0x00cf8021, 0x00000000 },
+   { 0x00800c01, 0xd0000231, 0x00d28401, 0x00000000 },
+   { 0x00800c01, 0xd0200231, 0x00d28421, 0x00000000 },
+   { 0x00600801, 0xf4000231, 0x00cf8801, 0x00000000 },
+   { 0x00600801, 0xf4200231, 0x00cf8821, 0x00000000 },
+   { 0x00600401, 0xf9200231, 0x00cf8041, 0x00000000 },
+   { 0x00600401, 0xf9400231, 0x00cf8061, 0x00000000 },
+   { 0x00800c01, 0xd1200231, 0x00d28441, 0x00000000 },
+   { 0x00800c01, 0xd1400231, 0x00d28461, 0x00000000 },
+   { 0x00600801, 0xf5200231, 0x00cf8841, 0x00000000 },
+   { 0x00600801, 0xf5400231, 0x00cf8861, 0x00000000 },
+   { 0x05000031, 0x20000e24, 0x00000380, 0x060a8018 },
+   { 0x05000031, 0x20000e24, 0x000004a0, 0x060a8018 },
+   { 0x00600001, 0x2fe00021, 0x008d0000, 0x00000000 },
+   { 0x07000031, 0x20001e24, 0x00000fe0, 0x82000010 },
diff --git a/src/shaders/post_processing/gen7/pl3_to_pa.g7b b/src/shaders/post_processing/gen7/pl3_to_pa.g7b
new file mode 100644
index 0000000..e1c3c05
--- /dev/null
+++ b/src/shaders/post_processing/gen7/pl3_to_pa.g7b
@@ -0,0 +1,709 @@
+   { 0x00600001, 0x23600021, 0x008d0000, 0x00000000 },
+   { 0x00600001, 0x23200021, 0x008d0000, 0x00000000 },
+   { 0x00600001, 0x23400021, 0x008d0000, 0x00000000 },
+   { 0x00600041, 0x206077bd, 0x008d0060, 0x000000f0 },
+   { 0x00200001, 0x211401bd, 0x004500e0, 0x00000000 },
+   { 0x01600010, 0x20002e24, 0x0000005a, 0x00010001 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x00000000 },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+   { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+   { 0x00110048, 0x20c077bd, 0x00000060, 0x00000114 },
+   { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+   { 0x00000048, 0x20a077bd, 0x00000080, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+   { 0x00110048, 0x20c077bd, 0x00000060, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+   { 0x00000048, 0x20a077bd, 0x00000080, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+   { 0x00110048, 0x20c077bd, 0x00000060, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+   { 0x00000048, 0x20a077bd, 0x00000080, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+   { 0x00110048, 0x20c077bd, 0x00000060, 0x00000220 },
+   { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+   { 0x00000048, 0x20a077bd, 0x00000080, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x00020002 },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+   { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+   { 0x00110048, 0x20c477bd, 0x00000064, 0x00000114 },
+   { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+   { 0x00000048, 0x20a477bd, 0x00000084, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+   { 0x00110048, 0x20c477bd, 0x00000064, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+   { 0x00000048, 0x20a477bd, 0x00000084, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+   { 0x00110048, 0x20c477bd, 0x00000064, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+   { 0x00000048, 0x20a477bd, 0x00000084, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+   { 0x00110048, 0x20c477bd, 0x00000064, 0x00000220 },
+   { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+   { 0x00000048, 0x20a477bd, 0x00000084, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x00040004 },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+   { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+   { 0x00110048, 0x20c877bd, 0x00000068, 0x00000114 },
+   { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+   { 0x00000048, 0x20a877bd, 0x00000088, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+   { 0x00110048, 0x20c877bd, 0x00000068, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+   { 0x00000048, 0x20a877bd, 0x00000088, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+   { 0x00110048, 0x20c877bd, 0x00000068, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+   { 0x00000048, 0x20a877bd, 0x00000088, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+   { 0x00110048, 0x20c877bd, 0x00000068, 0x00000220 },
+   { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+   { 0x00000048, 0x20a877bd, 0x00000088, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x00060006 },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+   { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+   { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000114 },
+   { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+   { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+   { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+   { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+   { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+   { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+   { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000220 },
+   { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+   { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x00080008 },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+   { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+   { 0x00110048, 0x20d077bd, 0x00000070, 0x00000114 },
+   { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+   { 0x00000048, 0x20b077bd, 0x00000090, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+   { 0x00110048, 0x20d077bd, 0x00000070, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+   { 0x00000048, 0x20b077bd, 0x00000090, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+   { 0x00110048, 0x20d077bd, 0x00000070, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+   { 0x00000048, 0x20b077bd, 0x00000090, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+   { 0x00110048, 0x20d077bd, 0x00000070, 0x00000220 },
+   { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+   { 0x00000048, 0x20b077bd, 0x00000090, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x000a000a },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+   { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+   { 0x00110048, 0x20d477bd, 0x00000074, 0x00000114 },
+   { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+   { 0x00000048, 0x20b477bd, 0x00000094, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+   { 0x00110048, 0x20d477bd, 0x00000074, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+   { 0x00000048, 0x20b477bd, 0x00000094, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+   { 0x00110048, 0x20d477bd, 0x00000074, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+   { 0x00000048, 0x20b477bd, 0x00000094, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+   { 0x00110048, 0x20d477bd, 0x00000074, 0x00000220 },
+   { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+   { 0x00000048, 0x20b477bd, 0x00000094, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x000c000c },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+   { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+   { 0x00110048, 0x20d877bd, 0x00000078, 0x00000114 },
+   { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+   { 0x00000048, 0x20b877bd, 0x00000098, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+   { 0x00110048, 0x20d877bd, 0x00000078, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+   { 0x00000048, 0x20b877bd, 0x00000098, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+   { 0x00110048, 0x20d877bd, 0x00000078, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+   { 0x00000048, 0x20b877bd, 0x00000098, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+   { 0x00110048, 0x20d877bd, 0x00000078, 0x00000220 },
+   { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+   { 0x00000048, 0x20b877bd, 0x00000098, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x000e000e },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+   { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+   { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000114 },
+   { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+   { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+   { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+   { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+   { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+   { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+   { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000220 },
+   { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+   { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00800001, 0x22a00169, 0x00000000, 0xffffffff },
+   { 0x02000010, 0x20002d24, 0x020000e4, 0x00000000 },
+   { 0x02010010, 0x20002d24, 0x020000e6, 0x00000000 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x0000001a },
+   { 0x00000005, 0x23022d31, 0x00000044, 0x00030003 },
+   { 0x00000401, 0x233803bd, 0x000000f4, 0x00000000 },
+   { 0x00000c01, 0x233003bd, 0x00000060, 0x00000000 },
+   { 0x00000c01, 0x233403bd, 0x00000080, 0x00000000 },
+   { 0x00000c01, 0x232803bd, 0x000000c0, 0x00000000 },
+   { 0x00000801, 0x232c03bd, 0x000000a0, 0x00000000 },
+   { 0x00000001, 0x240803bc, 0x000000c0, 0x00000000 },
+   { 0x00000048, 0x24087fbc, 0x00000060, 0x41000000 },
+   { 0x00000448, 0x22e87fbd, 0x000000f4, 0x41e00000 },
+   { 0x00000c41, 0x22e47fbd, 0x00000080, 0x40800000 },
+   { 0x00000001, 0x241003bc, 0x00000060, 0x00000000 },
+   { 0x00000c48, 0x22f07fbd, 0x000000f4, 0x41000000 },
+   { 0x00000801, 0x22f40061, 0x00000000, 0x00000000 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00400001, 0x2400036c, 0x00000000, 0x00006ea2 },
+   { 0x00400040, 0x24002d8c, 0x00690400, 0x00460046 },
+   { 0x00400009, 0x22c02d8d, 0x00690400, 0x00050005 },
+   { 0x00400001, 0x2400036c, 0x00000000, 0x00006204 },
+   { 0x00400040, 0x24002d8c, 0x00690400, 0x00400040 },
+   { 0x00400409, 0x22402d8d, 0x00690400, 0x00050005 },
+   { 0x00000801, 0x22500061, 0x00000000, 0x01000100 },
+   { 0x00000001, 0x22d00061, 0x00000000, 0x00400040 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x00000058 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+   { 0x00000401, 0x233c0021, 0x000000fc, 0x00000000 },
+   { 0x00000801, 0x23240121, 0x000000f8, 0x00000000 },
+   { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+   { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+   { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+   { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+   { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+   { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+   { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+   { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+   { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+   { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+   { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+   { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+   { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+   { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+   { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+   { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+   { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+   { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+   { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+   { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+   { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+   { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+   { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+   { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+   { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+   { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+   { 0x02000031, 0x28000229, 0x00000200, 0x00000200 },
+   { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb801 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000e000 },
+   { 0x02000031, 0x28800229, 0x00000200, 0x00000200 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044ebc02 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000e000 },
+   { 0x02000031, 0x29000229, 0x00000200, 0x00000200 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x00000056 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+   { 0x00000040, 0x23240d21, 0x000000f8, 0x00000001 },
+   { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+   { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+   { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+   { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+   { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+   { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+   { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+   { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+   { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+   { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+   { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+   { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+   { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+   { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+   { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+   { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+   { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+   { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+   { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+   { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+   { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+   { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+   { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+   { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+   { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+   { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+   { 0x02000031, 0x2a000229, 0x00000200, 0x00000200 },
+   { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb801 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000e000 },
+   { 0x02000031, 0x2a800229, 0x00000200, 0x00000200 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044ebc02 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000e000 },
+   { 0x02000031, 0x2b000229, 0x00000200, 0x00000200 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x00000056 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+   { 0x00000040, 0x23240d21, 0x000000f8, 0x00000002 },
+   { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+   { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+   { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+   { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+   { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+   { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+   { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+   { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+   { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+   { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+   { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+   { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+   { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+   { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+   { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+   { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+   { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+   { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+   { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+   { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+   { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+   { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+   { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+   { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+   { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+   { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+   { 0x02000031, 0x2c000229, 0x00000200, 0x00000200 },
+   { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb801 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000e000 },
+   { 0x02000031, 0x2c800229, 0x00000200, 0x00000200 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044ebc02 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000e000 },
+   { 0x02000031, 0x2d000229, 0x00000200, 0x00000200 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x00000056 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+   { 0x00000040, 0x23240d21, 0x000000f8, 0x00000003 },
+   { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+   { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+   { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+   { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+   { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+   { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+   { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+   { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+   { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+   { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+   { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+   { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+   { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+   { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+   { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+   { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+   { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+   { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+   { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+   { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+   { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+   { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+   { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+   { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+   { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+   { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+   { 0x02000031, 0x2e000229, 0x00000200, 0x00000200 },
+   { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb801 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000e000 },
+   { 0x02000031, 0x2e800229, 0x00000200, 0x00000200 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044ebc02 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000e000 },
+   { 0x02000031, 0x2f000229, 0x00000200, 0x00000200 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00400040, 0x22002da8, 0x006902c0, 0x00000000 },
+   { 0x00400040, 0x22082e2c, 0x0069005c, 0x03a003a0 },
+   { 0x00000409, 0x23603da5, 0x000000e0, 0x00010001 },
+   { 0x00000c01, 0x236401a5, 0x000000e2, 0x00000000 },
+   { 0x00000801, 0x23680061, 0x00000000, 0x0001001f },
+   { 0x80400040, 0xc0002d29, 0x008a8000, 0x00800080 },
+   { 0x80400040, 0xc0202d29, 0x008a8020, 0x00800080 },
+   { 0x80400040, 0xc0402d29, 0x008a8040, 0x00800080 },
+   { 0x80400040, 0xc0602d29, 0x008a8060, 0x00800080 },
+   { 0x80600040, 0xa4002d29, 0x008d8400, 0x00800080 },
+   { 0x80600040, 0xa4202d29, 0x008d8420, 0x00800080 },
+   { 0x80600040, 0xa4402d29, 0x008d8440, 0x00800080 },
+   { 0x80600040, 0xa4602d29, 0x008d8460, 0x00800080 },
+   { 0x80400040, 0xc8002d29, 0x008a8800, 0x00800080 },
+   { 0x80400040, 0xc8202d29, 0x008a8820, 0x00800080 },
+   { 0x80400040, 0xc8402d29, 0x008a8840, 0x00800080 },
+   { 0x80400040, 0xc8602d29, 0x008a8860, 0x00800080 },
+   { 0x80400040, 0xc0102d29, 0x008a8010, 0x00800080 },
+   { 0x80400040, 0xc0302d29, 0x008a8030, 0x00800080 },
+   { 0x80400040, 0xc0502d29, 0x008a8050, 0x00800080 },
+   { 0x80400040, 0xc0702d29, 0x008a8070, 0x00800080 },
+   { 0x80600040, 0xa4102d29, 0x008d8410, 0x00800080 },
+   { 0x80600040, 0xa4302d29, 0x008d8430, 0x00800080 },
+   { 0x80600040, 0xa4502d29, 0x008d8450, 0x00800080 },
+   { 0x80600040, 0xa4702d29, 0x008d8470, 0x00800080 },
+   { 0x80400040, 0xc8102d29, 0x008a8810, 0x00800080 },
+   { 0x80400040, 0xc8302d29, 0x008a8830, 0x00800080 },
+   { 0x80400040, 0xc8502d29, 0x008a8850, 0x00800080 },
+   { 0x80400040, 0xc8702d29, 0x008a8870, 0x00800080 },
+   { 0x00400040, 0x22002da8, 0x006902c0, 0x02000200 },
+   { 0x80400040, 0xc0002d29, 0x008a8000, 0x00800080 },
+   { 0x80400040, 0xc0202d29, 0x008a8020, 0x00800080 },
+   { 0x80400040, 0xc0402d29, 0x008a8040, 0x00800080 },
+   { 0x80400040, 0xc0602d29, 0x008a8060, 0x00800080 },
+   { 0x80600040, 0xa4002d29, 0x008d8400, 0x00800080 },
+   { 0x80600040, 0xa4202d29, 0x008d8420, 0x00800080 },
+   { 0x80600040, 0xa4402d29, 0x008d8440, 0x00800080 },
+   { 0x80600040, 0xa4602d29, 0x008d8460, 0x00800080 },
+   { 0x80400040, 0xc8002d29, 0x008a8800, 0x00800080 },
+   { 0x80400040, 0xc8202d29, 0x008a8820, 0x00800080 },
+   { 0x80400040, 0xc8402d29, 0x008a8840, 0x00800080 },
+   { 0x80400040, 0xc8602d29, 0x008a8860, 0x00800080 },
+   { 0x80400040, 0xc0102d29, 0x008a8010, 0x00800080 },
+   { 0x80400040, 0xc0302d29, 0x008a8030, 0x00800080 },
+   { 0x80400040, 0xc0502d29, 0x008a8050, 0x00800080 },
+   { 0x80400040, 0xc0702d29, 0x008a8070, 0x00800080 },
+   { 0x80600040, 0xa4102d29, 0x008d8410, 0x00800080 },
+   { 0x80600040, 0xa4302d29, 0x008d8430, 0x00800080 },
+   { 0x80600040, 0xa4502d29, 0x008d8450, 0x00800080 },
+   { 0x80600040, 0xa4702d29, 0x008d8470, 0x00800080 },
+   { 0x80400040, 0xc8102d29, 0x008a8810, 0x00800080 },
+   { 0x80400040, 0xc8302d29, 0x008a8830, 0x00800080 },
+   { 0x80400040, 0xc8502d29, 0x008a8850, 0x00800080 },
+   { 0x80400040, 0xc8702d29, 0x008a8870, 0x00800080 },
+   { 0x00400040, 0x22002da8, 0x006902c0, 0x04000400 },
+   { 0x80400040, 0xc0002d29, 0x008a8000, 0x00800080 },
+   { 0x80400040, 0xc0202d29, 0x008a8020, 0x00800080 },
+   { 0x80400040, 0xc0402d29, 0x008a8040, 0x00800080 },
+   { 0x80400040, 0xc0602d29, 0x008a8060, 0x00800080 },
+   { 0x80600040, 0xa4002d29, 0x008d8400, 0x00800080 },
+   { 0x80600040, 0xa4202d29, 0x008d8420, 0x00800080 },
+   { 0x80600040, 0xa4402d29, 0x008d8440, 0x00800080 },
+   { 0x80600040, 0xa4602d29, 0x008d8460, 0x00800080 },
+   { 0x80400040, 0xc8002d29, 0x008a8800, 0x00800080 },
+   { 0x80400040, 0xc8202d29, 0x008a8820, 0x00800080 },
+   { 0x80400040, 0xc8402d29, 0x008a8840, 0x00800080 },
+   { 0x80400040, 0xc8602d29, 0x008a8860, 0x00800080 },
+   { 0x80400040, 0xc0102d29, 0x008a8010, 0x00800080 },
+   { 0x80400040, 0xc0302d29, 0x008a8030, 0x00800080 },
+   { 0x80400040, 0xc0502d29, 0x008a8050, 0x00800080 },
+   { 0x80400040, 0xc0702d29, 0x008a8070, 0x00800080 },
+   { 0x80600040, 0xa4102d29, 0x008d8410, 0x00800080 },
+   { 0x80600040, 0xa4302d29, 0x008d8430, 0x00800080 },
+   { 0x80600040, 0xa4502d29, 0x008d8450, 0x00800080 },
+   { 0x80600040, 0xa4702d29, 0x008d8470, 0x00800080 },
+   { 0x80400040, 0xc8102d29, 0x008a8810, 0x00800080 },
+   { 0x80400040, 0xc8302d29, 0x008a8830, 0x00800080 },
+   { 0x80400040, 0xc8502d29, 0x008a8850, 0x00800080 },
+   { 0x80400040, 0xc8702d29, 0x008a8870, 0x00800080 },
+   { 0x00400040, 0x22002da8, 0x006902c0, 0x06000600 },
+   { 0x80400040, 0xc0002d29, 0x008a8000, 0x00800080 },
+   { 0x80400040, 0xc0202d29, 0x008a8020, 0x00800080 },
+   { 0x80400040, 0xc0402d29, 0x008a8040, 0x00800080 },
+   { 0x80400040, 0xc0602d29, 0x008a8060, 0x00800080 },
+   { 0x80600040, 0xa4002d29, 0x008d8400, 0x00800080 },
+   { 0x80600040, 0xa4202d29, 0x008d8420, 0x00800080 },
+   { 0x80600040, 0xa4402d29, 0x008d8440, 0x00800080 },
+   { 0x80600040, 0xa4602d29, 0x008d8460, 0x00800080 },
+   { 0x80400040, 0xc8002d29, 0x008a8800, 0x00800080 },
+   { 0x80400040, 0xc8202d29, 0x008a8820, 0x00800080 },
+   { 0x80400040, 0xc8402d29, 0x008a8840, 0x00800080 },
+   { 0x80400040, 0xc8602d29, 0x008a8860, 0x00800080 },
+   { 0x80400040, 0xc0102d29, 0x008a8010, 0x00800080 },
+   { 0x80400040, 0xc0302d29, 0x008a8030, 0x00800080 },
+   { 0x80400040, 0xc0502d29, 0x008a8050, 0x00800080 },
+   { 0x80400040, 0xc0702d29, 0x008a8070, 0x00800080 },
+   { 0x80600040, 0xa4102d29, 0x008d8410, 0x00800080 },
+   { 0x80600040, 0xa4302d29, 0x008d8430, 0x00800080 },
+   { 0x80600040, 0xa4502d29, 0x008d8450, 0x00800080 },
+   { 0x80600040, 0xa4702d29, 0x008d8470, 0x00800080 },
+   { 0x80400040, 0xc8102d29, 0x008a8810, 0x00800080 },
+   { 0x80400040, 0xc8302d29, 0x008a8830, 0x00800080 },
+   { 0x80400040, 0xc8502d29, 0x008a8850, 0x00800080 },
+   { 0x80400040, 0xc8702d29, 0x008a8870, 0x00800080 },
+   { 0x00400040, 0x22002da8, 0x006902c0, 0x08000800 },
+   { 0x00400040, 0x22002da8, 0x006902c0, 0x00000000 },
+   { 0x00600001, 0x23800021, 0x008d0360, 0x00000000 },
+   { 0x00600001, 0x24a00021, 0x008d0360, 0x00000000 },
+   { 0x00000040, 0x24a41ca5, 0x00000364, 0x00000002 },
+   { 0x00600401, 0xf8000231, 0x00cf8001, 0x00000000 },
+   { 0x00600401, 0xf8200231, 0x00cf8021, 0x00000000 },
+   { 0x00800c01, 0xd0000231, 0x00d28401, 0x00000000 },
+   { 0x00800c01, 0xd0200231, 0x00d28421, 0x00000000 },
+   { 0x00600801, 0xf4000231, 0x00cf8801, 0x00000000 },
+   { 0x00600801, 0xf4200231, 0x00cf8821, 0x00000000 },
+   { 0x00600401, 0xf9200231, 0x00cf8041, 0x00000000 },
+   { 0x00600401, 0xf9400231, 0x00cf8061, 0x00000000 },
+   { 0x00800c01, 0xd1200231, 0x00d28441, 0x00000000 },
+   { 0x00800c01, 0xd1400231, 0x00d28461, 0x00000000 },
+   { 0x00600801, 0xf5200231, 0x00cf8841, 0x00000000 },
+   { 0x00600801, 0xf5400231, 0x00cf8861, 0x00000000 },
+   { 0x05000031, 0x20000e24, 0x00000380, 0x060a8018 },
+   { 0x05000031, 0x20000e24, 0x000004a0, 0x060a8018 },
+   { 0x00400040, 0x22002da8, 0x006902c0, 0x02000200 },
+   { 0x00000040, 0x23841ca5, 0x00000364, 0x00000004 },
+   { 0x00000040, 0x24a41ca5, 0x00000364, 0x00000006 },
+   { 0x00600401, 0xf8000231, 0x00cf8001, 0x00000000 },
+   { 0x00600401, 0xf8200231, 0x00cf8021, 0x00000000 },
+   { 0x00800c01, 0xd0000231, 0x00d28401, 0x00000000 },
+   { 0x00800c01, 0xd0200231, 0x00d28421, 0x00000000 },
+   { 0x00600801, 0xf4000231, 0x00cf8801, 0x00000000 },
+   { 0x00600801, 0xf4200231, 0x00cf8821, 0x00000000 },
+   { 0x00600401, 0xf9200231, 0x00cf8041, 0x00000000 },
+   { 0x00600401, 0xf9400231, 0x00cf8061, 0x00000000 },
+   { 0x00800c01, 0xd1200231, 0x00d28441, 0x00000000 },
+   { 0x00800c01, 0xd1400231, 0x00d28461, 0x00000000 },
+   { 0x00600801, 0xf5200231, 0x00cf8841, 0x00000000 },
+   { 0x00600801, 0xf5400231, 0x00cf8861, 0x00000000 },
+   { 0x05000031, 0x20000e24, 0x00000380, 0x060a8018 },
+   { 0x05000031, 0x20000e24, 0x000004a0, 0x060a8018 },
+   { 0x00400040, 0x22002da8, 0x006902c0, 0x04000400 },
+   { 0x00000040, 0x23841ca5, 0x00000364, 0x00000008 },
+   { 0x00000040, 0x24a41ca5, 0x00000364, 0x0000000a },
+   { 0x00600401, 0xf8000231, 0x00cf8001, 0x00000000 },
+   { 0x00600401, 0xf8200231, 0x00cf8021, 0x00000000 },
+   { 0x00800c01, 0xd0000231, 0x00d28401, 0x00000000 },
+   { 0x00800c01, 0xd0200231, 0x00d28421, 0x00000000 },
+   { 0x00600801, 0xf4000231, 0x00cf8801, 0x00000000 },
+   { 0x00600801, 0xf4200231, 0x00cf8821, 0x00000000 },
+   { 0x00600401, 0xf9200231, 0x00cf8041, 0x00000000 },
+   { 0x00600401, 0xf9400231, 0x00cf8061, 0x00000000 },
+   { 0x00800c01, 0xd1200231, 0x00d28441, 0x00000000 },
+   { 0x00800c01, 0xd1400231, 0x00d28461, 0x00000000 },
+   { 0x00600801, 0xf5200231, 0x00cf8841, 0x00000000 },
+   { 0x00600801, 0xf5400231, 0x00cf8861, 0x00000000 },
+   { 0x05000031, 0x20000e24, 0x00000380, 0x060a8018 },
+   { 0x05000031, 0x20000e24, 0x000004a0, 0x060a8018 },
+   { 0x00400040, 0x22002da8, 0x006902c0, 0x06000600 },
+   { 0x00000040, 0x23841ca5, 0x00000364, 0x0000000c },
+   { 0x00000040, 0x24a41ca5, 0x00000364, 0x0000000e },
+   { 0x00600401, 0xf8000231, 0x00cf8001, 0x00000000 },
+   { 0x00600401, 0xf8200231, 0x00cf8021, 0x00000000 },
+   { 0x00800c01, 0xd0000231, 0x00d28401, 0x00000000 },
+   { 0x00800c01, 0xd0200231, 0x00d28421, 0x00000000 },
+   { 0x00600801, 0xf4000231, 0x00cf8801, 0x00000000 },
+   { 0x00600801, 0xf4200231, 0x00cf8821, 0x00000000 },
+   { 0x00600401, 0xf9200231, 0x00cf8041, 0x00000000 },
+   { 0x00600401, 0xf9400231, 0x00cf8061, 0x00000000 },
+   { 0x00800c01, 0xd1200231, 0x00d28441, 0x00000000 },
+   { 0x00800c01, 0xd1400231, 0x00d28461, 0x00000000 },
+   { 0x00600801, 0xf5200231, 0x00cf8841, 0x00000000 },
+   { 0x00600801, 0xf5400231, 0x00cf8861, 0x00000000 },
+   { 0x05000031, 0x20000e24, 0x00000380, 0x060a8018 },
+   { 0x05000031, 0x20000e24, 0x000004a0, 0x060a8018 },
+   { 0x00600001, 0x2fe00021, 0x008d0000, 0x00000000 },
+   { 0x07000031, 0x20001e24, 0x00000fe0, 0x82000010 },
diff --git a/src/shaders/post_processing/gen7/pl3_to_pl2.asm b/src/shaders/post_processing/gen7/pl3_to_pl2.asm
new file mode 100644
index 0000000..6123fc9
--- /dev/null
+++ b/src/shaders/post_processing/gen7/pl3_to_pl2.asm
@@ -0,0 +1,17 @@
+// Module name: AVS
+.kernel PL3_TO_PL2
+.code
+
+#include "VP_Setup.g4a"
+#include "Set_Layer_0.g4a"
+#include "Set_AVS_Buf_0123_PL3.g4a"
+#include "PL3_AVS_Buf_0.g4a"
+#include "PL3_AVS_Buf_1.g4a"
+#include "PL3_AVS_Buf_2.g4a"
+#include "PL3_AVS_Buf_3.g4a"
+#include "Save_AVS_NV12.g4a"        
+#include "EOT.g4a"
+
+.end_code  
+
+.end_kernel
diff --git a/src/shaders/post_processing/gen7/pl3_to_pl2.g75b b/src/shaders/post_processing/gen7/pl3_to_pl2.g75b
new file mode 100644
index 0000000..0cf2d5d
--- /dev/null
+++ b/src/shaders/post_processing/gen7/pl3_to_pl2.g75b
@@ -0,0 +1,666 @@
+   { 0x00600001, 0x23600021, 0x008d0000, 0x00000000 },
+   { 0x00600001, 0x23200021, 0x008d0000, 0x00000000 },
+   { 0x00600001, 0x23400021, 0x008d0000, 0x00000000 },
+   { 0x00600041, 0x206077bd, 0x008d0060, 0x000000f0 },
+   { 0x00200001, 0x211401bd, 0x004500e0, 0x00000000 },
+   { 0x01600010, 0x20002e24, 0x0000005a, 0x00010001 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x00000000 },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+   { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+   { 0x00110048, 0x20c077bd, 0x00000060, 0x00000114 },
+   { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+   { 0x00000048, 0x20a077bd, 0x00000080, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+   { 0x00110048, 0x20c077bd, 0x00000060, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+   { 0x00000048, 0x20a077bd, 0x00000080, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+   { 0x00110048, 0x20c077bd, 0x00000060, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+   { 0x00000048, 0x20a077bd, 0x00000080, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+   { 0x00110048, 0x20c077bd, 0x00000060, 0x00000220 },
+   { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+   { 0x00000048, 0x20a077bd, 0x00000080, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x00020002 },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+   { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+   { 0x00110048, 0x20c477bd, 0x00000064, 0x00000114 },
+   { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+   { 0x00000048, 0x20a477bd, 0x00000084, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+   { 0x00110048, 0x20c477bd, 0x00000064, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+   { 0x00000048, 0x20a477bd, 0x00000084, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+   { 0x00110048, 0x20c477bd, 0x00000064, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+   { 0x00000048, 0x20a477bd, 0x00000084, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+   { 0x00110048, 0x20c477bd, 0x00000064, 0x00000220 },
+   { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+   { 0x00000048, 0x20a477bd, 0x00000084, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x00040004 },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+   { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+   { 0x00110048, 0x20c877bd, 0x00000068, 0x00000114 },
+   { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+   { 0x00000048, 0x20a877bd, 0x00000088, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+   { 0x00110048, 0x20c877bd, 0x00000068, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+   { 0x00000048, 0x20a877bd, 0x00000088, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+   { 0x00110048, 0x20c877bd, 0x00000068, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+   { 0x00000048, 0x20a877bd, 0x00000088, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+   { 0x00110048, 0x20c877bd, 0x00000068, 0x00000220 },
+   { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+   { 0x00000048, 0x20a877bd, 0x00000088, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x00060006 },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+   { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+   { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000114 },
+   { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+   { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+   { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+   { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+   { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+   { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+   { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000220 },
+   { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+   { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x00080008 },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+   { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+   { 0x00110048, 0x20d077bd, 0x00000070, 0x00000114 },
+   { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+   { 0x00000048, 0x20b077bd, 0x00000090, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+   { 0x00110048, 0x20d077bd, 0x00000070, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+   { 0x00000048, 0x20b077bd, 0x00000090, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+   { 0x00110048, 0x20d077bd, 0x00000070, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+   { 0x00000048, 0x20b077bd, 0x00000090, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+   { 0x00110048, 0x20d077bd, 0x00000070, 0x00000220 },
+   { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+   { 0x00000048, 0x20b077bd, 0x00000090, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x000a000a },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+   { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+   { 0x00110048, 0x20d477bd, 0x00000074, 0x00000114 },
+   { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+   { 0x00000048, 0x20b477bd, 0x00000094, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+   { 0x00110048, 0x20d477bd, 0x00000074, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+   { 0x00000048, 0x20b477bd, 0x00000094, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+   { 0x00110048, 0x20d477bd, 0x00000074, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+   { 0x00000048, 0x20b477bd, 0x00000094, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+   { 0x00110048, 0x20d477bd, 0x00000074, 0x00000220 },
+   { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+   { 0x00000048, 0x20b477bd, 0x00000094, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x000c000c },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+   { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+   { 0x00110048, 0x20d877bd, 0x00000078, 0x00000114 },
+   { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+   { 0x00000048, 0x20b877bd, 0x00000098, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+   { 0x00110048, 0x20d877bd, 0x00000078, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+   { 0x00000048, 0x20b877bd, 0x00000098, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+   { 0x00110048, 0x20d877bd, 0x00000078, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+   { 0x00000048, 0x20b877bd, 0x00000098, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+   { 0x00110048, 0x20d877bd, 0x00000078, 0x00000220 },
+   { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+   { 0x00000048, 0x20b877bd, 0x00000098, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x000e000e },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+   { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+   { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000114 },
+   { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+   { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+   { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+   { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+   { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+   { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+   { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000220 },
+   { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+   { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00800001, 0x22a00169, 0x00000000, 0xffffffff },
+   { 0x02000010, 0x20002d24, 0x020000e4, 0x00000000 },
+   { 0x02010010, 0x20002d24, 0x020000e6, 0x00000000 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x000000d0 },
+   { 0x00000005, 0x23022d31, 0x00000044, 0x00030003 },
+   { 0x00000401, 0x233803bd, 0x000000f4, 0x00000000 },
+   { 0x00000c01, 0x233003bd, 0x00000060, 0x00000000 },
+   { 0x00000c01, 0x233403bd, 0x00000080, 0x00000000 },
+   { 0x00000c01, 0x232803bd, 0x000000c0, 0x00000000 },
+   { 0x00000801, 0x232c03bd, 0x000000a0, 0x00000000 },
+   { 0x00000001, 0x240803bc, 0x000000c0, 0x00000000 },
+   { 0x00000048, 0x24087fbc, 0x00000060, 0x41000000 },
+   { 0x00000448, 0x22e87fbd, 0x000000f4, 0x41e00000 },
+   { 0x00000c41, 0x22e47fbd, 0x00000080, 0x40800000 },
+   { 0x00000001, 0x241003bc, 0x00000060, 0x00000000 },
+   { 0x00000c48, 0x22f07fbd, 0x000000f4, 0x41000000 },
+   { 0x00000801, 0x22f40061, 0x00000000, 0x00000000 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00400001, 0x2400036c, 0x00000000, 0x00006ea2 },
+   { 0x00400040, 0x24002d8c, 0x00690400, 0x00460046 },
+   { 0x00400009, 0x22c02d8d, 0x00690400, 0x00050005 },
+   { 0x00400001, 0x2400036c, 0x00000000, 0x00006204 },
+   { 0x00400040, 0x24002d8c, 0x00690400, 0x00400040 },
+   { 0x00400409, 0x22402d8d, 0x00690400, 0x00050005 },
+   { 0x00000801, 0x22500061, 0x00000000, 0x01000100 },
+   { 0x00000001, 0x22d00061, 0x00000000, 0x00400040 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x000002c0 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+   { 0x00000401, 0x233c0021, 0x000000fc, 0x00000000 },
+   { 0x00000801, 0x23240121, 0x000000f8, 0x00000000 },
+   { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+   { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+   { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+   { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+   { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+   { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+   { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+   { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+   { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+   { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+   { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+   { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+   { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+   { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+   { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+   { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+   { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+   { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+   { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+   { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+   { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+   { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+   { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+   { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+   { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+   { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+   { 0x02000031, 0x28000229, 0x00000200, 0x00000200 },
+   { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb801 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000e000 },
+   { 0x02000031, 0x28800229, 0x00000200, 0x00000200 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044ebc02 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000e000 },
+   { 0x02000031, 0x29000229, 0x00000200, 0x00000200 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x000002b0 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+   { 0x00000040, 0x23240d21, 0x000000f8, 0x00000001 },
+   { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+   { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+   { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+   { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+   { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+   { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+   { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+   { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+   { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+   { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+   { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+   { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+   { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+   { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+   { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+   { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+   { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+   { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+   { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+   { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+   { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+   { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+   { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+   { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+   { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+   { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+   { 0x02000031, 0x2a000229, 0x00000200, 0x00000200 },
+   { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb801 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000e000 },
+   { 0x02000031, 0x2a800229, 0x00000200, 0x00000200 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044ebc02 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000e000 },
+   { 0x02000031, 0x2b000229, 0x00000200, 0x00000200 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x000002b0 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+   { 0x00000040, 0x23240d21, 0x000000f8, 0x00000002 },
+   { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+   { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+   { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+   { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+   { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+   { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+   { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+   { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+   { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+   { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+   { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+   { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+   { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+   { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+   { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+   { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+   { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+   { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+   { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+   { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+   { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+   { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+   { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+   { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+   { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+   { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+   { 0x02000031, 0x2c000229, 0x00000200, 0x00000200 },
+   { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb801 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000e000 },
+   { 0x02000031, 0x2c800229, 0x00000200, 0x00000200 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044ebc02 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000e000 },
+   { 0x02000031, 0x2d000229, 0x00000200, 0x00000200 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x000002b0 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+   { 0x00000040, 0x23240d21, 0x000000f8, 0x00000003 },
+   { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+   { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+   { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+   { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+   { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+   { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+   { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+   { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+   { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+   { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+   { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+   { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+   { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+   { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+   { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+   { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+   { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+   { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+   { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+   { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+   { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+   { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+   { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+   { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+   { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+   { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+   { 0x02000031, 0x2e000229, 0x00000200, 0x00000200 },
+   { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb801 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000e000 },
+   { 0x02000031, 0x2e800229, 0x00000200, 0x00000200 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044ebc02 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000e000 },
+   { 0x02000031, 0x2f000229, 0x00000200, 0x00000200 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00400040, 0x22002da8, 0x006902c0, 0x00000000 },
+   { 0x00400040, 0x22082da8, 0x006902c0, 0x02000200 },
+   { 0x00600001, 0x23800021, 0x008d0360, 0x00000000 },
+   { 0x00600001, 0x24a00021, 0x008d0360, 0x00000000 },
+   { 0x00200401, 0x238001a5, 0x004500e0, 0x00000000 },
+   { 0x00000401, 0x24a001a5, 0x000000e0, 0x00000000 },
+   { 0x00000c08, 0x24a43da5, 0x000000e2, 0x00010001 },
+   { 0x00000801, 0x23880061, 0x00000000, 0x000f000f },
+   { 0x00000801, 0x24a80061, 0x00000000, 0x0007000f },
+   { 0x80800040, 0xa4002d29, 0x00b18400, 0x00800080 },
+   { 0x80800040, 0xa4202d29, 0x00b18420, 0x00800080 },
+   { 0x80800040, 0xa4402d29, 0x00b18440, 0x00800080 },
+   { 0x80800040, 0xa4602d29, 0x00b18460, 0x00800080 },
+   { 0x00600008, 0x45c03d29, 0x00ae8800, 0x00010001 },
+   { 0x00600008, 0x45e03d29, 0x00ae8820, 0x00010001 },
+   { 0x00600008, 0x46003d29, 0x00ae8840, 0x00010001 },
+   { 0x00600008, 0x46203d29, 0x00ae8860, 0x00010001 },
+   { 0x00600040, 0x45c02529, 0x00ae05c0, 0x00ae05e0 },
+   { 0x80600040, 0xc8002d29, 0x00ae05c0, 0x00800080 },
+   { 0x00600040, 0x46002529, 0x00ae0600, 0x00ae0620 },
+   { 0x80600040, 0xc8402d29, 0x00ae0600, 0x00800080 },
+   { 0x00600008, 0x46403d29, 0x00ae8000, 0x00010001 },
+   { 0x00600008, 0x46603d29, 0x00ae8020, 0x00010001 },
+   { 0x00600008, 0x46803d29, 0x00ae8040, 0x00010001 },
+   { 0x00600008, 0x46a03d29, 0x00ae8060, 0x00010001 },
+   { 0x00600040, 0x46402529, 0x00ae0640, 0x00ae0660 },
+   { 0x80600040, 0xc0002d29, 0x00ae0640, 0x00800080 },
+   { 0x00600040, 0x46802529, 0x00ae0680, 0x00ae06a0 },
+   { 0x80600040, 0xc0402d29, 0x00ae0680, 0x00800080 },
+   { 0x00400040, 0x22002da8, 0x006902c0, 0x04000400 },
+   { 0x80800040, 0xb4002d29, 0x00b19400, 0x00800080 },
+   { 0x80800040, 0xb4202d29, 0x00b19420, 0x00800080 },
+   { 0x80800040, 0xb4402d29, 0x00b19440, 0x00800080 },
+   { 0x80800040, 0xb4602d29, 0x00b19460, 0x00800080 },
+   { 0x00600008, 0x45c03d29, 0x00ae9800, 0x00010001 },
+   { 0x00600008, 0x45e03d29, 0x00ae9820, 0x00010001 },
+   { 0x00600008, 0x46003d29, 0x00ae9840, 0x00010001 },
+   { 0x00600008, 0x46203d29, 0x00ae9860, 0x00010001 },
+   { 0x00600040, 0x45c02529, 0x00ae05c0, 0x00ae05e0 },
+   { 0x80600040, 0xd8002d29, 0x00ae05c0, 0x00800080 },
+   { 0x00600040, 0x46002529, 0x00ae0600, 0x00ae0620 },
+   { 0x80600040, 0xd8402d29, 0x00ae0600, 0x00800080 },
+   { 0x00600008, 0x46403d29, 0x00ae9000, 0x00010001 },
+   { 0x00600008, 0x46603d29, 0x00ae9020, 0x00010001 },
+   { 0x00600008, 0x46803d29, 0x00ae9040, 0x00010001 },
+   { 0x00600008, 0x46a03d29, 0x00ae9060, 0x00010001 },
+   { 0x00600040, 0x46402529, 0x00ae0640, 0x00ae0660 },
+   { 0x80600040, 0xd0002d29, 0x00ae0640, 0x00800080 },
+   { 0x00600040, 0x46802529, 0x00ae0680, 0x00ae06a0 },
+   { 0x80600040, 0xd0402d29, 0x00ae0680, 0x00800080 },
+   { 0x00400040, 0x22082da8, 0x006902c0, 0x06000600 },
+   { 0x80800040, 0xa4002d29, 0x00b18400, 0x00800080 },
+   { 0x80800040, 0xa4202d29, 0x00b18420, 0x00800080 },
+   { 0x80800040, 0xa4402d29, 0x00b18440, 0x00800080 },
+   { 0x80800040, 0xa4602d29, 0x00b18460, 0x00800080 },
+   { 0x00600008, 0x45c03d29, 0x00ae8800, 0x00010001 },
+   { 0x00600008, 0x45e03d29, 0x00ae8820, 0x00010001 },
+   { 0x00600008, 0x46003d29, 0x00ae8840, 0x00010001 },
+   { 0x00600008, 0x46203d29, 0x00ae8860, 0x00010001 },
+   { 0x00600040, 0x45c02529, 0x00ae05c0, 0x00ae05e0 },
+   { 0x80600040, 0xc8002d29, 0x00ae05c0, 0x00800080 },
+   { 0x00600040, 0x46002529, 0x00ae0600, 0x00ae0620 },
+   { 0x80600040, 0xc8402d29, 0x00ae0600, 0x00800080 },
+   { 0x00600008, 0x46403d29, 0x00ae8000, 0x00010001 },
+   { 0x00600008, 0x46603d29, 0x00ae8020, 0x00010001 },
+   { 0x00600008, 0x46803d29, 0x00ae8040, 0x00010001 },
+   { 0x00600008, 0x46a03d29, 0x00ae8060, 0x00010001 },
+   { 0x00600040, 0x46402529, 0x00ae0640, 0x00ae0660 },
+   { 0x80600040, 0xc0002d29, 0x00ae0640, 0x00800080 },
+   { 0x00600040, 0x46802529, 0x00ae0680, 0x00ae06a0 },
+   { 0x80600040, 0xc0402d29, 0x00ae0680, 0x00800080 },
+   { 0x00400040, 0x22002da8, 0x006902c0, 0x04000400 },
+   { 0x80800040, 0xb4002d29, 0x00b19400, 0x00800080 },
+   { 0x80800040, 0xb4202d29, 0x00b19420, 0x00800080 },
+   { 0x80800040, 0xb4402d29, 0x00b19440, 0x00800080 },
+   { 0x80800040, 0xb4602d29, 0x00b19460, 0x00800080 },
+   { 0x00600008, 0x45c03d29, 0x00ae9800, 0x00010001 },
+   { 0x00600008, 0x45e03d29, 0x00ae9820, 0x00010001 },
+   { 0x00600008, 0x46003d29, 0x00ae9840, 0x00010001 },
+   { 0x00600008, 0x46203d29, 0x00ae9860, 0x00010001 },
+   { 0x00600040, 0x45c02529, 0x00ae05c0, 0x00ae05e0 },
+   { 0x80600040, 0xd8002d29, 0x00ae05c0, 0x00800080 },
+   { 0x00600040, 0x46002529, 0x00ae0600, 0x00ae0620 },
+   { 0x80600040, 0xd8402d29, 0x00ae0600, 0x00800080 },
+   { 0x00600008, 0x46403d29, 0x00ae9000, 0x00010001 },
+   { 0x00600008, 0x46603d29, 0x00ae9020, 0x00010001 },
+   { 0x00600008, 0x46803d29, 0x00ae9040, 0x00010001 },
+   { 0x00600008, 0x46a03d29, 0x00ae9060, 0x00010001 },
+   { 0x00600040, 0x46402529, 0x00ae0640, 0x00ae0660 },
+   { 0x80600040, 0xd0002d29, 0x00ae0640, 0x00800080 },
+   { 0x00600040, 0x46802529, 0x00ae0680, 0x00ae06a0 },
+   { 0x80600040, 0xd0402d29, 0x00ae0680, 0x00800080 },
+   { 0x00400040, 0x22082da8, 0x006902c0, 0x06000600 },
+   { 0x00400040, 0x22002da8, 0x006902c0, 0x00000000 },
+   { 0x00400040, 0x22082da8, 0x006902c0, 0x02000200 },
+   { 0x00800401, 0x23a00231, 0x00d28401, 0x00000000 },
+   { 0x00800801, 0x23b00231, 0x00d28421, 0x00000000 },
+   { 0x00800401, 0x23c00231, 0x00d28441, 0x00000000 },
+   { 0x00800801, 0x23d00231, 0x00d28461, 0x00000000 },
+   { 0x00600401, 0x44c00231, 0x00cf8801, 0x00000000 },
+   { 0x00600c01, 0x44d00231, 0x00cf8841, 0x00000000 },
+   { 0x00600c01, 0x44c10231, 0x00cf8001, 0x00000000 },
+   { 0x00600801, 0x44d10231, 0x00cf8041, 0x00000000 },
+   { 0x00400040, 0x22002da8, 0x006902c0, 0x04000400 },
+   { 0x00800401, 0x23e00231, 0x00d29401, 0x00000000 },
+   { 0x00800801, 0x23f00231, 0x00d29421, 0x00000000 },
+   { 0x00800401, 0x24000231, 0x00d29441, 0x00000000 },
+   { 0x00800801, 0x24100231, 0x00d29461, 0x00000000 },
+   { 0x00600401, 0x44e00231, 0x00cf9801, 0x00000000 },
+   { 0x00600c01, 0x44f00231, 0x00cf9841, 0x00000000 },
+   { 0x00600c01, 0x44e10231, 0x00cf9001, 0x00000000 },
+   { 0x00600801, 0x44f10231, 0x00cf9041, 0x00000000 },
+   { 0x00400040, 0x22082da8, 0x006902c0, 0x06000600 },
+   { 0x00800401, 0x24200231, 0x00d28401, 0x00000000 },
+   { 0x00800801, 0x24300231, 0x00d28421, 0x00000000 },
+   { 0x00800401, 0x24400231, 0x00d28441, 0x00000000 },
+   { 0x00800801, 0x24500231, 0x00d28461, 0x00000000 },
+   { 0x00600401, 0x45000231, 0x00cf8801, 0x00000000 },
+   { 0x00600c01, 0x45100231, 0x00cf8841, 0x00000000 },
+   { 0x00600c01, 0x45010231, 0x00cf8001, 0x00000000 },
+   { 0x00600801, 0x45110231, 0x00cf8041, 0x00000000 },
+   { 0x00800401, 0x24600231, 0x00d29401, 0x00000000 },
+   { 0x00800801, 0x24700231, 0x00d29421, 0x00000000 },
+   { 0x00800401, 0x24800231, 0x00d29441, 0x00000000 },
+   { 0x00800801, 0x24900231, 0x00d29461, 0x00000000 },
+   { 0x00600401, 0x45200231, 0x00cf9801, 0x00000000 },
+   { 0x00600c01, 0x45300231, 0x00cf9841, 0x00000000 },
+   { 0x00600c01, 0x45210231, 0x00cf9001, 0x00000000 },
+   { 0x00600801, 0x45310231, 0x00cf9041, 0x00000000 },
+   { 0x05000031, 0x20000e24, 0x00000380, 0x120a8018 },
+   { 0x05000031, 0x20000e24, 0x000004a0, 0x0a0a8019 },
+   { 0x00600001, 0x2fe00021, 0x008d0000, 0x00000000 },
+   { 0x07000031, 0x20001e24, 0x00000fe0, 0x82000010 },
diff --git a/src/shaders/post_processing/gen7/pl3_to_pl2.g7b b/src/shaders/post_processing/gen7/pl3_to_pl2.g7b
new file mode 100644
index 0000000..9b3a59c
--- /dev/null
+++ b/src/shaders/post_processing/gen7/pl3_to_pl2.g7b
@@ -0,0 +1,666 @@
+   { 0x00600001, 0x23600021, 0x008d0000, 0x00000000 },
+   { 0x00600001, 0x23200021, 0x008d0000, 0x00000000 },
+   { 0x00600001, 0x23400021, 0x008d0000, 0x00000000 },
+   { 0x00600041, 0x206077bd, 0x008d0060, 0x000000f0 },
+   { 0x00200001, 0x211401bd, 0x004500e0, 0x00000000 },
+   { 0x01600010, 0x20002e24, 0x0000005a, 0x00010001 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x00000000 },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+   { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+   { 0x00110048, 0x20c077bd, 0x00000060, 0x00000114 },
+   { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+   { 0x00000048, 0x20a077bd, 0x00000080, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+   { 0x00110048, 0x20c077bd, 0x00000060, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+   { 0x00000048, 0x20a077bd, 0x00000080, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+   { 0x00110048, 0x20c077bd, 0x00000060, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+   { 0x00000048, 0x20a077bd, 0x00000080, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+   { 0x00110048, 0x20c077bd, 0x00000060, 0x00000220 },
+   { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+   { 0x00000048, 0x20a077bd, 0x00000080, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x00020002 },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+   { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+   { 0x00110048, 0x20c477bd, 0x00000064, 0x00000114 },
+   { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+   { 0x00000048, 0x20a477bd, 0x00000084, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+   { 0x00110048, 0x20c477bd, 0x00000064, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+   { 0x00000048, 0x20a477bd, 0x00000084, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+   { 0x00110048, 0x20c477bd, 0x00000064, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+   { 0x00000048, 0x20a477bd, 0x00000084, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+   { 0x00110048, 0x20c477bd, 0x00000064, 0x00000220 },
+   { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+   { 0x00000048, 0x20a477bd, 0x00000084, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x00040004 },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+   { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+   { 0x00110048, 0x20c877bd, 0x00000068, 0x00000114 },
+   { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+   { 0x00000048, 0x20a877bd, 0x00000088, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+   { 0x00110048, 0x20c877bd, 0x00000068, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+   { 0x00000048, 0x20a877bd, 0x00000088, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+   { 0x00110048, 0x20c877bd, 0x00000068, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+   { 0x00000048, 0x20a877bd, 0x00000088, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+   { 0x00110048, 0x20c877bd, 0x00000068, 0x00000220 },
+   { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+   { 0x00000048, 0x20a877bd, 0x00000088, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x00060006 },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+   { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+   { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000114 },
+   { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+   { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+   { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+   { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+   { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+   { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+   { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000220 },
+   { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+   { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x00080008 },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+   { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+   { 0x00110048, 0x20d077bd, 0x00000070, 0x00000114 },
+   { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+   { 0x00000048, 0x20b077bd, 0x00000090, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+   { 0x00110048, 0x20d077bd, 0x00000070, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+   { 0x00000048, 0x20b077bd, 0x00000090, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+   { 0x00110048, 0x20d077bd, 0x00000070, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+   { 0x00000048, 0x20b077bd, 0x00000090, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+   { 0x00110048, 0x20d077bd, 0x00000070, 0x00000220 },
+   { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+   { 0x00000048, 0x20b077bd, 0x00000090, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x000a000a },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+   { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+   { 0x00110048, 0x20d477bd, 0x00000074, 0x00000114 },
+   { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+   { 0x00000048, 0x20b477bd, 0x00000094, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+   { 0x00110048, 0x20d477bd, 0x00000074, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+   { 0x00000048, 0x20b477bd, 0x00000094, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+   { 0x00110048, 0x20d477bd, 0x00000074, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+   { 0x00000048, 0x20b477bd, 0x00000094, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+   { 0x00110048, 0x20d477bd, 0x00000074, 0x00000220 },
+   { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+   { 0x00000048, 0x20b477bd, 0x00000094, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x000c000c },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+   { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+   { 0x00110048, 0x20d877bd, 0x00000078, 0x00000114 },
+   { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+   { 0x00000048, 0x20b877bd, 0x00000098, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+   { 0x00110048, 0x20d877bd, 0x00000078, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+   { 0x00000048, 0x20b877bd, 0x00000098, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+   { 0x00110048, 0x20d877bd, 0x00000078, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+   { 0x00000048, 0x20b877bd, 0x00000098, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+   { 0x00110048, 0x20d877bd, 0x00000078, 0x00000220 },
+   { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+   { 0x00000048, 0x20b877bd, 0x00000098, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x000e000e },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+   { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+   { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000114 },
+   { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+   { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+   { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+   { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+   { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+   { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+   { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000220 },
+   { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+   { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00800001, 0x22a00169, 0x00000000, 0xffffffff },
+   { 0x02000010, 0x20002d24, 0x020000e4, 0x00000000 },
+   { 0x02010010, 0x20002d24, 0x020000e6, 0x00000000 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x0000001a },
+   { 0x00000005, 0x23022d31, 0x00000044, 0x00030003 },
+   { 0x00000401, 0x233803bd, 0x000000f4, 0x00000000 },
+   { 0x00000c01, 0x233003bd, 0x00000060, 0x00000000 },
+   { 0x00000c01, 0x233403bd, 0x00000080, 0x00000000 },
+   { 0x00000c01, 0x232803bd, 0x000000c0, 0x00000000 },
+   { 0x00000801, 0x232c03bd, 0x000000a0, 0x00000000 },
+   { 0x00000001, 0x240803bc, 0x000000c0, 0x00000000 },
+   { 0x00000048, 0x24087fbc, 0x00000060, 0x41000000 },
+   { 0x00000448, 0x22e87fbd, 0x000000f4, 0x41e00000 },
+   { 0x00000c41, 0x22e47fbd, 0x00000080, 0x40800000 },
+   { 0x00000001, 0x241003bc, 0x00000060, 0x00000000 },
+   { 0x00000c48, 0x22f07fbd, 0x000000f4, 0x41000000 },
+   { 0x00000801, 0x22f40061, 0x00000000, 0x00000000 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00400001, 0x2400036c, 0x00000000, 0x00006ea2 },
+   { 0x00400040, 0x24002d8c, 0x00690400, 0x00460046 },
+   { 0x00400009, 0x22c02d8d, 0x00690400, 0x00050005 },
+   { 0x00400001, 0x2400036c, 0x00000000, 0x00006204 },
+   { 0x00400040, 0x24002d8c, 0x00690400, 0x00400040 },
+   { 0x00400409, 0x22402d8d, 0x00690400, 0x00050005 },
+   { 0x00000801, 0x22500061, 0x00000000, 0x01000100 },
+   { 0x00000001, 0x22d00061, 0x00000000, 0x00400040 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x00000058 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+   { 0x00000401, 0x233c0021, 0x000000fc, 0x00000000 },
+   { 0x00000801, 0x23240121, 0x000000f8, 0x00000000 },
+   { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+   { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+   { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+   { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+   { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+   { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+   { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+   { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+   { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+   { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+   { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+   { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+   { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+   { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+   { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+   { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+   { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+   { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+   { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+   { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+   { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+   { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+   { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+   { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+   { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+   { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+   { 0x02000031, 0x28000229, 0x00000200, 0x00000200 },
+   { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb801 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000e000 },
+   { 0x02000031, 0x28800229, 0x00000200, 0x00000200 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044ebc02 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000e000 },
+   { 0x02000031, 0x29000229, 0x00000200, 0x00000200 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x00000056 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+   { 0x00000040, 0x23240d21, 0x000000f8, 0x00000001 },
+   { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+   { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+   { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+   { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+   { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+   { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+   { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+   { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+   { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+   { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+   { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+   { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+   { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+   { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+   { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+   { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+   { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+   { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+   { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+   { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+   { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+   { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+   { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+   { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+   { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+   { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+   { 0x02000031, 0x2a000229, 0x00000200, 0x00000200 },
+   { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb801 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000e000 },
+   { 0x02000031, 0x2a800229, 0x00000200, 0x00000200 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044ebc02 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000e000 },
+   { 0x02000031, 0x2b000229, 0x00000200, 0x00000200 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x00000056 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+   { 0x00000040, 0x23240d21, 0x000000f8, 0x00000002 },
+   { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+   { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+   { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+   { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+   { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+   { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+   { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+   { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+   { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+   { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+   { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+   { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+   { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+   { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+   { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+   { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+   { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+   { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+   { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+   { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+   { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+   { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+   { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+   { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+   { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+   { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+   { 0x02000031, 0x2c000229, 0x00000200, 0x00000200 },
+   { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb801 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000e000 },
+   { 0x02000031, 0x2c800229, 0x00000200, 0x00000200 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044ebc02 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000e000 },
+   { 0x02000031, 0x2d000229, 0x00000200, 0x00000200 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x00000056 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+   { 0x00000040, 0x23240d21, 0x000000f8, 0x00000003 },
+   { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+   { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+   { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+   { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+   { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+   { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+   { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+   { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+   { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+   { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+   { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+   { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+   { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+   { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+   { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+   { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+   { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+   { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+   { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+   { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+   { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+   { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+   { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+   { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+   { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+   { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+   { 0x02000031, 0x2e000229, 0x00000200, 0x00000200 },
+   { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb801 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000e000 },
+   { 0x02000031, 0x2e800229, 0x00000200, 0x00000200 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044ebc02 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000e000 },
+   { 0x02000031, 0x2f000229, 0x00000200, 0x00000200 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00400040, 0x22002da8, 0x006902c0, 0x00000000 },
+   { 0x00400040, 0x22082da8, 0x006902c0, 0x02000200 },
+   { 0x00600001, 0x23800021, 0x008d0360, 0x00000000 },
+   { 0x00600001, 0x24a00021, 0x008d0360, 0x00000000 },
+   { 0x00200401, 0x238001a5, 0x004500e0, 0x00000000 },
+   { 0x00000401, 0x24a001a5, 0x000000e0, 0x00000000 },
+   { 0x00000c08, 0x24a43da5, 0x000000e2, 0x00010001 },
+   { 0x00000801, 0x23880061, 0x00000000, 0x000f000f },
+   { 0x00000801, 0x24a80061, 0x00000000, 0x0007000f },
+   { 0x80800040, 0xa4002d29, 0x00b18400, 0x00800080 },
+   { 0x80800040, 0xa4202d29, 0x00b18420, 0x00800080 },
+   { 0x80800040, 0xa4402d29, 0x00b18440, 0x00800080 },
+   { 0x80800040, 0xa4602d29, 0x00b18460, 0x00800080 },
+   { 0x00600008, 0x45c03d29, 0x00ae8800, 0x00010001 },
+   { 0x00600008, 0x45e03d29, 0x00ae8820, 0x00010001 },
+   { 0x00600008, 0x46003d29, 0x00ae8840, 0x00010001 },
+   { 0x00600008, 0x46203d29, 0x00ae8860, 0x00010001 },
+   { 0x00600040, 0x45c02529, 0x00ae05c0, 0x00ae05e0 },
+   { 0x80600040, 0xc8002d29, 0x00ae05c0, 0x00800080 },
+   { 0x00600040, 0x46002529, 0x00ae0600, 0x00ae0620 },
+   { 0x80600040, 0xc8402d29, 0x00ae0600, 0x00800080 },
+   { 0x00600008, 0x46403d29, 0x00ae8000, 0x00010001 },
+   { 0x00600008, 0x46603d29, 0x00ae8020, 0x00010001 },
+   { 0x00600008, 0x46803d29, 0x00ae8040, 0x00010001 },
+   { 0x00600008, 0x46a03d29, 0x00ae8060, 0x00010001 },
+   { 0x00600040, 0x46402529, 0x00ae0640, 0x00ae0660 },
+   { 0x80600040, 0xc0002d29, 0x00ae0640, 0x00800080 },
+   { 0x00600040, 0x46802529, 0x00ae0680, 0x00ae06a0 },
+   { 0x80600040, 0xc0402d29, 0x00ae0680, 0x00800080 },
+   { 0x00400040, 0x22002da8, 0x006902c0, 0x04000400 },
+   { 0x80800040, 0xb4002d29, 0x00b19400, 0x00800080 },
+   { 0x80800040, 0xb4202d29, 0x00b19420, 0x00800080 },
+   { 0x80800040, 0xb4402d29, 0x00b19440, 0x00800080 },
+   { 0x80800040, 0xb4602d29, 0x00b19460, 0x00800080 },
+   { 0x00600008, 0x45c03d29, 0x00ae9800, 0x00010001 },
+   { 0x00600008, 0x45e03d29, 0x00ae9820, 0x00010001 },
+   { 0x00600008, 0x46003d29, 0x00ae9840, 0x00010001 },
+   { 0x00600008, 0x46203d29, 0x00ae9860, 0x00010001 },
+   { 0x00600040, 0x45c02529, 0x00ae05c0, 0x00ae05e0 },
+   { 0x80600040, 0xd8002d29, 0x00ae05c0, 0x00800080 },
+   { 0x00600040, 0x46002529, 0x00ae0600, 0x00ae0620 },
+   { 0x80600040, 0xd8402d29, 0x00ae0600, 0x00800080 },
+   { 0x00600008, 0x46403d29, 0x00ae9000, 0x00010001 },
+   { 0x00600008, 0x46603d29, 0x00ae9020, 0x00010001 },
+   { 0x00600008, 0x46803d29, 0x00ae9040, 0x00010001 },
+   { 0x00600008, 0x46a03d29, 0x00ae9060, 0x00010001 },
+   { 0x00600040, 0x46402529, 0x00ae0640, 0x00ae0660 },
+   { 0x80600040, 0xd0002d29, 0x00ae0640, 0x00800080 },
+   { 0x00600040, 0x46802529, 0x00ae0680, 0x00ae06a0 },
+   { 0x80600040, 0xd0402d29, 0x00ae0680, 0x00800080 },
+   { 0x00400040, 0x22082da8, 0x006902c0, 0x06000600 },
+   { 0x80800040, 0xa4002d29, 0x00b18400, 0x00800080 },
+   { 0x80800040, 0xa4202d29, 0x00b18420, 0x00800080 },
+   { 0x80800040, 0xa4402d29, 0x00b18440, 0x00800080 },
+   { 0x80800040, 0xa4602d29, 0x00b18460, 0x00800080 },
+   { 0x00600008, 0x45c03d29, 0x00ae8800, 0x00010001 },
+   { 0x00600008, 0x45e03d29, 0x00ae8820, 0x00010001 },
+   { 0x00600008, 0x46003d29, 0x00ae8840, 0x00010001 },
+   { 0x00600008, 0x46203d29, 0x00ae8860, 0x00010001 },
+   { 0x00600040, 0x45c02529, 0x00ae05c0, 0x00ae05e0 },
+   { 0x80600040, 0xc8002d29, 0x00ae05c0, 0x00800080 },
+   { 0x00600040, 0x46002529, 0x00ae0600, 0x00ae0620 },
+   { 0x80600040, 0xc8402d29, 0x00ae0600, 0x00800080 },
+   { 0x00600008, 0x46403d29, 0x00ae8000, 0x00010001 },
+   { 0x00600008, 0x46603d29, 0x00ae8020, 0x00010001 },
+   { 0x00600008, 0x46803d29, 0x00ae8040, 0x00010001 },
+   { 0x00600008, 0x46a03d29, 0x00ae8060, 0x00010001 },
+   { 0x00600040, 0x46402529, 0x00ae0640, 0x00ae0660 },
+   { 0x80600040, 0xc0002d29, 0x00ae0640, 0x00800080 },
+   { 0x00600040, 0x46802529, 0x00ae0680, 0x00ae06a0 },
+   { 0x80600040, 0xc0402d29, 0x00ae0680, 0x00800080 },
+   { 0x00400040, 0x22002da8, 0x006902c0, 0x04000400 },
+   { 0x80800040, 0xb4002d29, 0x00b19400, 0x00800080 },
+   { 0x80800040, 0xb4202d29, 0x00b19420, 0x00800080 },
+   { 0x80800040, 0xb4402d29, 0x00b19440, 0x00800080 },
+   { 0x80800040, 0xb4602d29, 0x00b19460, 0x00800080 },
+   { 0x00600008, 0x45c03d29, 0x00ae9800, 0x00010001 },
+   { 0x00600008, 0x45e03d29, 0x00ae9820, 0x00010001 },
+   { 0x00600008, 0x46003d29, 0x00ae9840, 0x00010001 },
+   { 0x00600008, 0x46203d29, 0x00ae9860, 0x00010001 },
+   { 0x00600040, 0x45c02529, 0x00ae05c0, 0x00ae05e0 },
+   { 0x80600040, 0xd8002d29, 0x00ae05c0, 0x00800080 },
+   { 0x00600040, 0x46002529, 0x00ae0600, 0x00ae0620 },
+   { 0x80600040, 0xd8402d29, 0x00ae0600, 0x00800080 },
+   { 0x00600008, 0x46403d29, 0x00ae9000, 0x00010001 },
+   { 0x00600008, 0x46603d29, 0x00ae9020, 0x00010001 },
+   { 0x00600008, 0x46803d29, 0x00ae9040, 0x00010001 },
+   { 0x00600008, 0x46a03d29, 0x00ae9060, 0x00010001 },
+   { 0x00600040, 0x46402529, 0x00ae0640, 0x00ae0660 },
+   { 0x80600040, 0xd0002d29, 0x00ae0640, 0x00800080 },
+   { 0x00600040, 0x46802529, 0x00ae0680, 0x00ae06a0 },
+   { 0x80600040, 0xd0402d29, 0x00ae0680, 0x00800080 },
+   { 0x00400040, 0x22082da8, 0x006902c0, 0x06000600 },
+   { 0x00400040, 0x22002da8, 0x006902c0, 0x00000000 },
+   { 0x00400040, 0x22082da8, 0x006902c0, 0x02000200 },
+   { 0x00800401, 0x23a00231, 0x00d28401, 0x00000000 },
+   { 0x00800801, 0x23b00231, 0x00d28421, 0x00000000 },
+   { 0x00800401, 0x23c00231, 0x00d28441, 0x00000000 },
+   { 0x00800801, 0x23d00231, 0x00d28461, 0x00000000 },
+   { 0x00600401, 0x44c00231, 0x00cf8801, 0x00000000 },
+   { 0x00600c01, 0x44d00231, 0x00cf8841, 0x00000000 },
+   { 0x00600c01, 0x44c10231, 0x00cf8001, 0x00000000 },
+   { 0x00600801, 0x44d10231, 0x00cf8041, 0x00000000 },
+   { 0x00400040, 0x22002da8, 0x006902c0, 0x04000400 },
+   { 0x00800401, 0x23e00231, 0x00d29401, 0x00000000 },
+   { 0x00800801, 0x23f00231, 0x00d29421, 0x00000000 },
+   { 0x00800401, 0x24000231, 0x00d29441, 0x00000000 },
+   { 0x00800801, 0x24100231, 0x00d29461, 0x00000000 },
+   { 0x00600401, 0x44e00231, 0x00cf9801, 0x00000000 },
+   { 0x00600c01, 0x44f00231, 0x00cf9841, 0x00000000 },
+   { 0x00600c01, 0x44e10231, 0x00cf9001, 0x00000000 },
+   { 0x00600801, 0x44f10231, 0x00cf9041, 0x00000000 },
+   { 0x00400040, 0x22082da8, 0x006902c0, 0x06000600 },
+   { 0x00800401, 0x24200231, 0x00d28401, 0x00000000 },
+   { 0x00800801, 0x24300231, 0x00d28421, 0x00000000 },
+   { 0x00800401, 0x24400231, 0x00d28441, 0x00000000 },
+   { 0x00800801, 0x24500231, 0x00d28461, 0x00000000 },
+   { 0x00600401, 0x45000231, 0x00cf8801, 0x00000000 },
+   { 0x00600c01, 0x45100231, 0x00cf8841, 0x00000000 },
+   { 0x00600c01, 0x45010231, 0x00cf8001, 0x00000000 },
+   { 0x00600801, 0x45110231, 0x00cf8041, 0x00000000 },
+   { 0x00800401, 0x24600231, 0x00d29401, 0x00000000 },
+   { 0x00800801, 0x24700231, 0x00d29421, 0x00000000 },
+   { 0x00800401, 0x24800231, 0x00d29441, 0x00000000 },
+   { 0x00800801, 0x24900231, 0x00d29461, 0x00000000 },
+   { 0x00600401, 0x45200231, 0x00cf9801, 0x00000000 },
+   { 0x00600c01, 0x45300231, 0x00cf9841, 0x00000000 },
+   { 0x00600c01, 0x45210231, 0x00cf9001, 0x00000000 },
+   { 0x00600801, 0x45310231, 0x00cf9041, 0x00000000 },
+   { 0x05000031, 0x20000e24, 0x00000380, 0x120a8018 },
+   { 0x05000031, 0x20000e24, 0x000004a0, 0x0a0a8019 },
+   { 0x00600001, 0x2fe00021, 0x008d0000, 0x00000000 },
+   { 0x07000031, 0x20001e24, 0x00000fe0, 0x82000010 },
diff --git a/src/shaders/post_processing/gen7/pl3_to_pl3.asm b/src/shaders/post_processing/gen7/pl3_to_pl3.asm
new file mode 100644
index 0000000..0861513
--- /dev/null
+++ b/src/shaders/post_processing/gen7/pl3_to_pl3.asm
@@ -0,0 +1,17 @@
+// Module name: AVS
+.kernel PL3_TO_PL3
+.code
+// The kernel has no instructions of its own: its body is the .g4a fragments below, included in this order.
+#include "VP_Setup.g4a"
+#include "Set_Layer_0.g4a"
+#include "Set_AVS_Buf_0123_PL3.g4a"
+#include "PL3_AVS_Buf_0.g4a"           // NOTE(review): Buf_0..Buf_3 look like four per-buffer passes -- confirm in the fragments
+#include "PL3_AVS_Buf_1.g4a"
+#include "PL3_AVS_Buf_2.g4a"
+#include "PL3_AVS_Buf_3.g4a"
+#include "Save_AVS_PL3.g4a"        
+#include "EOT.g4a"                     // presumably the end-of-thread sequence, hence included last -- TODO confirm
+
+.end_code  
+
+.end_kernel
diff --git a/src/shaders/post_processing/gen7/pl3_to_pl3.g75b b/src/shaders/post_processing/gen7/pl3_to_pl3.g75b
new file mode 100644
index 0000000..7d204f7
--- /dev/null
+++ b/src/shaders/post_processing/gen7/pl3_to_pl3.g75b
@@ -0,0 +1,619 @@
+   { 0x00600001, 0x23600021, 0x008d0000, 0x00000000 },
+   { 0x00600001, 0x23200021, 0x008d0000, 0x00000000 },
+   { 0x00600001, 0x23400021, 0x008d0000, 0x00000000 },
+   { 0x00600041, 0x206077bd, 0x008d0060, 0x000000f0 },
+   { 0x00200001, 0x211401bd, 0x004500e0, 0x00000000 },
+   { 0x01600010, 0x20002e24, 0x0000005a, 0x00010001 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x00000000 },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+   { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+   { 0x00110048, 0x20c077bd, 0x00000060, 0x00000114 },
+   { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+   { 0x00000048, 0x20a077bd, 0x00000080, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+   { 0x00110048, 0x20c077bd, 0x00000060, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+   { 0x00000048, 0x20a077bd, 0x00000080, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+   { 0x00110048, 0x20c077bd, 0x00000060, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+   { 0x00000048, 0x20a077bd, 0x00000080, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+   { 0x00110048, 0x20c077bd, 0x00000060, 0x00000220 },
+   { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+   { 0x00000048, 0x20a077bd, 0x00000080, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x00020002 },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+   { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+   { 0x00110048, 0x20c477bd, 0x00000064, 0x00000114 },
+   { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+   { 0x00000048, 0x20a477bd, 0x00000084, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+   { 0x00110048, 0x20c477bd, 0x00000064, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+   { 0x00000048, 0x20a477bd, 0x00000084, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+   { 0x00110048, 0x20c477bd, 0x00000064, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+   { 0x00000048, 0x20a477bd, 0x00000084, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+   { 0x00110048, 0x20c477bd, 0x00000064, 0x00000220 },
+   { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+   { 0x00000048, 0x20a477bd, 0x00000084, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x00040004 },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+   { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+   { 0x00110048, 0x20c877bd, 0x00000068, 0x00000114 },
+   { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+   { 0x00000048, 0x20a877bd, 0x00000088, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+   { 0x00110048, 0x20c877bd, 0x00000068, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+   { 0x00000048, 0x20a877bd, 0x00000088, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+   { 0x00110048, 0x20c877bd, 0x00000068, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+   { 0x00000048, 0x20a877bd, 0x00000088, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+   { 0x00110048, 0x20c877bd, 0x00000068, 0x00000220 },
+   { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+   { 0x00000048, 0x20a877bd, 0x00000088, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x00060006 },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+   { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+   { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000114 },
+   { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+   { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+   { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+   { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+   { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+   { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+   { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000220 },
+   { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+   { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x00080008 },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+   { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+   { 0x00110048, 0x20d077bd, 0x00000070, 0x00000114 },
+   { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+   { 0x00000048, 0x20b077bd, 0x00000090, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+   { 0x00110048, 0x20d077bd, 0x00000070, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+   { 0x00000048, 0x20b077bd, 0x00000090, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+   { 0x00110048, 0x20d077bd, 0x00000070, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+   { 0x00000048, 0x20b077bd, 0x00000090, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+   { 0x00110048, 0x20d077bd, 0x00000070, 0x00000220 },
+   { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+   { 0x00000048, 0x20b077bd, 0x00000090, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x000a000a },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+   { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+   { 0x00110048, 0x20d477bd, 0x00000074, 0x00000114 },
+   { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+   { 0x00000048, 0x20b477bd, 0x00000094, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+   { 0x00110048, 0x20d477bd, 0x00000074, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+   { 0x00000048, 0x20b477bd, 0x00000094, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+   { 0x00110048, 0x20d477bd, 0x00000074, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+   { 0x00000048, 0x20b477bd, 0x00000094, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+   { 0x00110048, 0x20d477bd, 0x00000074, 0x00000220 },
+   { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+   { 0x00000048, 0x20b477bd, 0x00000094, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x000c000c },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+   { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+   { 0x00110048, 0x20d877bd, 0x00000078, 0x00000114 },
+   { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+   { 0x00000048, 0x20b877bd, 0x00000098, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+   { 0x00110048, 0x20d877bd, 0x00000078, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+   { 0x00000048, 0x20b877bd, 0x00000098, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+   { 0x00110048, 0x20d877bd, 0x00000078, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+   { 0x00000048, 0x20b877bd, 0x00000098, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+   { 0x00110048, 0x20d877bd, 0x00000078, 0x00000220 },
+   { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+   { 0x00000048, 0x20b877bd, 0x00000098, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x000e000e },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+   { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+   { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000114 },
+   { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+   { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+   { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+   { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+   { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+   { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+   { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000220 },
+   { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+   { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00800001, 0x22a00169, 0x00000000, 0xffffffff },
+   { 0x02000010, 0x20002d24, 0x020000e4, 0x00000000 },
+   { 0x02010010, 0x20002d24, 0x020000e6, 0x00000000 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x000000d0 },
+   { 0x00000005, 0x23022d31, 0x00000044, 0x00030003 },
+   { 0x00000401, 0x233803bd, 0x000000f4, 0x00000000 },
+   { 0x00000c01, 0x233003bd, 0x00000060, 0x00000000 },
+   { 0x00000c01, 0x233403bd, 0x00000080, 0x00000000 },
+   { 0x00000c01, 0x232803bd, 0x000000c0, 0x00000000 },
+   { 0x00000801, 0x232c03bd, 0x000000a0, 0x00000000 },
+   { 0x00000001, 0x240803bc, 0x000000c0, 0x00000000 },
+   { 0x00000048, 0x24087fbc, 0x00000060, 0x41000000 },
+   { 0x00000448, 0x22e87fbd, 0x000000f4, 0x41e00000 },
+   { 0x00000c41, 0x22e47fbd, 0x00000080, 0x40800000 },
+   { 0x00000001, 0x241003bc, 0x00000060, 0x00000000 },
+   { 0x00000c48, 0x22f07fbd, 0x000000f4, 0x41000000 },
+   { 0x00000801, 0x22f40061, 0x00000000, 0x00000000 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00400001, 0x2400036c, 0x00000000, 0x00006ea2 },
+   { 0x00400040, 0x24002d8c, 0x00690400, 0x00460046 },
+   { 0x00400009, 0x22c02d8d, 0x00690400, 0x00050005 },
+   { 0x00400001, 0x2400036c, 0x00000000, 0x00006204 },
+   { 0x00400040, 0x24002d8c, 0x00690400, 0x00400040 },
+   { 0x00400409, 0x22402d8d, 0x00690400, 0x00050005 },
+   { 0x00000801, 0x22500061, 0x00000000, 0x01000100 },
+   { 0x00000001, 0x22d00061, 0x00000000, 0x00400040 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x000002c0 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+   { 0x00000401, 0x233c0021, 0x000000fc, 0x00000000 },
+   { 0x00000801, 0x23240121, 0x000000f8, 0x00000000 },
+   { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+   { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+   { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+   { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+   { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+   { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+   { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+   { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+   { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+   { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+   { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+   { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+   { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+   { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+   { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+   { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+   { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+   { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+   { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+   { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+   { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+   { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+   { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+   { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+   { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+   { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+   { 0x02000031, 0x28000229, 0x00000200, 0x00000200 },
+   { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb801 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000e000 },
+   { 0x02000031, 0x28800229, 0x00000200, 0x00000200 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044ebc02 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000e000 },
+   { 0x02000031, 0x29000229, 0x00000200, 0x00000200 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x000002b0 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+   { 0x00000040, 0x23240d21, 0x000000f8, 0x00000001 },
+   { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+   { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+   { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+   { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+   { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+   { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+   { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+   { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+   { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+   { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+   { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+   { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+   { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+   { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+   { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+   { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+   { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+   { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+   { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+   { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+   { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+   { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+   { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+   { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+   { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+   { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+   { 0x02000031, 0x2a000229, 0x00000200, 0x00000200 },
+   { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb801 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000e000 },
+   { 0x02000031, 0x2a800229, 0x00000200, 0x00000200 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044ebc02 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000e000 },
+   { 0x02000031, 0x2b000229, 0x00000200, 0x00000200 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x000002b0 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+   { 0x00000040, 0x23240d21, 0x000000f8, 0x00000002 },
+   { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+   { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+   { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+   { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+   { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+   { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+   { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+   { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+   { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+   { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+   { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+   { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+   { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+   { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+   { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+   { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+   { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+   { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+   { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+   { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+   { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+   { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+   { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+   { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+   { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+   { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+   { 0x02000031, 0x2c000229, 0x00000200, 0x00000200 },
+   { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb801 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000e000 },
+   { 0x02000031, 0x2c800229, 0x00000200, 0x00000200 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044ebc02 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000e000 },
+   { 0x02000031, 0x2d000229, 0x00000200, 0x00000200 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x000002b0 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+   { 0x00000040, 0x23240d21, 0x000000f8, 0x00000003 },
+   { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+   { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+   { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+   { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x000001a0 },
+   { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+   { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+   { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+   { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+   { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+   { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+   { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+   { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+   { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+   { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+   { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+   { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+   { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+   { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+   { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+   { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+   { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+   { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+   { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+   { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+   { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+   { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+   { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+   { 0x02000031, 0x2e000229, 0x00000200, 0x00000200 },
+   { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb801 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000e000 },
+   { 0x02000031, 0x2e800229, 0x00000200, 0x00000200 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044ebc02 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000e000 },
+   { 0x02000031, 0x2f000229, 0x00000200, 0x00000200 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00400040, 0x22002da8, 0x006902c0, 0x00000000 },
+   { 0x00400040, 0x22082da8, 0x006902c0, 0x02000200 },
+   { 0x00600001, 0x23800021, 0x008d0360, 0x00000000 },
+   { 0x00600001, 0x24a00021, 0x008d0360, 0x00000000 },
+   { 0x00600001, 0x25c00021, 0x008d0360, 0x00000000 },
+   { 0x00200401, 0x238001a5, 0x004500e0, 0x00000000 },
+   { 0x00200408, 0x24a03da5, 0x004500e0, 0x00010001 },
+   { 0x00200408, 0x25c03da5, 0x004500e0, 0x00010001 },
+   { 0x00000801, 0x23880061, 0x00000000, 0x000f000f },
+   { 0x00000801, 0x24a80061, 0x00000000, 0x00070007 },
+   { 0x00000801, 0x25c80061, 0x00000000, 0x00070007 },
+   { 0x80800040, 0xa4002d29, 0x00b18400, 0x00800080 },
+   { 0x80800040, 0xa4202d29, 0x00b18420, 0x00800080 },
+   { 0x80800040, 0xa4402d29, 0x00b18440, 0x00800080 },
+   { 0x80800040, 0xa4602d29, 0x00b18460, 0x00800080 },
+   { 0x80600040, 0xc8002d29, 0x00ae8800, 0x00800080 },
+   { 0x80600040, 0xc8402d29, 0x00ae8840, 0x00800080 },
+   { 0x80600040, 0xc0002d29, 0x00ae8000, 0x00800080 },
+   { 0x80600040, 0xc0402d29, 0x00ae8040, 0x00800080 },
+   { 0x00400040, 0x22002da8, 0x006902c0, 0x04000400 },
+   { 0x80800040, 0xb4002d29, 0x00b19400, 0x00800080 },
+   { 0x80800040, 0xb4202d29, 0x00b19420, 0x00800080 },
+   { 0x80800040, 0xb4402d29, 0x00b19440, 0x00800080 },
+   { 0x80800040, 0xb4602d29, 0x00b19460, 0x00800080 },
+   { 0x80600040, 0xd8002d29, 0x00ae9800, 0x00800080 },
+   { 0x80600040, 0xd8402d29, 0x00ae9840, 0x00800080 },
+   { 0x80600040, 0xd0002d29, 0x00ae9000, 0x00800080 },
+   { 0x80600040, 0xd0402d29, 0x00ae9040, 0x00800080 },
+   { 0x00400040, 0x22082da8, 0x006902c0, 0x06000600 },
+   { 0x80800040, 0xa4002d29, 0x00b18400, 0x00800080 },
+   { 0x80800040, 0xa4202d29, 0x00b18420, 0x00800080 },
+   { 0x80800040, 0xa4402d29, 0x00b18440, 0x00800080 },
+   { 0x80800040, 0xa4602d29, 0x00b18460, 0x00800080 },
+   { 0x80600040, 0xc8002d29, 0x00ae8800, 0x00800080 },
+   { 0x80600040, 0xc8402d29, 0x00ae8840, 0x00800080 },
+   { 0x80600040, 0xc0002d29, 0x00ae8000, 0x00800080 },
+   { 0x80600040, 0xc0402d29, 0x00ae8040, 0x00800080 },
+   { 0x80800040, 0xb4002d29, 0x00b19400, 0x00800080 },
+   { 0x80800040, 0xb4202d29, 0x00b19420, 0x00800080 },
+   { 0x80800040, 0xb4402d29, 0x00b19440, 0x00800080 },
+   { 0x80800040, 0xb4602d29, 0x00b19460, 0x00800080 },
+   { 0x80600040, 0xd8002d29, 0x00ae9800, 0x00800080 },
+   { 0x80600040, 0xd8402d29, 0x00ae9840, 0x00800080 },
+   { 0x80600040, 0xd0002d29, 0x00ae9000, 0x00800080 },
+   { 0x80600040, 0xd0402d29, 0x00ae9040, 0x00800080 },
+   { 0x00400040, 0x22002da8, 0x006902c0, 0x00000000 },
+   { 0x00400040, 0x22082da8, 0x006902c0, 0x02000200 },
+   { 0x00800401, 0x23a00231, 0x00d28401, 0x00000000 },
+   { 0x00800801, 0x23b00231, 0x00d28421, 0x00000000 },
+   { 0x00800401, 0x23c00231, 0x00d28441, 0x00000000 },
+   { 0x00800801, 0x23d00231, 0x00d28461, 0x00000000 },
+   { 0x00600401, 0x24c00231, 0x00cf8801, 0x00000000 },
+   { 0x00600c01, 0x24c80231, 0x00cf8841, 0x00000000 },
+   { 0x00600401, 0x25e00231, 0x00cf8001, 0x00000000 },
+   { 0x00600c01, 0x25e80231, 0x00cf8041, 0x00000000 },
+   { 0x00400040, 0x22002da8, 0x006902c0, 0x04000400 },
+   { 0x00800401, 0x23e00231, 0x00d29401, 0x00000000 },
+   { 0x00800801, 0x23f00231, 0x00d29421, 0x00000000 },
+   { 0x00800401, 0x24000231, 0x00d29441, 0x00000000 },
+   { 0x00800801, 0x24100231, 0x00d29461, 0x00000000 },
+   { 0x00600c01, 0x24d00231, 0x00cf9801, 0x00000000 },
+   { 0x00600801, 0x24d80231, 0x00cf9841, 0x00000000 },
+   { 0x00600c01, 0x25f00231, 0x00cf9001, 0x00000000 },
+   { 0x00600801, 0x25f80231, 0x00cf9041, 0x00000000 },
+   { 0x00400040, 0x22082da8, 0x006902c0, 0x06000600 },
+   { 0x00800401, 0x24200231, 0x00d28401, 0x00000000 },
+   { 0x00800801, 0x24300231, 0x00d28421, 0x00000000 },
+   { 0x00800401, 0x24400231, 0x00d28441, 0x00000000 },
+   { 0x00800801, 0x24500231, 0x00d28461, 0x00000000 },
+   { 0x00600401, 0x24e00231, 0x00cf8801, 0x00000000 },
+   { 0x00600c01, 0x24e80231, 0x00cf8841, 0x00000000 },
+   { 0x00600401, 0x26000231, 0x00cf8001, 0x00000000 },
+   { 0x00600c01, 0x26080231, 0x00cf8041, 0x00000000 },
+   { 0x00800401, 0x24600231, 0x00d29401, 0x00000000 },
+   { 0x00800801, 0x24700231, 0x00d29421, 0x00000000 },
+   { 0x00800401, 0x24800231, 0x00d29441, 0x00000000 },
+   { 0x00800801, 0x24900231, 0x00d29461, 0x00000000 },
+   { 0x00600c01, 0x24f00231, 0x00cf9801, 0x00000000 },
+   { 0x00600801, 0x24f80231, 0x00cf9841, 0x00000000 },
+   { 0x00600c01, 0x26100231, 0x00cf9001, 0x00000000 },
+   { 0x00600801, 0x26180231, 0x00cf9041, 0x00000000 },
+   { 0x05000031, 0x20000e24, 0x00000380, 0x120a8018 },
+   { 0x05000031, 0x20000e24, 0x000004a0, 0x060a8019 },
+   { 0x05000031, 0x20000e24, 0x000005c0, 0x060a801a },
+   { 0x00600001, 0x2fe00021, 0x008d0000, 0x00000000 },
+   { 0x07000031, 0x20001e24, 0x00000fe0, 0x82000010 },
diff --git a/src/shaders/post_processing/gen7/pl3_to_pl3.g7b b/src/shaders/post_processing/gen7/pl3_to_pl3.g7b
new file mode 100644
index 0000000..6929daa
--- /dev/null
+++ b/src/shaders/post_processing/gen7/pl3_to_pl3.g7b
@@ -0,0 +1,619 @@
+   { 0x00600001, 0x23600021, 0x008d0000, 0x00000000 },
+   { 0x00600001, 0x23200021, 0x008d0000, 0x00000000 },
+   { 0x00600001, 0x23400021, 0x008d0000, 0x00000000 },
+   { 0x00600041, 0x206077bd, 0x008d0060, 0x000000f0 },
+   { 0x00200001, 0x211401bd, 0x004500e0, 0x00000000 },
+   { 0x01600010, 0x20002e24, 0x0000005a, 0x00010001 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x00000000 },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+   { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+   { 0x00110048, 0x20c077bd, 0x00000060, 0x00000114 },
+   { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+   { 0x00000048, 0x20a077bd, 0x00000080, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+   { 0x00110048, 0x20c077bd, 0x00000060, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+   { 0x00000048, 0x20a077bd, 0x00000080, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+   { 0x00110048, 0x20c077bd, 0x00000060, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+   { 0x00000048, 0x20a077bd, 0x00000080, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+   { 0x00110048, 0x20c077bd, 0x00000060, 0x00000220 },
+   { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+   { 0x00000048, 0x20a077bd, 0x00000080, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x00020002 },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+   { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+   { 0x00110048, 0x20c477bd, 0x00000064, 0x00000114 },
+   { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+   { 0x00000048, 0x20a477bd, 0x00000084, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+   { 0x00110048, 0x20c477bd, 0x00000064, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+   { 0x00000048, 0x20a477bd, 0x00000084, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+   { 0x00110048, 0x20c477bd, 0x00000064, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+   { 0x00000048, 0x20a477bd, 0x00000084, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+   { 0x00110048, 0x20c477bd, 0x00000064, 0x00000220 },
+   { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+   { 0x00000048, 0x20a477bd, 0x00000084, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x00040004 },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+   { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+   { 0x00110048, 0x20c877bd, 0x00000068, 0x00000114 },
+   { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+   { 0x00000048, 0x20a877bd, 0x00000088, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+   { 0x00110048, 0x20c877bd, 0x00000068, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+   { 0x00000048, 0x20a877bd, 0x00000088, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+   { 0x00110048, 0x20c877bd, 0x00000068, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+   { 0x00000048, 0x20a877bd, 0x00000088, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+   { 0x00110048, 0x20c877bd, 0x00000068, 0x00000220 },
+   { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+   { 0x00000048, 0x20a877bd, 0x00000088, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x00060006 },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+   { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+   { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000114 },
+   { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+   { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+   { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+   { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+   { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+   { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+   { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000220 },
+   { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+   { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x00080008 },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+   { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+   { 0x00110048, 0x20d077bd, 0x00000070, 0x00000114 },
+   { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+   { 0x00000048, 0x20b077bd, 0x00000090, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+   { 0x00110048, 0x20d077bd, 0x00000070, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+   { 0x00000048, 0x20b077bd, 0x00000090, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+   { 0x00110048, 0x20d077bd, 0x00000070, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+   { 0x00000048, 0x20b077bd, 0x00000090, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+   { 0x00110048, 0x20d077bd, 0x00000070, 0x00000220 },
+   { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+   { 0x00000048, 0x20b077bd, 0x00000090, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x000a000a },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+   { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+   { 0x00110048, 0x20d477bd, 0x00000074, 0x00000114 },
+   { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+   { 0x00000048, 0x20b477bd, 0x00000094, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+   { 0x00110048, 0x20d477bd, 0x00000074, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+   { 0x00000048, 0x20b477bd, 0x00000094, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+   { 0x00110048, 0x20d477bd, 0x00000074, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+   { 0x00000048, 0x20b477bd, 0x00000094, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+   { 0x00110048, 0x20d477bd, 0x00000074, 0x00000220 },
+   { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+   { 0x00000048, 0x20b477bd, 0x00000094, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x000c000c },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+   { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+   { 0x00110048, 0x20d877bd, 0x00000078, 0x00000114 },
+   { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+   { 0x00000048, 0x20b877bd, 0x00000098, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+   { 0x00110048, 0x20d877bd, 0x00000078, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+   { 0x00000048, 0x20b877bd, 0x00000098, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+   { 0x00110048, 0x20d877bd, 0x00000078, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+   { 0x00000048, 0x20b877bd, 0x00000098, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+   { 0x00110048, 0x20d877bd, 0x00000078, 0x00000220 },
+   { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+   { 0x00000048, 0x20b877bd, 0x00000098, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00000008, 0x22202d29, 0x00000044, 0x000e000e },
+   { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+   { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+   { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+   { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+   { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000114 },
+   { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+   { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000118 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+   { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000118 },
+   { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+   { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+   { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+   { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000220 },
+   { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+   { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000220 },
+   { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+   { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+   { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+   { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+   { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+   { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000220 },
+   { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+   { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000114 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00800001, 0x22a00169, 0x00000000, 0xffffffff },
+   { 0x02000010, 0x20002d24, 0x020000e4, 0x00000000 },
+   { 0x02010010, 0x20002d24, 0x020000e6, 0x00000000 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x0000001a },
+   { 0x00000005, 0x23022d31, 0x00000044, 0x00030003 },
+   { 0x00000401, 0x233803bd, 0x000000f4, 0x00000000 },
+   { 0x00000c01, 0x233003bd, 0x00000060, 0x00000000 },
+   { 0x00000c01, 0x233403bd, 0x00000080, 0x00000000 },
+   { 0x00000c01, 0x232803bd, 0x000000c0, 0x00000000 },
+   { 0x00000801, 0x232c03bd, 0x000000a0, 0x00000000 },
+   { 0x00000001, 0x240803bc, 0x000000c0, 0x00000000 },
+   { 0x00000048, 0x24087fbc, 0x00000060, 0x41000000 },
+   { 0x00000448, 0x22e87fbd, 0x000000f4, 0x41e00000 },
+   { 0x00000c41, 0x22e47fbd, 0x00000080, 0x40800000 },
+   { 0x00000001, 0x241003bc, 0x00000060, 0x00000000 },
+   { 0x00000c48, 0x22f07fbd, 0x000000f4, 0x41000000 },
+   { 0x00000801, 0x22f40061, 0x00000000, 0x00000000 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00400001, 0x2400036c, 0x00000000, 0x00006ea2 },
+   { 0x00400040, 0x24002d8c, 0x00690400, 0x00460046 },
+   { 0x00400009, 0x22c02d8d, 0x00690400, 0x00050005 },
+   { 0x00400001, 0x2400036c, 0x00000000, 0x00006204 },
+   { 0x00400040, 0x24002d8c, 0x00690400, 0x00400040 },
+   { 0x00400409, 0x22402d8d, 0x00690400, 0x00050005 },
+   { 0x00000801, 0x22500061, 0x00000000, 0x01000100 },
+   { 0x00000001, 0x22d00061, 0x00000000, 0x00400040 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x00000058 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+   { 0x00000401, 0x233c0021, 0x000000fc, 0x00000000 },
+   { 0x00000801, 0x23240121, 0x000000f8, 0x00000000 },
+   { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+   { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+   { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+   { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+   { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+   { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+   { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+   { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+   { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+   { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+   { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+   { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+   { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+   { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+   { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+   { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+   { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+   { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+   { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+   { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+   { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+   { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+   { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+   { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+   { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+   { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+   { 0x02000031, 0x28000229, 0x00000200, 0x00000200 },
+   { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb801 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000e000 },
+   { 0x02000031, 0x28800229, 0x00000200, 0x00000200 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044ebc02 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000e000 },
+   { 0x02000031, 0x29000229, 0x00000200, 0x00000200 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x00000056 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+   { 0x00000040, 0x23240d21, 0x000000f8, 0x00000001 },
+   { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+   { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+   { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+   { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+   { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+   { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+   { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+   { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+   { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+   { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+   { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+   { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+   { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+   { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+   { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+   { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+   { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+   { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+   { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+   { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+   { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+   { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+   { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+   { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+   { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+   { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+   { 0x02000031, 0x2a000229, 0x00000200, 0x00000200 },
+   { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb801 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000e000 },
+   { 0x02000031, 0x2a800229, 0x00000200, 0x00000200 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044ebc02 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000e000 },
+   { 0x02000031, 0x2b000229, 0x00000200, 0x00000200 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x00000056 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+   { 0x00000040, 0x23240d21, 0x000000f8, 0x00000002 },
+   { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+   { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+   { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+   { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+   { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+   { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+   { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+   { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+   { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+   { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+   { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+   { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+   { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+   { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+   { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+   { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+   { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+   { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+   { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+   { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+   { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+   { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+   { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+   { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+   { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+   { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+   { 0x02000031, 0x2c000229, 0x00000200, 0x00000200 },
+   { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb801 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000e000 },
+   { 0x02000031, 0x2c800229, 0x00000200, 0x00000200 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044ebc02 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000e000 },
+   { 0x02000031, 0x2d000229, 0x00000200, 0x00000200 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00110220, 0x34001c00, 0x02001400, 0x00000056 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+   { 0x00000040, 0x23240d21, 0x000000f8, 0x00000003 },
+   { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+   { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+   { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+   { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+   { 0x00110220, 0x34001c00, 0x00001400, 0x00000034 },
+   { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+   { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+   { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+   { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+   { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+   { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+   { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+   { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+   { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+   { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+   { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+   { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+   { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+   { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+   { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+   { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+   { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+   { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+   { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+   { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+   { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+   { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+   { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+   { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+   { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+   { 0x02000031, 0x2e000229, 0x00000200, 0x00000200 },
+   { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb801 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000e000 },
+   { 0x02000031, 0x2e800229, 0x00000200, 0x00000200 },
+   { 0x00000040, 0x22000c20, 0x000002f4, 0x044ebc02 },
+   { 0x00000001, 0x22080061, 0x00000000, 0x0000e000 },
+   { 0x02000031, 0x2f000229, 0x00000200, 0x00000200 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x00400040, 0x22002da8, 0x006902c0, 0x00000000 },
+   { 0x00400040, 0x22082da8, 0x006902c0, 0x02000200 },
+   { 0x00600001, 0x23800021, 0x008d0360, 0x00000000 },
+   { 0x00600001, 0x24a00021, 0x008d0360, 0x00000000 },
+   { 0x00600001, 0x25c00021, 0x008d0360, 0x00000000 },
+   { 0x00200401, 0x238001a5, 0x004500e0, 0x00000000 },
+   { 0x00200408, 0x24a03da5, 0x004500e0, 0x00010001 },
+   { 0x00200408, 0x25c03da5, 0x004500e0, 0x00010001 },
+   { 0x00000801, 0x23880061, 0x00000000, 0x000f000f },
+   { 0x00000801, 0x24a80061, 0x00000000, 0x00070007 },
+   { 0x00000801, 0x25c80061, 0x00000000, 0x00070007 },
+   { 0x80800040, 0xa4002d29, 0x00b18400, 0x00800080 },
+   { 0x80800040, 0xa4202d29, 0x00b18420, 0x00800080 },
+   { 0x80800040, 0xa4402d29, 0x00b18440, 0x00800080 },
+   { 0x80800040, 0xa4602d29, 0x00b18460, 0x00800080 },
+   { 0x80600040, 0xc8002d29, 0x00ae8800, 0x00800080 },
+   { 0x80600040, 0xc8402d29, 0x00ae8840, 0x00800080 },
+   { 0x80600040, 0xc0002d29, 0x00ae8000, 0x00800080 },
+   { 0x80600040, 0xc0402d29, 0x00ae8040, 0x00800080 },
+   { 0x00400040, 0x22002da8, 0x006902c0, 0x04000400 },
+   { 0x80800040, 0xb4002d29, 0x00b19400, 0x00800080 },
+   { 0x80800040, 0xb4202d29, 0x00b19420, 0x00800080 },
+   { 0x80800040, 0xb4402d29, 0x00b19440, 0x00800080 },
+   { 0x80800040, 0xb4602d29, 0x00b19460, 0x00800080 },
+   { 0x80600040, 0xd8002d29, 0x00ae9800, 0x00800080 },
+   { 0x80600040, 0xd8402d29, 0x00ae9840, 0x00800080 },
+   { 0x80600040, 0xd0002d29, 0x00ae9000, 0x00800080 },
+   { 0x80600040, 0xd0402d29, 0x00ae9040, 0x00800080 },
+   { 0x00400040, 0x22082da8, 0x006902c0, 0x06000600 },
+   { 0x80800040, 0xa4002d29, 0x00b18400, 0x00800080 },
+   { 0x80800040, 0xa4202d29, 0x00b18420, 0x00800080 },
+   { 0x80800040, 0xa4402d29, 0x00b18440, 0x00800080 },
+   { 0x80800040, 0xa4602d29, 0x00b18460, 0x00800080 },
+   { 0x80600040, 0xc8002d29, 0x00ae8800, 0x00800080 },
+   { 0x80600040, 0xc8402d29, 0x00ae8840, 0x00800080 },
+   { 0x80600040, 0xc0002d29, 0x00ae8000, 0x00800080 },
+   { 0x80600040, 0xc0402d29, 0x00ae8040, 0x00800080 },
+   { 0x80800040, 0xb4002d29, 0x00b19400, 0x00800080 },
+   { 0x80800040, 0xb4202d29, 0x00b19420, 0x00800080 },
+   { 0x80800040, 0xb4402d29, 0x00b19440, 0x00800080 },
+   { 0x80800040, 0xb4602d29, 0x00b19460, 0x00800080 },
+   { 0x80600040, 0xd8002d29, 0x00ae9800, 0x00800080 },
+   { 0x80600040, 0xd8402d29, 0x00ae9840, 0x00800080 },
+   { 0x80600040, 0xd0002d29, 0x00ae9000, 0x00800080 },
+   { 0x80600040, 0xd0402d29, 0x00ae9040, 0x00800080 },
+   { 0x00400040, 0x22002da8, 0x006902c0, 0x00000000 },
+   { 0x00400040, 0x22082da8, 0x006902c0, 0x02000200 },
+   { 0x00800401, 0x23a00231, 0x00d28401, 0x00000000 },
+   { 0x00800801, 0x23b00231, 0x00d28421, 0x00000000 },
+   { 0x00800401, 0x23c00231, 0x00d28441, 0x00000000 },
+   { 0x00800801, 0x23d00231, 0x00d28461, 0x00000000 },
+   { 0x00600401, 0x24c00231, 0x00cf8801, 0x00000000 },
+   { 0x00600c01, 0x24c80231, 0x00cf8841, 0x00000000 },
+   { 0x00600401, 0x25e00231, 0x00cf8001, 0x00000000 },
+   { 0x00600c01, 0x25e80231, 0x00cf8041, 0x00000000 },
+   { 0x00400040, 0x22002da8, 0x006902c0, 0x04000400 },
+   { 0x00800401, 0x23e00231, 0x00d29401, 0x00000000 },
+   { 0x00800801, 0x23f00231, 0x00d29421, 0x00000000 },
+   { 0x00800401, 0x24000231, 0x00d29441, 0x00000000 },
+   { 0x00800801, 0x24100231, 0x00d29461, 0x00000000 },
+   { 0x00600c01, 0x24d00231, 0x00cf9801, 0x00000000 },
+   { 0x00600801, 0x24d80231, 0x00cf9841, 0x00000000 },
+   { 0x00600c01, 0x25f00231, 0x00cf9001, 0x00000000 },
+   { 0x00600801, 0x25f80231, 0x00cf9041, 0x00000000 },
+   { 0x00400040, 0x22082da8, 0x006902c0, 0x06000600 },
+   { 0x00800401, 0x24200231, 0x00d28401, 0x00000000 },
+   { 0x00800801, 0x24300231, 0x00d28421, 0x00000000 },
+   { 0x00800401, 0x24400231, 0x00d28441, 0x00000000 },
+   { 0x00800801, 0x24500231, 0x00d28461, 0x00000000 },
+   { 0x00600401, 0x24e00231, 0x00cf8801, 0x00000000 },
+   { 0x00600c01, 0x24e80231, 0x00cf8841, 0x00000000 },
+   { 0x00600401, 0x26000231, 0x00cf8001, 0x00000000 },
+   { 0x00600c01, 0x26080231, 0x00cf8041, 0x00000000 },
+   { 0x00800401, 0x24600231, 0x00d29401, 0x00000000 },
+   { 0x00800801, 0x24700231, 0x00d29421, 0x00000000 },
+   { 0x00800401, 0x24800231, 0x00d29441, 0x00000000 },
+   { 0x00800801, 0x24900231, 0x00d29461, 0x00000000 },
+   { 0x00600c01, 0x24f00231, 0x00cf9801, 0x00000000 },
+   { 0x00600801, 0x24f80231, 0x00cf9841, 0x00000000 },
+   { 0x00600c01, 0x26100231, 0x00cf9001, 0x00000000 },
+   { 0x00600801, 0x26180231, 0x00cf9041, 0x00000000 },
+   { 0x05000031, 0x20000e24, 0x00000380, 0x120a8018 },
+   { 0x05000031, 0x20000e24, 0x000004a0, 0x060a8019 },
+   { 0x05000031, 0x20000e24, 0x000005c0, 0x060a801a },
+   { 0x00600001, 0x2fe00021, 0x008d0000, 0x00000000 },
+   { 0x07000031, 0x20001e24, 0x00000fe0, 0x82000010 },
diff --git a/src/shaders/post_processing/nv12_avs_nv12.asm b/src/shaders/post_processing/nv12_avs_nv12.asm
deleted file mode 100644
index 80665e0..0000000
--- a/src/shaders/post_processing/nv12_avs_nv12.asm
+++ /dev/null
@@ -1,19 +0,0 @@
-// Module name: NV12_AVS_NV12
-.kernel NV12_AVS_NV12
-.code
-
-#define INC_SCALING
-        
-#include "SetupVPKernel.asm"
-#include "Multiple_Loop_Head.asm"
-#include "PL2_AVS_IEF_16x8.asm"
-#include "PL8x4_Save_NV12.asm"
-#include "Multiple_Loop.asm"
-
-END_THREAD  // End of Thread
-
-.end_code  
-
-.end_kernel
-
-// end of nv12_avs_nv12.asm
diff --git a/src/shaders/post_processing/nv12_avs_nv12.g4b.gen5 b/src/shaders/post_processing/nv12_avs_nv12.g4b.gen5
deleted file mode 100644
index 1fa4261..0000000
--- a/src/shaders/post_processing/nv12_avs_nv12.g4b.gen5
+++ /dev/null
@@ -1,162 +0,0 @@
-   { 0x00600001, 0x21000021, 0x008d0000, 0x00000000 },
-   { 0x00000441, 0x20842e2d, 0x000000b7, 0x00100010 },
-   { 0x00000c01, 0x2086022d, 0x000000bb, 0x00000000 },
-   { 0x00000801, 0x208a01ad, 0x000000a0, 0x00000000 },
-   { 0x00200001, 0x209403bd, 0x006600a4, 0x00000000 },
-   { 0x00000040, 0x208435ad, 0x00000084, 0x000000a0 },
-   { 0x01000005, 0x20002d2c, 0x00000088, 0x80008000 },
-   { 0x00010001, 0x20c003fd, 0x00000000, 0x00000000 },
-   { 0x00000001, 0x212003bd, 0x000000c0, 0x00000000 },
-   { 0x00000001, 0x212403bd, 0x000000bc, 0x00000000 },
-   { 0x00000001, 0x213403bd, 0x00000038, 0x00000000 },
-   { 0x00200001, 0x612803bd, 0x004500a4, 0x00000000 },
-   { 0x00000001, 0x21080061, 0x00000000, 0x0000d000 },
-   { 0x00802001, 0x20000022, 0x008d0100, 0x00000000 },
-   { 0x00000031, 0x25401c09, 0x208d0000, 0x044bb401 },
-   { 0x00000001, 0x21080061, 0x00000000, 0x0000a000 },
-   { 0x00802001, 0x20400022, 0x008d0100, 0x00000000 },
-   { 0x02000031, 0x25c01c09, 0x208d0000, 0x048bb802 },
-   { 0x00000001, 0x240803bc, 0x000000a4, 0x00000000 },
-   { 0x00000048, 0x24087fbc, 0x000000bc, 0x41000000 },
-   { 0x00000048, 0x21287fbd, 0x000000c0, 0x41e00000 },
-   { 0x00000001, 0x240403bc, 0x000000bc, 0x00000000 },
-   { 0x00000048, 0x21247fbd, 0x000000c0, 0x41000000 },
-   { 0x00000001, 0x21080061, 0x00000000, 0x0000d000 },
-   { 0x00802001, 0x20000022, 0x008d0100, 0x00000000 },
-   { 0x00000031, 0x27401c09, 0x208d0000, 0x044bb401 },
-   { 0x00000001, 0x21080061, 0x00000000, 0x0000a000 },
-   { 0x00802001, 0x20400022, 0x008d0100, 0x00000000 },
-   { 0x02000031, 0x27c01c09, 0x208d0000, 0x048bb802 },
-   { 0x00600001, 0x21400229, 0x00aa0541, 0x00000000 },
-   { 0x00600001, 0x21600229, 0x00aa0549, 0x00000000 },
-   { 0x00600001, 0x21800229, 0x00aa0561, 0x00000000 },
-   { 0x00600001, 0x21a00229, 0x00aa0569, 0x00000000 },
-   { 0x00600001, 0x21c00229, 0x00aa0581, 0x00000000 },
-   { 0x00600001, 0x21e00229, 0x00aa0589, 0x00000000 },
-   { 0x00600001, 0x22000229, 0x00aa05a1, 0x00000000 },
-   { 0x00600001, 0x22200229, 0x00aa05a9, 0x00000000 },
-   { 0x00600001, 0x23400229, 0x00aa05c1, 0x00000000 },
-   { 0x00600001, 0x23600229, 0x00aa05c9, 0x00000000 },
-   { 0x00600001, 0x23800229, 0x00aa05e1, 0x00000000 },
-   { 0x00600001, 0x23a00229, 0x00aa05e9, 0x00000000 },
-   { 0x00600001, 0x23c00229, 0x00aa0641, 0x00000000 },
-   { 0x00600001, 0x23e00229, 0x00aa0649, 0x00000000 },
-   { 0x00600001, 0x24000229, 0x00aa0661, 0x00000000 },
-   { 0x00600001, 0x24200229, 0x00aa0669, 0x00000000 },
-   { 0x00600001, 0x22400229, 0x00aa0601, 0x00000000 },
-   { 0x00600001, 0x22600229, 0x00aa0609, 0x00000000 },
-   { 0x00600001, 0x22800229, 0x00aa0621, 0x00000000 },
-   { 0x00600001, 0x22a00229, 0x00aa0629, 0x00000000 },
-   { 0x00600001, 0x22c00229, 0x00aa0681, 0x00000000 },
-   { 0x00600001, 0x22e00229, 0x00aa0689, 0x00000000 },
-   { 0x00600001, 0x23000229, 0x00aa06a1, 0x00000000 },
-   { 0x00600001, 0x23200229, 0x00aa06a9, 0x00000000 },
-   { 0x00600001, 0x21500229, 0x00aa0741, 0x00000000 },
-   { 0x00600001, 0x21700229, 0x00aa0749, 0x00000000 },
-   { 0x00600001, 0x21900229, 0x00aa0761, 0x00000000 },
-   { 0x00600001, 0x21b00229, 0x00aa0769, 0x00000000 },
-   { 0x00600001, 0x21d00229, 0x00aa0781, 0x00000000 },
-   { 0x00600001, 0x21f00229, 0x00aa0789, 0x00000000 },
-   { 0x00600001, 0x22100229, 0x00aa07a1, 0x00000000 },
-   { 0x00600001, 0x22300229, 0x00aa07a9, 0x00000000 },
-   { 0x00600001, 0x23500229, 0x00aa07c1, 0x00000000 },
-   { 0x00600001, 0x23700229, 0x00aa07c9, 0x00000000 },
-   { 0x00600001, 0x23900229, 0x00aa07e1, 0x00000000 },
-   { 0x00600001, 0x23b00229, 0x00aa07e9, 0x00000000 },
-   { 0x00600001, 0x23d00229, 0x00aa0841, 0x00000000 },
-   { 0x00600001, 0x23f00229, 0x00aa0849, 0x00000000 },
-   { 0x00600001, 0x24100229, 0x00aa0861, 0x00000000 },
-   { 0x00600001, 0x24300229, 0x00aa0869, 0x00000000 },
-   { 0x00600001, 0x22500229, 0x00aa0801, 0x00000000 },
-   { 0x00600001, 0x22700229, 0x00aa0809, 0x00000000 },
-   { 0x00600001, 0x22900229, 0x00aa0821, 0x00000000 },
-   { 0x00600001, 0x22b00229, 0x00aa0829, 0x00000000 },
-   { 0x00600001, 0x22d00229, 0x00aa0881, 0x00000000 },
-   { 0x00600001, 0x22f00229, 0x00aa0889, 0x00000000 },
-   { 0x00600001, 0x23100229, 0x00aa08a1, 0x00000000 },
-   { 0x00600001, 0x23300229, 0x00aa08a9, 0x00000000 },
-   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
-   { 0x00200001, 0x202001a6, 0x004500a0, 0x00000000 },
-   { 0x00000001, 0x20280062, 0x00000000, 0x0007000f },
-   { 0x00000005, 0x24000c20, 0x000000b8, 0x00ffffff },
-   { 0x04000010, 0x20000c04, 0x00000400, 0x00ffffff },
-   { 0x00010220, 0x34001c00, 0x00001400, 0x00000056 },
-   { 0x01600031, 0x27000c01, 0x408d0000, 0x0248a007 },
-   { 0x0000040c, 0x21043da1, 0x000000a2, 0x00010001 },
-   { 0x00000801, 0x21080061, 0x00000000, 0x0003000f },
-   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
-   { 0x01600031, 0x28000c01, 0x408d0000, 0x0228a008 },
-   { 0x00200001, 0x210001a1, 0x004500a0, 0x00000000 },
-   { 0x00000001, 0x21080061, 0x00000000, 0x0007000f },
-   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
-   { 0x00000001, 0x26020228, 0x000000ba, 0x00000000 },
-   { 0x00610001, 0x24400129, 0x020000b8, 0x00000000 },
-   { 0x00710001, 0x24400169, 0x02000000, 0x00000000 },
-   { 0x00000001, 0x24600061, 0x00000000, 0x00040001 },
-   { 0x00000001, 0x24640061, 0x00000000, 0x00400010 },
-   { 0x00000001, 0x24680061, 0x00000000, 0x04000100 },
-   { 0x00000001, 0x246c0061, 0x00000000, 0x40001000 },
-   { 0x00000001, 0x26020128, 0x00000440, 0x00000000 },
-   { 0x00910001, 0x41400231, 0x02b10700, 0x00000000 },
-   { 0x02600005, 0x2000252c, 0x02000440, 0x008d0460 },
-   { 0x00710001, 0x42400231, 0x02ae0800, 0x00000000 },
-   { 0x00710001, 0x43400231, 0x02ae0801, 0x00000000 },
-   { 0x00000001, 0x26020128, 0x00000442, 0x00000000 },
-   { 0x00910001, 0x41600231, 0x02b10710, 0x00000000 },
-   { 0x00000001, 0x26020128, 0x00000444, 0x00000000 },
-   { 0x00910001, 0x41800231, 0x02b10720, 0x00000000 },
-   { 0x02600005, 0x2000252c, 0x02000444, 0x008d0460 },
-   { 0x00710001, 0x42500231, 0x02ae0810, 0x00000000 },
-   { 0x00710001, 0x43500231, 0x02ae0811, 0x00000000 },
-   { 0x00000001, 0x26020128, 0x00000446, 0x00000000 },
-   { 0x00910001, 0x41a00231, 0x02b10730, 0x00000000 },
-   { 0x00000001, 0x26020128, 0x00000448, 0x00000000 },
-   { 0x00910001, 0x41c00231, 0x02b10740, 0x00000000 },
-   { 0x02600005, 0x2000252c, 0x02000448, 0x008d0460 },
-   { 0x00710001, 0x42600231, 0x02ae0820, 0x00000000 },
-   { 0x00710001, 0x43600231, 0x02ae0821, 0x00000000 },
-   { 0x00000001, 0x26020128, 0x0000044a, 0x00000000 },
-   { 0x00910001, 0x41e00231, 0x02b10750, 0x00000000 },
-   { 0x00000001, 0x26020128, 0x0000044c, 0x00000000 },
-   { 0x00910001, 0x42000231, 0x02b10760, 0x00000000 },
-   { 0x02600005, 0x2000252c, 0x0200044c, 0x008d0460 },
-   { 0x00710001, 0x42700231, 0x02ae0830, 0x00000000 },
-   { 0x00710001, 0x43700231, 0x02ae0831, 0x00000000 },
-   { 0x00000001, 0x26020128, 0x0000044e, 0x00000000 },
-   { 0x00910001, 0x42200231, 0x02b10770, 0x00000000 },
-   { 0x00800001, 0x20400232, 0x00d20140, 0x00000000 },
-   { 0x00800001, 0x20500232, 0x00d20160, 0x00000000 },
-   { 0x00800001, 0x20600232, 0x00d20180, 0x00000000 },
-   { 0x00800001, 0x20700232, 0x00d201a0, 0x00000000 },
-   { 0x00800001, 0x20800232, 0x00d201c0, 0x00000000 },
-   { 0x00800001, 0x20900232, 0x00d201e0, 0x00000000 },
-   { 0x00800001, 0x20a00232, 0x00d20200, 0x00000000 },
-   { 0x00800001, 0x20b00232, 0x00d20220, 0x00000000 },
-   { 0x01600031, 0x20000c04, 0x508d0000, 0x0a082007 },
-   { 0x00200001, 0x210001a5, 0x004500a0, 0x00000000 },
-   { 0x0000000c, 0x21043ca5, 0x00000104, 0x00010001 },
-   { 0x00000001, 0x21080061, 0x00000000, 0x0003000f },
-   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
-   { 0x00800001, 0x40400232, 0x00d20240, 0x00000000 },
-   { 0x00800001, 0x40410232, 0x00d20340, 0x00000000 },
-   { 0x00800001, 0x40600232, 0x00d20260, 0x00000000 },
-   { 0x00800001, 0x40610232, 0x00d20360, 0x00000000 },
-   { 0x01600031, 0x20000c04, 0x508d0000, 0x06082008 },
-   { 0x01000040, 0x20863dad, 0x00000086, 0xffffffff },
-   { 0x00000040, 0x20a03dad, 0x000000a0, 0x00100010 },
-   { 0x05000010, 0x200035ac, 0x020000a0, 0x00000084 },
-   { 0x00000041, 0x24407fbd, 0x000000bc, 0x41800000 },
-   { 0x00000040, 0x20a477bd, 0x00000440, 0x000000a4 },
-   { 0x00010220, 0x34001c00, 0x00001400, 0x0000000e },
-   { 0x00010220, 0x34001c00, 0x02001400, 0xfffffede },
-   { 0x00000001, 0x20a001ad, 0x0000008a, 0x00000000 },
-   { 0x00000040, 0x20a23dad, 0x000000a2, 0x00080008 },
-   { 0x00000001, 0x20a403bd, 0x00000094, 0x00000000 },
-   { 0x00000041, 0x24407fbd, 0x00000038, 0x41000000 },
-   { 0x00000040, 0x20a877bd, 0x00000440, 0x000000a8 },
-   { 0x00000220, 0x34001c00, 0x00001400, 0xfffffed2 },
-   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
-   { 0x0f000031, 0x20000c04, 0x708d0000, 0x82000000 },
-   { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
-   { 0x0f000031, 0x20000c04, 0x708d0000, 0x82000000 },
diff --git a/src/shaders/post_processing/nv12_avs_nv12.g6b b/src/shaders/post_processing/nv12_avs_nv12.g6b
deleted file mode 100644
index 7e1dfc3..0000000
--- a/src/shaders/post_processing/nv12_avs_nv12.g6b
+++ /dev/null
@@ -1,235 +0,0 @@
-   { 0x00600001, 0x20e00061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x21000061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x21200061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x21400061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x21600061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x21800061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x21a00061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x21c00061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x21e00061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x22000061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x22200061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x22400061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x22600061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x22800061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x22a00061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x22c00061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x22e00061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x23000061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x23200061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x23400061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x23600061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x23800061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x23a00061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x23c00061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x23e00061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x24000061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x24200061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x24400061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x24600061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x24800061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x24a00061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x24c00061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x24e00061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x25000061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x25200061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x25400061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x25600061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x25800061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x25a00061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x25c00061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x25e00061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x26000061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x26200061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x26400061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x26600061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x26800061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x26a00061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x26c00061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x26e00061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x27000061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x27200061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x27400061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x27600061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x27800061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x27a00061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x27c00061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x27e00061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x28000061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x28200061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x28400061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x28600061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x28800061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x28a00061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x28c00061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x28e00061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x29000061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x29200061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x29400061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x29600061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x29800061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x29a00061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x29c00061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x29e00061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x21000021, 0x008d0000, 0x00000000 },
-   { 0x00000441, 0x20842e2d, 0x000000b7, 0x00100010 },
-   { 0x00000c01, 0x2086022d, 0x000000bb, 0x00000000 },
-   { 0x00000801, 0x208a01ad, 0x000000a0, 0x00000000 },
-   { 0x00200001, 0x209403bd, 0x006600a4, 0x00000000 },
-   { 0x00000040, 0x208435ad, 0x00000084, 0x000000a0 },
-   { 0x01000005, 0x20002d2c, 0x00000088, 0x80008000 },
-   { 0x00010001, 0x20c003fd, 0x00000000, 0x00000000 },
-   { 0x00000001, 0x212003bd, 0x000000c0, 0x00000000 },
-   { 0x00000001, 0x212403bd, 0x000000bc, 0x00000000 },
-   { 0x00000001, 0x213403bd, 0x00000038, 0x00000000 },
-   { 0x00200001, 0x612803bd, 0x004500a4, 0x00000000 },
-   { 0x00000001, 0x21080061, 0x00000000, 0x0000d000 },
-   { 0x00800001, 0x20000022, 0x008d0100, 0x00000000 },
-   { 0x02000031, 0x25401cc9, 0x00000000, 0x044bb401 },
-   { 0x00000001, 0x21080061, 0x00000000, 0x0000a000 },
-   { 0x00800001, 0x20400022, 0x008d0100, 0x00000000 },
-   { 0x02000031, 0x25c01cc9, 0x00000040, 0x048bb802 },
-   { 0x00000001, 0x240803bc, 0x000000a4, 0x00000000 },
-   { 0x00000048, 0x24087fbc, 0x000000bc, 0x41000000 },
-   { 0x00000048, 0x21287fbd, 0x000000c0, 0x41e00000 },
-   { 0x00000001, 0x240403bc, 0x000000bc, 0x00000000 },
-   { 0x00000048, 0x21247fbd, 0x000000c0, 0x41000000 },
-   { 0x00000001, 0x21080061, 0x00000000, 0x0000d000 },
-   { 0x00800001, 0x20000022, 0x008d0100, 0x00000000 },
-   { 0x02000031, 0x27401cc9, 0x00000000, 0x044bb401 },
-   { 0x00000001, 0x21080061, 0x00000000, 0x0000a000 },
-   { 0x00800001, 0x20400022, 0x008d0100, 0x00000000 },
-   { 0x02000031, 0x27c01cc9, 0x00000040, 0x048bb802 },
-   { 0x00600001, 0x21400229, 0x00aa0541, 0x00000000 },
-   { 0x00600001, 0x21600229, 0x00aa0549, 0x00000000 },
-   { 0x00600001, 0x21800229, 0x00aa0561, 0x00000000 },
-   { 0x00600001, 0x21a00229, 0x00aa0569, 0x00000000 },
-   { 0x00600001, 0x21c00229, 0x00aa0581, 0x00000000 },
-   { 0x00600001, 0x21e00229, 0x00aa0589, 0x00000000 },
-   { 0x00600001, 0x22000229, 0x00aa05a1, 0x00000000 },
-   { 0x00600001, 0x22200229, 0x00aa05a9, 0x00000000 },
-   { 0x00600001, 0x23400229, 0x00aa05c1, 0x00000000 },
-   { 0x00600001, 0x23600229, 0x00aa05c9, 0x00000000 },
-   { 0x00600001, 0x23800229, 0x00aa05e1, 0x00000000 },
-   { 0x00600001, 0x23a00229, 0x00aa05e9, 0x00000000 },
-   { 0x00600001, 0x23c00229, 0x00aa0641, 0x00000000 },
-   { 0x00600001, 0x23e00229, 0x00aa0649, 0x00000000 },
-   { 0x00600001, 0x24000229, 0x00aa0661, 0x00000000 },
-   { 0x00600001, 0x24200229, 0x00aa0669, 0x00000000 },
-   { 0x00600001, 0x22400229, 0x00aa0601, 0x00000000 },
-   { 0x00600001, 0x22600229, 0x00aa0609, 0x00000000 },
-   { 0x00600001, 0x22800229, 0x00aa0621, 0x00000000 },
-   { 0x00600001, 0x22a00229, 0x00aa0629, 0x00000000 },
-   { 0x00600001, 0x22c00229, 0x00aa0681, 0x00000000 },
-   { 0x00600001, 0x22e00229, 0x00aa0689, 0x00000000 },
-   { 0x00600001, 0x23000229, 0x00aa06a1, 0x00000000 },
-   { 0x00600001, 0x23200229, 0x00aa06a9, 0x00000000 },
-   { 0x00600001, 0x21500229, 0x00aa0741, 0x00000000 },
-   { 0x00600001, 0x21700229, 0x00aa0749, 0x00000000 },
-   { 0x00600001, 0x21900229, 0x00aa0761, 0x00000000 },
-   { 0x00600001, 0x21b00229, 0x00aa0769, 0x00000000 },
-   { 0x00600001, 0x21d00229, 0x00aa0781, 0x00000000 },
-   { 0x00600001, 0x21f00229, 0x00aa0789, 0x00000000 },
-   { 0x00600001, 0x22100229, 0x00aa07a1, 0x00000000 },
-   { 0x00600001, 0x22300229, 0x00aa07a9, 0x00000000 },
-   { 0x00600001, 0x23500229, 0x00aa07c1, 0x00000000 },
-   { 0x00600001, 0x23700229, 0x00aa07c9, 0x00000000 },
-   { 0x00600001, 0x23900229, 0x00aa07e1, 0x00000000 },
-   { 0x00600001, 0x23b00229, 0x00aa07e9, 0x00000000 },
-   { 0x00600001, 0x23d00229, 0x00aa0841, 0x00000000 },
-   { 0x00600001, 0x23f00229, 0x00aa0849, 0x00000000 },
-   { 0x00600001, 0x24100229, 0x00aa0861, 0x00000000 },
-   { 0x00600001, 0x24300229, 0x00aa0869, 0x00000000 },
-   { 0x00600001, 0x22500229, 0x00aa0801, 0x00000000 },
-   { 0x00600001, 0x22700229, 0x00aa0809, 0x00000000 },
-   { 0x00600001, 0x22900229, 0x00aa0821, 0x00000000 },
-   { 0x00600001, 0x22b00229, 0x00aa0829, 0x00000000 },
-   { 0x00600001, 0x22d00229, 0x00aa0881, 0x00000000 },
-   { 0x00600001, 0x22f00229, 0x00aa0889, 0x00000000 },
-   { 0x00600001, 0x23100229, 0x00aa08a1, 0x00000000 },
-   { 0x00600001, 0x23300229, 0x00aa08a9, 0x00000000 },
-   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
-   { 0x00200001, 0x202001a6, 0x004500a0, 0x00000000 },
-   { 0x00000001, 0x20280062, 0x00000000, 0x0007000f },
-   { 0x00000005, 0x24000c20, 0x000000b8, 0x00ffffff },
-   { 0x04000010, 0x20000c04, 0x00000400, 0x00ffffff },
-   { 0x00010220, 0x34001c00, 0x00001400, 0x00000056 },
-   { 0x04600031, 0x27000cc1, 0x00000020, 0x02498007 },
-   { 0x0000040c, 0x21043da1, 0x000000a2, 0x00010001 },
-   { 0x00000801, 0x21080061, 0x00000000, 0x0003000f },
-   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
-   { 0x04600031, 0x28000cc1, 0x00000020, 0x02298008 },
-   { 0x00200001, 0x210001a1, 0x004500a0, 0x00000000 },
-   { 0x00000001, 0x21080061, 0x00000000, 0x0007000f },
-   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
-   { 0x00000001, 0x26020228, 0x000000ba, 0x00000000 },
-   { 0x00610001, 0x24400129, 0x020000b8, 0x00000000 },
-   { 0x00710001, 0x24400169, 0x02000000, 0x00000000 },
-   { 0x00000001, 0x24600061, 0x00000000, 0x00040001 },
-   { 0x00000001, 0x24640061, 0x00000000, 0x00400010 },
-   { 0x00000001, 0x24680061, 0x00000000, 0x04000100 },
-   { 0x00000001, 0x246c0061, 0x00000000, 0x40001000 },
-   { 0x00000001, 0x26020128, 0x00000440, 0x00000000 },
-   { 0x00910001, 0x41400231, 0x02b10700, 0x00000000 },
-   { 0x02600005, 0x2000252c, 0x02000440, 0x008d0460 },
-   { 0x00710001, 0x42400231, 0x02ae0800, 0x00000000 },
-   { 0x00710001, 0x43400231, 0x02ae0801, 0x00000000 },
-   { 0x00000001, 0x26020128, 0x00000442, 0x00000000 },
-   { 0x00910001, 0x41600231, 0x02b10710, 0x00000000 },
-   { 0x00000001, 0x26020128, 0x00000444, 0x00000000 },
-   { 0x00910001, 0x41800231, 0x02b10720, 0x00000000 },
-   { 0x02600005, 0x2000252c, 0x02000444, 0x008d0460 },
-   { 0x00710001, 0x42500231, 0x02ae0810, 0x00000000 },
-   { 0x00710001, 0x43500231, 0x02ae0811, 0x00000000 },
-   { 0x00000001, 0x26020128, 0x00000446, 0x00000000 },
-   { 0x00910001, 0x41a00231, 0x02b10730, 0x00000000 },
-   { 0x00000001, 0x26020128, 0x00000448, 0x00000000 },
-   { 0x00910001, 0x41c00231, 0x02b10740, 0x00000000 },
-   { 0x02600005, 0x2000252c, 0x02000448, 0x008d0460 },
-   { 0x00710001, 0x42600231, 0x02ae0820, 0x00000000 },
-   { 0x00710001, 0x43600231, 0x02ae0821, 0x00000000 },
-   { 0x00000001, 0x26020128, 0x0000044a, 0x00000000 },
-   { 0x00910001, 0x41e00231, 0x02b10750, 0x00000000 },
-   { 0x00000001, 0x26020128, 0x0000044c, 0x00000000 },
-   { 0x00910001, 0x42000231, 0x02b10760, 0x00000000 },
-   { 0x02600005, 0x2000252c, 0x0200044c, 0x008d0460 },
-   { 0x00710001, 0x42700231, 0x02ae0830, 0x00000000 },
-   { 0x00710001, 0x43700231, 0x02ae0831, 0x00000000 },
-   { 0x00000001, 0x26020128, 0x0000044e, 0x00000000 },
-   { 0x00910001, 0x42200231, 0x02b10770, 0x00000000 },
-   { 0x00800001, 0x20400232, 0x00d20140, 0x00000000 },
-   { 0x00800001, 0x20500232, 0x00d20160, 0x00000000 },
-   { 0x00800001, 0x20600232, 0x00d20180, 0x00000000 },
-   { 0x00800001, 0x20700232, 0x00d201a0, 0x00000000 },
-   { 0x00800001, 0x20800232, 0x00d201c0, 0x00000000 },
-   { 0x00800001, 0x20900232, 0x00d201e0, 0x00000000 },
-   { 0x00800001, 0x20a00232, 0x00d20200, 0x00000000 },
-   { 0x00800001, 0x20b00232, 0x00d20220, 0x00000000 },
-   { 0x05600031, 0x20000cc4, 0x00000020, 0x0a094007 },
-   { 0x00200001, 0x210001a5, 0x004500a0, 0x00000000 },
-   { 0x0000000c, 0x21043ca5, 0x00000104, 0x00010001 },
-   { 0x00000001, 0x21080061, 0x00000000, 0x0003000f },
-   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
-   { 0x00800001, 0x40400232, 0x00d20240, 0x00000000 },
-   { 0x00800001, 0x40410232, 0x00d20340, 0x00000000 },
-   { 0x00800001, 0x40600232, 0x00d20260, 0x00000000 },
-   { 0x00800001, 0x40610232, 0x00d20360, 0x00000000 },
-   { 0x05600031, 0x20000cc4, 0x00000020, 0x06094008 },
-   { 0x01000040, 0x20863dad, 0x00000086, 0xffffffff },
-   { 0x00000040, 0x20a03dad, 0x000000a0, 0x00100010 },
-   { 0x05000010, 0x200035ac, 0x020000a0, 0x00000084 },
-   { 0x00000041, 0x24407fbd, 0x000000bc, 0x41800000 },
-   { 0x00000040, 0x20a477bd, 0x00000440, 0x000000a4 },
-   { 0x00010220, 0x34001c00, 0x00001400, 0x0000000e },
-   { 0x00010220, 0x34001c00, 0x02001400, 0xfffffede },
-   { 0x00000001, 0x20a001ad, 0x0000008a, 0x00000000 },
-   { 0x00000040, 0x20a23dad, 0x000000a2, 0x00080008 },
-   { 0x00000001, 0x20a403bd, 0x00000094, 0x00000000 },
-   { 0x00000041, 0x24407fbd, 0x00000038, 0x41000000 },
-   { 0x00000040, 0x20a877bd, 0x00000440, 0x000000a8 },
-   { 0x00000220, 0x34001c00, 0x00001400, 0xfffffed2 },
-   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
-   { 0x07000031, 0x20001cc4, 0x000001e0, 0x82000010 },
-   { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
-   { 0x07000031, 0x20001cc4, 0x000001e0, 0x82000010 },
diff --git a/src/shaders/post_processing/nv12_dndi_nv12.g4b.gen5 b/src/shaders/post_processing/nv12_dndi_nv12.g4b.gen5
deleted file mode 100644
index 6c0474d..0000000
--- a/src/shaders/post_processing/nv12_dndi_nv12.g4b.gen5
+++ /dev/null
@@ -1,86 +0,0 @@
-   { 0x00600001, 0x21000021, 0x008d0000, 0x00000000 },
-   { 0x00000441, 0x20842e2d, 0x000000b7, 0x00100010 },
-   { 0x00000c01, 0x2086022d, 0x000000bb, 0x00000000 },
-   { 0x00000801, 0x208a01ad, 0x000000a0, 0x00000000 },
-   { 0x00200001, 0x209403bd, 0x006600a4, 0x00000000 },
-   { 0x00000040, 0x208435ad, 0x00000084, 0x000000a0 },
-   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
-   { 0x00000001, 0x204801aa, 0x000000a0, 0x00000000 },
-   { 0x00000001, 0x205801aa, 0x000000a2, 0x00000000 },
-   { 0x01600031, 0x24400c01, 0x208d0000, 0x04cb8004 },
-   { 0x00800001, 0x21400229, 0x00b10440, 0x00000000 },
-   { 0x00800001, 0x21600229, 0x00b10450, 0x00000000 },
-   { 0x00800001, 0x21800229, 0x00b10460, 0x00000000 },
-   { 0x00800001, 0x21a00229, 0x00b10470, 0x00000000 },
-   { 0x00600001, 0x22400229, 0x00ae0481, 0x00000000 },
-   { 0x00600001, 0x23400229, 0x00ae0480, 0x00000000 },
-   { 0x00600001, 0x22500229, 0x00ae0491, 0x00000000 },
-   { 0x00600001, 0x23500229, 0x00ae0490, 0x00000000 },
-   { 0x00600001, 0x22600229, 0x00ae04a1, 0x00000000 },
-   { 0x00600001, 0x23600229, 0x00ae04a0, 0x00000000 },
-   { 0x00600001, 0x22700229, 0x00ae04b1, 0x00000000 },
-   { 0x00600001, 0x23700229, 0x00ae04b0, 0x00000000 },
-   { 0x00800001, 0x21c00229, 0x00b104c0, 0x00000000 },
-   { 0x00800001, 0x21e00229, 0x00b104d0, 0x00000000 },
-   { 0x00800001, 0x22000229, 0x00b104e0, 0x00000000 },
-   { 0x00800001, 0x22200229, 0x00b104f0, 0x00000000 },
-   { 0x00600001, 0x22800229, 0x00ae0501, 0x00000000 },
-   { 0x00600001, 0x23800229, 0x00ae0500, 0x00000000 },
-   { 0x00600001, 0x22900229, 0x00ae0511, 0x00000000 },
-   { 0x00600001, 0x23900229, 0x00ae0510, 0x00000000 },
-   { 0x00600001, 0x22a00229, 0x00ae0521, 0x00000000 },
-   { 0x00600001, 0x23a00229, 0x00ae0520, 0x00000000 },
-   { 0x00600001, 0x22b00229, 0x00ae0531, 0x00000000 },
-   { 0x00600001, 0x23b00229, 0x00ae0530, 0x00000000 },
-   { 0x00000008, 0x21003da1, 0x000000a0, 0x00010001 },
-   { 0x00000001, 0x210401a1, 0x000000a2, 0x00000000 },
-   { 0x00000001, 0x21080061, 0x00000000, 0x00030007 },
-   { 0x00600001, 0x21600022, 0x008d0100, 0x00000000 },
-   { 0x00600001, 0x21800022, 0x008d0540, 0x00000000 },
-   { 0x0b600031, 0x20000c04, 0x508d0000, 0x04082014 },
-   { 0x00200008, 0x21003da1, 0x004500a0, 0x00020002 },
-   { 0x00000040, 0x21002421, 0x00000100, 0x00000034 },
-   { 0x00000001, 0x21080061, 0x00000000, 0x00000003 },
-   { 0x00600001, 0x21a00022, 0x008d0100, 0x00000000 },
-   { 0x00000001, 0x21c00022, 0x00000560, 0x00000000 },
-   { 0x0d600031, 0x20000c04, 0x508d0000, 0x04082014 },
-   { 0x00400040, 0x22083e28, 0x00690024, 0x07000700 },
-   { 0x01000010, 0x20003e2c, 0x0000003b, 0x00010001 },
-   { 0x00010220, 0x34001c00, 0x00001400, 0x0000000a },
-   { 0x00400001, 0x20400022, 0x00690580, 0x00000000 },
-   { 0x00400001, 0x20500022, 0x006904d0, 0x00000000 },
-   { 0x00400001, 0x20600022, 0x00690590, 0x00000000 },
-   { 0x00400001, 0x20700022, 0x006904f0, 0x00000000 },
-   { 0x00000220, 0x34001c00, 0x00001400, 0x00000008 },
-   { 0x00400001, 0x20400022, 0x006904c0, 0x00000000 },
-   { 0x00400001, 0x20500022, 0x00690580, 0x00000000 },
-   { 0x00400001, 0x20600022, 0x006904e0, 0x00000000 },
-   { 0x00400001, 0x20700022, 0x00690590, 0x00000000 },
-   { 0x00200001, 0x210001a1, 0x004500a0, 0x00000000 },
-   { 0x00000001, 0x21080061, 0x00000000, 0x0003000f },
-   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
-   { 0x01600031, 0x20000c04, 0x508d0000, 0x06082007 },
-   { 0x00200040, 0x210035a5, 0x004500a0, 0x00450074 },
-   { 0x0000000c, 0x21043ca5, 0x00000104, 0x00010001 },
-   { 0x00000001, 0x21080061, 0x00000000, 0x0001000f },
-   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
-   { 0x01600031, 0x28000c01, 0x408d0000, 0x0218a002 },
-   { 0x00200001, 0x210001a1, 0x004500a0, 0x00000000 },
-   { 0x0000000c, 0x21043ca5, 0x00000104, 0x00010001 },
-   { 0x00000001, 0x21080061, 0x00000000, 0x0001000f },
-   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
-   { 0x00600001, 0x20400022, 0x008d0800, 0x00000000 },
-   { 0x01600031, 0x20000c04, 0x508d0000, 0x04082008 },
-   { 0x01000040, 0x20863dad, 0x00000086, 0xffffffff },
-   { 0x00000040, 0x20a03dad, 0x000000a0, 0x00100010 },
-   { 0x05000010, 0x200035ac, 0x020000a0, 0x00000084 },
-   { 0x00010220, 0x34001c00, 0x00001400, 0x00000008 },
-   { 0x00010220, 0x34001c00, 0x02001400, 0xffffff70 },
-   { 0x00000001, 0x20a001ad, 0x0000008a, 0x00000000 },
-   { 0x00000040, 0x20a23dad, 0x000000a2, 0x00080008 },
-   { 0x00000220, 0x34001c00, 0x00001400, 0xffffff6a },
-   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
-   { 0x0f000031, 0x20000c04, 0x708d0000, 0x82000000 },
-   { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
-   { 0x0f000031, 0x20000c04, 0x708d0000, 0x82000000 },
diff --git a/src/shaders/post_processing/nv12_dndi_nv12.g6b b/src/shaders/post_processing/nv12_dndi_nv12.g6b
deleted file mode 100644
index cb99eff..0000000
--- a/src/shaders/post_processing/nv12_dndi_nv12.g6b
+++ /dev/null
@@ -1,159 +0,0 @@
-   { 0x00600001, 0x20e00061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x21000061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x21200061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x21400061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x21600061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x21800061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x21a00061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x21c00061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x21e00061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x22000061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x22200061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x22400061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x22600061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x22800061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x22a00061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x22c00061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x22e00061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x23000061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x23200061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x23400061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x23600061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x23800061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x23a00061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x23c00061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x23e00061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x24000061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x24200061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x24400061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x24600061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x24800061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x24a00061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x24c00061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x24e00061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x25000061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x25200061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x25400061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x25600061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x25800061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x25a00061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x25c00061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x25e00061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x26000061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x26200061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x26400061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x26600061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x26800061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x26a00061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x26c00061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x26e00061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x27000061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x27200061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x27400061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x27600061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x27800061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x27a00061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x27c00061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x27e00061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x28000061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x28200061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x28400061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x28600061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x28800061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x28a00061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x28c00061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x28e00061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x29000061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x29200061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x29400061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x29600061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x29800061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x29a00061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x29c00061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x29e00061, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x21000021, 0x008d0000, 0x00000000 },
-   { 0x00000441, 0x20842e2d, 0x000000b7, 0x00100010 },
-   { 0x00000c01, 0x2086022d, 0x000000bb, 0x00000000 },
-   { 0x00000801, 0x208a01ad, 0x000000a0, 0x00000000 },
-   { 0x00200001, 0x209403bd, 0x006600a4, 0x00000000 },
-   { 0x00000040, 0x208435ad, 0x00000084, 0x000000a0 },
-   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
-   { 0x00000001, 0x204801aa, 0x000000a0, 0x00000000 },
-   { 0x00000001, 0x205801aa, 0x000000a2, 0x00000000 },
-   { 0x02600031, 0x24400cc1, 0x00000020, 0x04cb8004 },
-   { 0x00800001, 0x21400229, 0x00b10440, 0x00000000 },
-   { 0x00800001, 0x21600229, 0x00b10450, 0x00000000 },
-   { 0x00800001, 0x21800229, 0x00b10460, 0x00000000 },
-   { 0x00800001, 0x21a00229, 0x00b10470, 0x00000000 },
-   { 0x00600001, 0x22400229, 0x00ae0481, 0x00000000 },
-   { 0x00600001, 0x23400229, 0x00ae0480, 0x00000000 },
-   { 0x00600001, 0x22500229, 0x00ae0491, 0x00000000 },
-   { 0x00600001, 0x23500229, 0x00ae0490, 0x00000000 },
-   { 0x00600001, 0x22600229, 0x00ae04a1, 0x00000000 },
-   { 0x00600001, 0x23600229, 0x00ae04a0, 0x00000000 },
-   { 0x00600001, 0x22700229, 0x00ae04b1, 0x00000000 },
-   { 0x00600001, 0x23700229, 0x00ae04b0, 0x00000000 },
-   { 0x00800001, 0x21c00229, 0x00b104c0, 0x00000000 },
-   { 0x00800001, 0x21e00229, 0x00b104d0, 0x00000000 },
-   { 0x00800001, 0x22000229, 0x00b104e0, 0x00000000 },
-   { 0x00800001, 0x22200229, 0x00b104f0, 0x00000000 },
-   { 0x00600001, 0x22800229, 0x00ae0501, 0x00000000 },
-   { 0x00600001, 0x23800229, 0x00ae0500, 0x00000000 },
-   { 0x00600001, 0x22900229, 0x00ae0511, 0x00000000 },
-   { 0x00600001, 0x23900229, 0x00ae0510, 0x00000000 },
-   { 0x00600001, 0x22a00229, 0x00ae0521, 0x00000000 },
-   { 0x00600001, 0x23a00229, 0x00ae0520, 0x00000000 },
-   { 0x00600001, 0x22b00229, 0x00ae0531, 0x00000000 },
-   { 0x00600001, 0x23b00229, 0x00ae0530, 0x00000000 },
-   { 0x00000008, 0x21003da1, 0x000000a0, 0x00010001 },
-   { 0x00000001, 0x210401a1, 0x000000a2, 0x00000000 },
-   { 0x00000001, 0x21080061, 0x00000000, 0x00030007 },
-   { 0x00600001, 0x21600022, 0x008d0100, 0x00000000 },
-   { 0x00600001, 0x21800022, 0x008d0540, 0x00000000 },
-   { 0x05600031, 0x20000cc4, 0x00000160, 0x04094014 },
-   { 0x00200008, 0x21003da1, 0x004500a0, 0x00020002 },
-   { 0x00000040, 0x21002421, 0x00000100, 0x00000034 },
-   { 0x00000001, 0x21080061, 0x00000000, 0x00000003 },
-   { 0x00600001, 0x21a00022, 0x008d0100, 0x00000000 },
-   { 0x00000001, 0x21c00022, 0x00000560, 0x00000000 },
-   { 0x05600031, 0x20000cc4, 0x000001a0, 0x04094014 },
-   { 0x00400040, 0x22083e28, 0x00690024, 0x07000700 },
-   { 0x01000010, 0x20003e2c, 0x0000003b, 0x00010001 },
-   { 0x00010220, 0x34001c00, 0x00001400, 0x0000000a },
-   { 0x00400001, 0x20400022, 0x00690580, 0x00000000 },
-   { 0x00400001, 0x20500022, 0x006904d0, 0x00000000 },
-   { 0x00400001, 0x20600022, 0x00690590, 0x00000000 },
-   { 0x00400001, 0x20700022, 0x006904f0, 0x00000000 },
-   { 0x00000220, 0x34001c00, 0x00001400, 0x00000008 },
-   { 0x00400001, 0x20400022, 0x006904c0, 0x00000000 },
-   { 0x00400001, 0x20500022, 0x00690580, 0x00000000 },
-   { 0x00400001, 0x20600022, 0x006904e0, 0x00000000 },
-   { 0x00400001, 0x20700022, 0x00690590, 0x00000000 },
-   { 0x00200001, 0x210001a1, 0x004500a0, 0x00000000 },
-   { 0x00000001, 0x21080061, 0x00000000, 0x0003000f },
-   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
-   { 0x05600031, 0x20000cc4, 0x00000020, 0x06094007 },
-   { 0x00200040, 0x210035a5, 0x004500a0, 0x00450074 },
-   { 0x0000000c, 0x21043ca5, 0x00000104, 0x00010001 },
-   { 0x00000001, 0x21080061, 0x00000000, 0x0001000f },
-   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
-   { 0x04600031, 0x28000cc1, 0x00000020, 0x02198002 },
-   { 0x00200001, 0x210001a1, 0x004500a0, 0x00000000 },
-   { 0x0000000c, 0x21043ca5, 0x00000104, 0x00010001 },
-   { 0x00000001, 0x21080061, 0x00000000, 0x0001000f },
-   { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
-   { 0x00600001, 0x20400022, 0x008d0800, 0x00000000 },
-   { 0x05600031, 0x20000cc4, 0x00000020, 0x04094008 },
-   { 0x01000040, 0x20863dad, 0x00000086, 0xffffffff },
-   { 0x00000040, 0x20a03dad, 0x000000a0, 0x00100010 },
-   { 0x05000010, 0x200035ac, 0x020000a0, 0x00000084 },
-   { 0x00010220, 0x34001c00, 0x00001400, 0x00000008 },
-   { 0x00010220, 0x34001c00, 0x02001400, 0xffffff70 },
-   { 0x00000001, 0x20a001ad, 0x0000008a, 0x00000000 },
-   { 0x00000040, 0x20a23dad, 0x000000a2, 0x00080008 },
-   { 0x00000220, 0x34001c00, 0x00001400, 0xffffff6a },
-   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
-   { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
-   { 0x07000031, 0x20001cc4, 0x000001e0, 0x82000010 },
-   { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
-   { 0x07000031, 0x20001cc4, 0x000001e0, 0x82000010 },
diff --git a/src/shaders/render/Makefile.am b/src/shaders/render/Makefile.am
index f9540b0..dac58c7 100644
--- a/src/shaders/render/Makefile.am
+++ b/src/shaders/render/Makefile.am
@@ -64,17 +64,23 @@ INTEL_G7B =				\
 	exa_wm_write.g7b 		\
 	exa_wm_yuv_rgb.g7b
 
+# XXX: only regenerate binary for EU code containing JMPI instructions
+INTEL_G7B_HASWELL = \
+	exa_wm_src_sample_planar.g7b.haswell	\
+	$(NULL)
+
 TARGETS  =
 if HAVE_GEN4ASM
 TARGETS += $(INTEL_G4B)
 TARGETS += $(INTEL_G4B_GEN5)
 TARGETS += $(INTEL_G6B)
 TARGETS += $(INTEL_G7B)
+TARGETS += $(INTEL_G7B_HASWELL)
 endif
 
 all-local: $(TARGETS)
 
-SUFFIXES = .g4a .g4s .g4b .g6a .g6s .g6b .g7a .g7s .g7b
+SUFFIXES = .g4a .g4s .g4b .g6a .g6s .g6b .g7a .g7s .g7b .g7b.haswell
 
 if HAVE_GEN4ASM
 $(INTEL_G4S): $(INTEL_G4A) $(INTEL_G4I)
@@ -96,6 +102,8 @@ $(INTEL_G7S): $(INTEL_G7A) $(INTEL_G7I)
 	$(AM_V_GEN)m4 $< > $@
 .g7s.g7b:
 	$(AM_V_GEN)$(GEN4ASM) -g 7 -o $@ $<
+.g7s.g7b.haswell:
+	$(AM_V_GEN)$(GEN4ASM) -g 7.5 -o $@ $<
 endif
 
 CLEANFILES = \
@@ -113,6 +121,7 @@ EXTRA_DIST = \
 	$(INTEL_G6B)		\
 	$(INTEL_G7A)		\
 	$(INTEL_G7B)		\
+	$(INTEL_G7B_HASWELL)	\
 	$(NULL)
 
 # Extra clean files so that maintainer-clean removes *everything*
diff --git a/src/shaders/render/exa_wm_src_sample_planar.g7b.haswell b/src/shaders/render/exa_wm_src_sample_planar.g7b.haswell
new file mode 100644
index 0000000..dc388c2
--- /dev/null
+++ b/src/shaders/render/exa_wm_src_sample_planar.g7b.haswell
@@ -0,0 +1,20 @@
+   { 0x01000010, 0x20002d3c, 0x000000c0, 0x00010001 },
+   { 0x00010020, 0x34001c00, 0x00001400, 0x000000c0 },
+   { 0x01000010, 0x20002d3c, 0x000000c0, 0x00020002 },
+   { 0x00010020, 0x34001c00, 0x00001400, 0x00000070 },
+   { 0x00000201, 0x20080061, 0x00000000, 0x0000e000 },
+   { 0x00600201, 0x28200021, 0x008d0000, 0x00000000 },
+   { 0x02800031, 0x22001ca9, 0x00000820, 0x0a2c0203 },
+   { 0x00000201, 0x20080061, 0x00000000, 0x0000e000 },
+   { 0x00600201, 0x28200021, 0x008d0000, 0x00000000 },
+   { 0x02800031, 0x22401ca9, 0x00000820, 0x0a2c0405 },
+   { 0x00000020, 0x34001c00, 0x00001400, 0x00000060 },
+   { 0x00800201, 0x220003fd, 0x00000000, 0x3f000000 },
+   { 0x00800201, 0x224003fd, 0x00000000, 0x3f000000 },
+   { 0x00000020, 0x34001c00, 0x00001400, 0x00000030 },
+   { 0x00000201, 0x20080061, 0x00000000, 0x0000c000 },
+   { 0x00600201, 0x28200021, 0x008d0000, 0x00000000 },
+   { 0x02800031, 0x22001ca9, 0x00000820, 0x0a4c0203 },
+   { 0x00000201, 0x20080061, 0x00000000, 0x0000e000 },
+   { 0x00600201, 0x28200021, 0x008d0000, 0x00000000 },
+   { 0x02800031, 0x21c01ca9, 0x00000820, 0x0a2c0001 },
diff --git a/src/shaders/vme/Makefile.am b/src/shaders/vme/Makefile.am
index d58a0be..12f0e28 100644
--- a/src/shaders/vme/Makefile.am
+++ b/src/shaders/vme/Makefile.am
@@ -1,4 +1,5 @@
 VME_CORE	= intra_frame.asm inter_frame.asm
+VME75_CORE	= intra_frame_haswell.asm inter_frame_haswell.asm
 
 INTEL_G6B	= intra_frame.g6b inter_frame.g6b
 INTEL_G6A	= intra_frame.g6a inter_frame.g6a
@@ -10,15 +11,21 @@ INTEL_G7A	= intra_frame.g7a inter_frame.g7a
 INTEL_GEN7_INC	= gen7_vme_header.inc
 INTEL_GEN7_ASM	= $(INTEL_G7A:%.g7a=%.gen7.asm)
 
+INTEL_G75B	= intra_frame_haswell.g75b inter_frame_haswell.g75b
+INTEL_G75A	= intra_frame_haswell.g75a inter_frame_haswell.g75a
+INTEL_GEN75_INC	= vme75.inc
+INTEL_GEN75_ASM	= $(INTEL_G75A:%.g75a=%.gen75.asm)
+
 TARGETS  =
 if HAVE_GEN4ASM
 TARGETS += $(INTEL_G6B)
 TARGETS += $(INTEL_G7B)
+TARGETS += $(INTEL_G75B)
 endif
 
 all-local: $(TARGETS)
 
-SUFFIXES = .g6a .g6b .g7a .g7b .gen6.asm .gen7.asm
+SUFFIXES = .g6a .g6b .g7a .g7b .gen6.asm .gen7.asm .g75a .g75b .gen75.asm
 
 if HAVE_GEN4ASM
 $(INTEL_GEN6_ASM): $(VME_CORE) $(INTEL_GEN6_INC)
@@ -32,18 +39,31 @@ $(INTEL_GEN7_ASM): $(VME_CORE) $(INTEL_GEN7_INC)
 	$(AM_V_GEN)m4 $< > $@
 .gen7.asm.g7b:
 	$(AM_V_GEN)$(GEN4ASM) -g 7 -o $@ $<
+
+
+$(INTEL_GEN75_ASM): $(VME75_CORE) $(INTEL_GEN75_INC)
+.g75a.gen75.asm:
+	$(AM_V_GEN)cpp -P $< > _vme0.$@ 	&& \
+	m4 _vme0.$@ > $@				&& \
+	rm _vme0.$@
+.gen75.asm.g75b:
+	$(AM_V_GEN)$(GEN4ASM) -g 7.5 -o $@ $<
 endif
 
-CLEANFILES = $(INTEL_GEN6_ASM) $(INTEL_GEN7_ASM)
+CLEANFILES = $(INTEL_GEN6_ASM) $(INTEL_GEN7_ASM) $(INTEL_GEN75_ASM)
 
 EXTRA_DIST = \
 	$(INTEL_G6A)		\
 	$(INTEL_G6B)		\
 	$(INTEL_G7A)		\
 	$(INTEL_G7B)		\
+	$(INTEL_G75A)		\
+	$(INTEL_G75B)		\
 	$(INTEL_GEN6_INC)	\
 	$(INTEL_GEN7_INC)	\
+	$(INTEL_GEN75_INC)	\
 	$(VME_CORE)		\
+	$(VME75_CORE)		\
 	$(NULL)
 
 # Extra clean files so that maintainer-clean removes *everything*
diff --git a/src/shaders/vme/inter_frame_haswell.asm b/src/shaders/vme/inter_frame_haswell.asm
new file mode 100644
index 0000000..b6f8eb5
--- /dev/null
+++ b/src/shaders/vme/inter_frame_haswell.asm
@@ -0,0 +1,405 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * This program is licensed under the terms and conditions of the
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at
+ * http://www.opensource.org/licenses/eclipse-1.0.php.
+ *
+ */
+// Module name: inter_frame_haswell.asm
+//
+// Make intra prediction estimation and inter (IME/FBR) motion search for Inter frame
+//
+
+//
+//  Now, begin source code....
+//
+
+/*
+ * __START
+ */
+__INTRA_START:
+mov  (16) tmp_reg0.0<1>:UD      0x0:UD {align1};
+mov  (16) tmp_reg2.0<1>:UD      0x0:UD {align1};
+mov  (16) tmp_reg4.0<1>:UD      0x0:UD {align1} ;
+mov  (16) tmp_reg6.0<1>:UD      0x0:UD {align1} ;
+
+shl  (2) read0_header.0<1>:D    orig_xy_ub<2,2,1>:UB 4:UW {align1};    /* (x, y) * 16 */
+add  (1) read0_header.0<1>:D    read0_header.0<0,1,0>:D -8:W {align1};     /* X offset */
+add  (1) read0_header.4<1>:D    read0_header.4<0,1,0>:D -1:W {align1};     /* Y offset */ 
+mov  (1) read0_header.8<1>:UD   BLOCK_32X1 {align1};
+mov  (1) read0_header.20<1>:UB  thread_id_ub {align1};                  /* dispatch id */
+
+shl  (2) read1_header.0<1>:D    orig_xy_ub<2,2,1>:UB 4:UW {align1};    /* (x, y) * 16 */
+add  (1) read1_header.0<1>:D    read1_header.0<0,1,0>:D -4:W {align1};     /* X offset */
+mov  (1) read1_header.8<1>:UD   BLOCK_4X16 {align1};
+mov  (1) read1_header.20<1>:UB  thread_id_ub {align1};                  /* dispatch id */
+        
+shl  (2) vme_m0.8<1>:UW         orig_xy_ub<2,2,1>:UB 4:UW {align1};    /* (x, y) * 16 */
+mov  (1) vme_m0.20<1>:UB        thread_id_ub {align1};                  /* dispatch id */
+
+mul  (1) obw_m0.8<1>:UD         w_in_mb_uw<0,1,0>:UW orig_y_ub<0,1,0>:UB {align1};
+add  (1) obw_m0.8<1>:UD         obw_m0.8<0,1,0>:UD orig_x_ub<0,1,0>:UB {align1};
+mul  (1) obw_m0.8<1>:UD         obw_m0.8<0,1,0>:UD 24:UD {align1};
+mov  (1) obw_m0.20<1>:UB        thread_id_ub {align1};                  /* dispatch id */
+        
+/*
+ * Media Read Message -- fetch Luma neighbor edge pixels 
+ */
+/* ROW */
+mov  (8) msg_reg0.0<1>:UD       read0_header.0<8,8,1>:UD {align1};        
+send (8) msg_ind INEP_ROW<1>:UB null read(BIND_IDX_INEP, 0, 0, 4) mlen 1 rlen 1 {align1};
+
+/* COL */
+mov  (8) msg_reg0.0<1>:UD       read1_header.0<8,8,1>:UD {align1};                
+send (8) msg_ind INEP_COL0<1>:UB null read(BIND_IDX_INEP, 0, 0, 4) mlen 1 rlen 2 {align1};
+        
+/* m2, get the MV/Mb cost passed from constant buffer when
+spawning thread by MEDIA_OBJECT */       
+mov (8) vme_m2<1>:UD            r1.0<8,8,1>:UD {align1};
+
+mov (8) vme_msg_2<1>:UD		vme_m2.0<8,8,1>:UD {align1};
+
+/* m3 */
+mov (8) vme_msg_3<1>:UD		0x0:UD {align1};	        
+
+/* m4 */
+mov  (1) INEP_ROW.0<1>:UD       0x0:UD {align1};
+and  (1) INEP_ROW.4<1>:UD       INEP_ROW.4<0,1,0>:UD            0xFF000000:UD {align1};
+mov  (8) vme_msg_4<1>:UD         INEP_ROW.0<8,8,1>:UD {align1};
+
+/* m5 */        
+mov  (8) vme_msg_5<1>:UD         0x0:UD {align1};
+mov (16) vme_msg_5.0<1>:UB       INEP_COL0.3<32,8,4>:UB {align1};
+mov  (1) vme_msg_5.16<1>:UD      INTRA_PREDICTORE_MODE {align1};
+
+/* the penalty for Intra mode */
+mov  (1) vme_msg_5.28<1>:UD	0x010101:UD {align1};
+
+
+/* m6 */
+
+mov (8) vme_msg_6<1>:UD		0x0:UD {align1};	        
+
+/*
+ * SIC VME message
+ */
+/* m0 */        
+mov  (8) vme_msg_0.0<1>:UD      vme_m0.0<8,8,1>:UD {align1};
+mov  (1) tmp_reg0.0<1>:UW	LUMA_INTRA_MODE:UW {align1};
+/* Use the Luma mode */
+mov  (1) vme_msg_4.5<1>:UB	tmp_reg0.0<0,1,0>:UB {align1};
+
+/* m1 */
+mov  (1) intra_flag<1>:UW       0x0:UW {align1}                     ;
+and.z.f0.0 (1) null<1>:UW transform_8x8_ub<0,1,0>:UB 1:UW {align1};
+(f0.0) mov  (1) intra_part_mask_ub<1>:UB  LUMA_INTRA_8x8_DISABLE {align1};
+
+/* assign MB intra struct from the thread payload*/
+mov (1) mb_intra_struct_ub<1>:UB input_mb_intra_ub<0,1,0>:UB {align1}; 
+
+/* Disable the DC HAAR component when calculating the HAAR SATD of a block */
+mov  (1) tmp_reg0.0<1>:UW	DC_HARR_DISABLE:UW		{align1};
+mov  (1) vme_m1.30<1>:UB	tmp_reg0.0<0,1,0>:UB  {align1};
+
+mov  (1) vme_m0.12<1>:UD        INTRA_SAD_HAAR:UD {align1};    /* 16x16 Source, Intra_harr */
+/* m0 */        
+mov  (8) vme_msg_0.0<1>:UD      vme_m0.0<8,8,1>:UD {align1};
+mov  (8) vme_msg_1<1>:UD        vme_m1.0<8,8,1>:UD {align1};
+
+/* after verification it will be passed by using payload */
+send (8)
+        vme_msg_ind
+        vme_wb<1>:UD
+        null
+        cre(
+                BIND_IDX_VME,
+                VME_SIC_MESSAGE_TYPE
+        )
+        mlen sic_vme_msg_length
+        rlen vme_wb_length
+        {align1};
+/*
+ * Oword Block Write message
+ */
+mov  (8) msg_reg0.0<1>:UD       obw_m0<8,8,1>:UD {align1};
+        
+mov  (1) msg_reg1.0<1>:UD       vme_wb.0<0,1,0>:UD      {align1};
+mov  (1) msg_reg1.4<1>:UD       vme_wb.16<0,1,0>:UD     {align1};
+mov  (1) msg_reg1.8<1>:UD       vme_wb.20<0,1,0>:UD     {align1};
+mov  (1) msg_reg1.12<1>:UD      vme_wb.24<0,1,0>:UD     {align1};
+
+/* Distortion, Intra (17-16), */
+mov  (1) msg_reg1.16<1>:UW      vme_wb.12<0,1,0>:UW     {align1};
+
+mov  (1) msg_reg1.20<1>:UD      vme_wb.8<0,1,0>:UD     {align1};
+/* VME clock counts */
+mov  (1) msg_reg1.24<1>:UD      vme_wb.28<0,1,0>:UD     {align1};
+
+mov  (1) msg_reg1.28<1>:UD      obw_m0.8<0,1,0>:UD     {align1};
+
+/* bind index 3, write 2 oword (32bytes), msg type: 8(OWord Block Write) */
+send (16)
+        msg_ind
+        obw_wb
+        null
+        data_port(
+                OBW_CACHE_TYPE,
+                OBW_MESSAGE_TYPE,
+                OBW_CONTROL_2,
+                OBW_BIND_IDX,
+                OBW_WRITE_COMMIT_CATEGORY,
+                OBW_HEADER_PRESENT
+        )
+        mlen 2
+        rlen obw_wb_length
+        {align1};
+
+/* IME search */
+mov  (1) vme_m0.12<1>:UD        SEARCH_CTRL_SINGLE + INTER_PART_MASK + INTER_SAD_HAAR:UD {align1};    /* 16x16 Source, harr */
+mov  (1) vme_m0.22<1>:UW        REF_REGION_SIZE {align1};         /* Reference Width&Height, 48x40 */
+
+mov  (1) vme_m0.0<1>:UD		vme_m0.8<0,1,0>:UD      {align1};
+
+add  (1) vme_m0.0<1>:W          vme_m0.0<0,1,0>:W -16:W {align1};		/* Reference = (x-16,y-12)-(x+32,y+28) */
+add  (1) vme_m0.2<1>:W          vme_m0.2<0,1,0>:W -12:W {align1};
+
+mov  (1) vme_m0.0<1>:W		-16:W			{align1};
+mov  (1) vme_m0.2<1>:W		-12:W			{align1};
+
+mov  (1) vme_m0.4<1>:UD		vme_m0.0<0,1,0>:UD	{align1};
+
+mov  (8) vme_msg_0.0<1>:UD      vme_m0.0<8,8,1>:UD {align1};
+
+mov  (1) vme_m1.0<1>:UD         ADAPTIVE_SEARCH_ENABLE:ud {align1} ;
+mov  (1) vme_m1.4<1>:UD         MAX_NUM_MV:UD {align1};                                   /* Default value MAX 32 MVs */
+mov  (1) vme_m1.8<1>:UD         START_CENTER + SEARCH_PATH_LEN:UD {align1};
+mov  (8) vme_msg_1.0<1>:UD      vme_m1.0<8,8,1>:UD {align1};
+
+mov (8) vme_msg_2<1>:UD		vme_m2.0<8,8,1>:UD {align1};
+/* M3/M4 search path */
+
+mov  (1) vme_msg_3.0<1>:UD	0x01010101:UD {align1};
+mov  (1) vme_msg_3.4<1>:UD	0x10010101:UD {align1};
+mov  (1) vme_msg_3.8<1>:UD	0x0F0F0F0F:UD {align1};
+mov  (1) vme_msg_3.12<1>:UD	0x100F0F0F:UD {align1};
+mov  (1) vme_msg_3.16<1>:UD	0x01010101:UD {align1};
+mov  (1) vme_msg_3.20<1>:UD	0x10010101:UD {align1};
+mov  (1) vme_msg_3.24<1>:UD	0x0F0F0F0F:UD {align1};
+mov  (1) vme_msg_3.28<1>:UD	0x100F0F0F:UD {align1};
+
+mov  (1) vme_msg_4.0<1>:UD	0x01010101:UD {align1};
+mov  (1) vme_msg_4.4<1>:UD	0x10010101:UD {align1};
+mov  (1) vme_msg_4.8<1>:UD	0x0F0F0F0F:UD {align1};
+mov  (1) vme_msg_4.12<1>:UD	0x000F0F0F:UD {align1};
+
+mov  (4) vme_msg_4.16<1>:UD	0x0:UD {align1};
+
+send (8)
+        vme_msg_ind
+        vme_wb<1>:UD
+        null
+        vme(
+                BIND_IDX_VME,
+                0,
+                0,
+                VME_IME_MESSAGE_TYPE
+        )
+        mlen ime_vme_msg_length
+        rlen vme_wb_length {align1};
+
+/* Set Macroblock-shape/mode for FBR */
+
+mov  (1) vme_m2.20<1>:UD	0x0:UD {align1};
+mov  (1) vme_m2.21<1>:UB	vme_wb.25<0,1,0>:UB	{align1};
+mov  (1) vme_m2.22<1>:UB	vme_wb.26<0,1,0>:UB	{align1};
+
+and  (1) tmp_reg0.0<1>:UW	vme_wb.0<0,1,0>:UW	0x03:UW {align1};
+mov  (1) vme_m2.20<1>:UB	tmp_reg0.0<0,1,0>:UB    {align1};
+
+/* Write IME inter info */
+add  (1) obw_m0.8<1>:UD         obw_m0.8<0,1,0>:UD 0x02:UD {align1};
+mov  (8) msg_reg0.0<1>:UD       obw_m0<8,8,1>:UD {align1};
+
+mov  (1) msg_reg1.0<1>:UD       vme_wb.0<0,1,0>:UD      {align1};
+
+mov  (1) msg_reg1.4<1>:UD       vme_wb.24<0,1,0>:UD     {align1};
+/* Inter distortion of IME */
+mov  (1) msg_reg1.8<1>:UD       vme_wb.8<0,1,0>:UD     {align1};
+
+mov  (1) msg_reg1.12<1>:UD	obw_m0.8<0,1,0>:UD {align1};
+
+/* bind index 3, write 1 oword (16 bytes), msg type: 8 (OWord Block Write) */
+send (16)
+        msg_ind
+        obw_wb
+        null
+        data_port(
+                OBW_CACHE_TYPE,
+                OBW_MESSAGE_TYPE,
+                OBW_CONTROL_0,
+                OBW_BIND_IDX,
+                OBW_WRITE_COMMIT_CATEGORY,
+                OBW_HEADER_PRESENT
+        )
+        mlen 2
+        rlen obw_wb_length
+        {align1};
+
+/* Write IME MV */
+add  (1) obw_m0.8<1>:UD         obw_m0.8<0,1,0>:UD 0x01:UD {align1};
+mov  (8) msg_reg0.0<1>:UD       obw_m0<8,8,1>:UD {align1};
+
+mov  (8) msg_reg1.0<1>:UD       vme_wb1.0<8,8,1>:UD {align1};
+mov  (8) msg_reg2.0<1>:ud       vme_wb2.0<8,8,1>:ud {align1};
+mov  (8) msg_reg3.0<1>:ud       vme_wb3.0<8,8,1>:ud {align1};
+mov  (8) msg_reg4.0<1>:ud       vme_wb4.0<8,8,1>:ud {align1};                
+/* bind index 3, write  8 oword (128 bytes), msg type: 8(OWord Block Write) */
+send (16)
+        msg_ind
+        obw_wb
+        null
+        data_port(
+                OBW_CACHE_TYPE,
+                OBW_MESSAGE_TYPE,
+                OBW_CONTROL_8,
+                OBW_BIND_IDX,
+                OBW_WRITE_COMMIT_CATEGORY,
+                OBW_HEADER_PRESENT
+        )
+        mlen 5
+        rlen obw_wb_length
+        {align1};
+
+/* Write IME RefID */
+add  (1) obw_m0.8<1>:UD         obw_m0.8<0,1,0>:UD 0x08:UD {align1};
+mov  (8) msg_reg0.0<1>:UD       obw_m0<8,8,1>:UD {align1};
+
+mov  (8) msg_reg1.0<1>:UD	vme_wb6.0<8,8,1>:UD {align1};
+
+/* bind index 3, write 2 oword (32bytes), msg type: 8(OWord Block Write) */
+send (16)
+        msg_ind
+        obw_wb
+        null
+        data_port(
+                OBW_CACHE_TYPE,
+                OBW_MESSAGE_TYPE,
+                OBW_CONTROL_2,
+                OBW_BIND_IDX,
+                OBW_WRITE_COMMIT_CATEGORY,
+                OBW_HEADER_PRESENT
+        )
+        mlen 2
+        rlen obw_wb_length
+        {align1};
+
+/* Send FBR message into CRE */
+
+mov  (8) vme_msg_3.0<1>:UD       vme_wb1.0<8,8,1>:UD {align1};
+mov  (8) vme_msg_4.0<1>:ud       vme_wb2.0<8,8,1>:ud {align1};
+mov  (8) vme_msg_5.0<1>:ud       vme_wb3.0<8,8,1>:ud {align1};
+mov  (8) vme_msg_6.0<1>:ud       vme_wb4.0<8,8,1>:ud {align1};                
+
+mov  (1) vme_m0.12<1>:UD	INTER_SAD_HAAR + SUB_PEL_MODE_QUARTER + FBR_BME_DISABLE:UD {align1};    /* 16x16 Source, 1/4 pixel, harr, BME disable */
+mov  (8) vme_msg_0.0<1>:UD	vme_m0.0<8,8,1>:UD  {align1};
+mov  (8) vme_msg_1.0<1>:UD	vme_m1.0<8,8,1>:UD  {align1};
+
+mov  (8) vme_msg_2.0<1>:UD		vme_m2.0<8,8,1>:UD	{align1};
+
+/* after verification it will be passed by using payload */
+send (8)
+        vme_msg_ind
+        vme_wb<1>:UD
+        null
+        cre(
+                BIND_IDX_VME,
+                VME_FBR_MESSAGE_TYPE
+        )
+        mlen fbr_vme_msg_length
+        rlen vme_wb_length
+        {align1};
+
+add  (1) obw_m0.8<1>:UD         obw_m0.8<0,1,0>:UD 0x02:UD {align1};
+mov  (8) msg_reg0.0<1>:UD       obw_m0<8,8,1>:UD {align1};
+/* write FME info */
+mov  (1) msg_reg1.0<1>:UD       vme_wb.0<0,1,0>:UD      {align1};
+
+mov  (1) msg_reg1.4<1>:UD       vme_wb.24<0,1,0>:UD     {align1};
+/* Inter distortion of FME */
+mov  (1) msg_reg1.8<1>:UD       vme_wb.8<0,1,0>:UD     {align1};
+
+mov  (1) msg_reg1.12<1>:UD	vme_m2.20<0,1,0>:UD {align1};
+
+/* bind index 3, write 1 oword (16 bytes), msg type: 8 (OWord Block Write) */
+send (16)
+        msg_ind
+        obw_wb
+        null
+        data_port(
+                OBW_CACHE_TYPE,
+                OBW_MESSAGE_TYPE,
+                OBW_CONTROL_0,
+                OBW_BIND_IDX,
+                OBW_WRITE_COMMIT_CATEGORY,
+                OBW_HEADER_PRESENT
+        )
+        mlen 2
+        rlen obw_wb_length
+        {align1};
+
+/* Write FME/BME MV */
+add  (1) obw_m0.8<1>:UD         obw_m0.8<0,1,0>:UD 0x01:UD {align1};
+mov  (8) msg_reg0.0<1>:UD       obw_m0.0<8,8,1>:UD {align1};
+
+
+mov  (8) msg_reg1.0<1>:UD       vme_wb1.0<8,8,1>:UD {align1};
+mov  (8) msg_reg2.0<1>:ud       vme_wb2.0<8,8,1>:ud {align1};
+mov  (8) msg_reg3.0<1>:ud       vme_wb3.0<8,8,1>:ud {align1};
+mov  (8) msg_reg4.0<1>:ud       vme_wb4.0<8,8,1>:ud {align1};                
+/* bind index 3, write  8 oword (128 bytes), msg type: 8(OWord Block Write) */
+send (16)
+        msg_ind
+        obw_wb
+        null
+        data_port(
+                OBW_CACHE_TYPE,
+                OBW_MESSAGE_TYPE,
+                OBW_CONTROL_8,
+                OBW_BIND_IDX,
+                OBW_WRITE_COMMIT_CATEGORY,
+                OBW_HEADER_PRESENT
+        )
+        mlen 5
+        rlen obw_wb_length
+        {align1};
+
+/* Write FME/BME RefID */
+add  (1) obw_m0.8<1>:UD         obw_m0.8<0,1,0>:UD 0x08:UD {align1};
+mov  (8) msg_reg0.0<1>:UD       obw_m0<8,8,1>:UD {align1};
+
+mov  (8) msg_reg1.0<1>:UD	vme_wb6.0<8,8,1>:UD {align1};
+
+/* bind index 3, write 2 oword (32bytes), msg type: 8(OWord Block Write) */
+send (16)
+        msg_ind
+        obw_wb
+        null
+        data_port(
+                OBW_CACHE_TYPE,
+                OBW_MESSAGE_TYPE,
+                OBW_CONTROL_2,
+                OBW_BIND_IDX,
+                OBW_WRITE_COMMIT_CATEGORY,
+                OBW_HEADER_PRESENT
+        )
+        mlen 2
+        rlen obw_wb_length
+        {align1};
+
+__EXIT: 
+/*
+ * kill thread
+ */        
+mov  (8) ts_msg_reg0<1>:UD         r0<8,8,1>:UD {align1};
+send (16) ts_msg_ind acc0<1>UW null thread_spawner(0, 0, 1) mlen 1 rlen 0 {align1 EOT};
diff --git a/src/shaders/vme/inter_frame_haswell.g75a b/src/shaders/vme/inter_frame_haswell.g75a
new file mode 100644
index 0000000..e95ed93
--- /dev/null
+++ b/src/shaders/vme/inter_frame_haswell.g75a
@@ -0,0 +1,2 @@
+#include "vme75.inc"
+#include "inter_frame_haswell.asm"
diff --git a/src/shaders/vme/inter_frame_haswell.g75b b/src/shaders/vme/inter_frame_haswell.g75b
new file mode 100644
index 0000000..86971d4
--- /dev/null
+++ b/src/shaders/vme/inter_frame_haswell.g75b
@@ -0,0 +1,137 @@
+   { 0x00800001, 0x24000061, 0x00000000, 0x00000000 },
+   { 0x00800001, 0x24400061, 0x00000000, 0x00000000 },
+   { 0x00800001, 0x24800061, 0x00000000, 0x00000000 },
+   { 0x00800001, 0x24c00061, 0x00000000, 0x00000000 },
+   { 0x00200009, 0x24002e25, 0x004500a0, 0x00040004 },
+   { 0x00000040, 0x24003ca5, 0x00000400, 0xfff8fff8 },
+   { 0x00000040, 0x24043ca5, 0x00000404, 0xffffffff },
+   { 0x00000001, 0x240800e1, 0x00000000, 0x0000001f },
+   { 0x00000001, 0x24140231, 0x00000014, 0x00000000 },
+   { 0x00200009, 0x24202e25, 0x004500a0, 0x00040004 },
+   { 0x00000040, 0x24203ca5, 0x00000420, 0xfffcfffc },
+   { 0x00000001, 0x242800e1, 0x00000000, 0x000f0003 },
+   { 0x00000001, 0x24340231, 0x00000014, 0x00000000 },
+   { 0x00200009, 0x24482e29, 0x004500a0, 0x00040004 },
+   { 0x00000001, 0x24540231, 0x00000014, 0x00000000 },
+   { 0x00000041, 0x24884521, 0x000000a2, 0x000000a1 },
+   { 0x00000040, 0x24884421, 0x00000488, 0x000000a0 },
+   { 0x00000041, 0x24880c21, 0x00000488, 0x00000018 },
+   { 0x00000001, 0x24940231, 0x00000014, 0x00000000 },
+   { 0x00600001, 0x28000021, 0x008d0400, 0x00000000 },
+   { 0x04600031, 0x23801cb1, 0x00000800, 0x02190004 },
+   { 0x00600001, 0x28000021, 0x008d0420, 0x00000000 },
+   { 0x04600031, 0x23a01cb1, 0x00000800, 0x02290004 },
+   { 0x00600001, 0x25600021, 0x008d0020, 0x00000000 },
+   { 0x00600001, 0x28400021, 0x008d0560, 0x00000000 },
+   { 0x00600001, 0x28600061, 0x00000000, 0x00000000 },
+   { 0x00000001, 0x23800061, 0x00000000, 0x00000000 },
+   { 0x00000005, 0x23840c21, 0x00000384, 0xff000000 },
+   { 0x00600001, 0x28800021, 0x008d0380, 0x00000000 },
+   { 0x00600001, 0x28a00061, 0x00000000, 0x00000000 },
+   { 0x00800001, 0x28a00231, 0x00cf03a3, 0x00000000 },
+   { 0x00000001, 0x28b00061, 0x00000000, 0x11111111 },
+   { 0x00000001, 0x28bc0061, 0x00000000, 0x00010101 },
+   { 0x00600001, 0x28c00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x28000021, 0x008d0440, 0x00000000 },
+   { 0x00000001, 0x24000169, 0x00000000, 0x00010001 },
+   { 0x00000001, 0x28850231, 0x00000400, 0x00000000 },
+   { 0x00000001, 0x247c0169, 0x00000000, 0x00000000 },
+   { 0x01000005, 0x20002e28, 0x000000a4, 0x00010001 },
+   { 0x00010001, 0x247c00f1, 0x00000000, 0x00000002 },
+   { 0x00000001, 0x247d0231, 0x000000a5, 0x00000000 },
+   { 0x00000001, 0x24000169, 0x00000000, 0x00200020 },
+   { 0x00000001, 0x247e0231, 0x00000400, 0x00000000 },
+   { 0x00000001, 0x244c0061, 0x00000000, 0x00800000 },
+   { 0x00600001, 0x28000021, 0x008d0440, 0x00000000 },
+   { 0x00600001, 0x28200021, 0x008d0460, 0x00000000 },
+   { 0x0d600031, 0x21801ca1, 0x00000800, 0x0e782000 },
+   { 0x00600001, 0x28000021, 0x008d0480, 0x00000000 },
+   { 0x00000001, 0x28200021, 0x00000180, 0x00000000 },
+   { 0x00000001, 0x28240021, 0x00000190, 0x00000000 },
+   { 0x00000001, 0x28280021, 0x00000194, 0x00000000 },
+   { 0x00000001, 0x282c0021, 0x00000198, 0x00000000 },
+   { 0x00000001, 0x28300129, 0x0000018c, 0x00000000 },
+   { 0x00000001, 0x28340021, 0x00000188, 0x00000000 },
+   { 0x00000001, 0x28380021, 0x0000019c, 0x00000000 },
+   { 0x00000001, 0x283c0021, 0x00000488, 0x00000000 },
+   { 0x0a800031, 0x20001cac, 0x00000800, 0x040a0203 },
+   { 0x00000001, 0x244c0061, 0x00000000, 0x00200000 },
+   { 0x00000001, 0x24560169, 0x00000000, 0x28302830 },
+   { 0x00000001, 0x24400021, 0x00000448, 0x00000000 },
+   { 0x00000040, 0x24403dad, 0x00000440, 0xfff0fff0 },
+   { 0x00000040, 0x24423dad, 0x00000442, 0xfff4fff4 },
+   { 0x00000001, 0x244001ed, 0x00000000, 0xfff0fff0 },
+   { 0x00000001, 0x244201ed, 0x00000000, 0xfff4fff4 },
+   { 0x00000001, 0x24440021, 0x00000440, 0x00000000 },
+   { 0x00600001, 0x28000021, 0x008d0440, 0x00000000 },
+   { 0x00000001, 0x24600061, 0x00000000, 0x00000002 },
+   { 0x00000001, 0x24640061, 0x00000000, 0x00000020 },
+   { 0x00000001, 0x24680061, 0x00000000, 0x30003030 },
+   { 0x00600001, 0x28200021, 0x008d0460, 0x00000000 },
+   { 0x00600001, 0x28400021, 0x008d0560, 0x00000000 },
+   { 0x00000001, 0x28600061, 0x00000000, 0x01010101 },
+   { 0x00000001, 0x28640061, 0x00000000, 0x10010101 },
+   { 0x00000001, 0x28680061, 0x00000000, 0x0f0f0f0f },
+   { 0x00000001, 0x286c0061, 0x00000000, 0x100f0f0f },
+   { 0x00000001, 0x28700061, 0x00000000, 0x01010101 },
+   { 0x00000001, 0x28740061, 0x00000000, 0x10010101 },
+   { 0x00000001, 0x28780061, 0x00000000, 0x0f0f0f0f },
+   { 0x00000001, 0x287c0061, 0x00000000, 0x100f0f0f },
+   { 0x00000001, 0x28800061, 0x00000000, 0x01010101 },
+   { 0x00000001, 0x28840061, 0x00000000, 0x10010101 },
+   { 0x00000001, 0x28880061, 0x00000000, 0x0f0f0f0f },
+   { 0x00000001, 0x288c0061, 0x00000000, 0x000f0f0f },
+   { 0x00400001, 0x28900061, 0x00000000, 0x00000000 },
+   { 0x08600031, 0x21801ca1, 0x00000800, 0x0a784000 },
+   { 0x00000001, 0x25740061, 0x00000000, 0x00000000 },
+   { 0x00000001, 0x25750231, 0x00000199, 0x00000000 },
+   { 0x00000001, 0x25760231, 0x0000019a, 0x00000000 },
+   { 0x00000005, 0x24002d29, 0x00000180, 0x00030003 },
+   { 0x00000001, 0x25740231, 0x00000400, 0x00000000 },
+   { 0x00000040, 0x24880c21, 0x00000488, 0x00000002 },
+   { 0x00600001, 0x28000021, 0x008d0480, 0x00000000 },
+   { 0x00000001, 0x28200021, 0x00000180, 0x00000000 },
+   { 0x00000001, 0x28240021, 0x00000198, 0x00000000 },
+   { 0x00000001, 0x28280021, 0x00000188, 0x00000000 },
+   { 0x00000001, 0x282c0021, 0x00000488, 0x00000000 },
+   { 0x0a800031, 0x20001cac, 0x00000800, 0x040a0003 },
+   { 0x00000040, 0x24880c21, 0x00000488, 0x00000001 },
+   { 0x00600001, 0x28000021, 0x008d0480, 0x00000000 },
+   { 0x00600001, 0x28200021, 0x008d01a0, 0x00000000 },
+   { 0x00600001, 0x28400021, 0x008d01c0, 0x00000000 },
+   { 0x00600001, 0x28600021, 0x008d01e0, 0x00000000 },
+   { 0x00600001, 0x28800021, 0x008d0200, 0x00000000 },
+   { 0x0a800031, 0x20001cac, 0x00000800, 0x0a0a0403 },
+   { 0x00000040, 0x24880c21, 0x00000488, 0x00000008 },
+   { 0x00600001, 0x28000021, 0x008d0480, 0x00000000 },
+   { 0x00600001, 0x28200021, 0x008d0240, 0x00000000 },
+   { 0x0a800031, 0x20001cac, 0x00000800, 0x040a0203 },
+   { 0x00600001, 0x28600021, 0x008d01a0, 0x00000000 },
+   { 0x00600001, 0x28800021, 0x008d01c0, 0x00000000 },
+   { 0x00600001, 0x28a00021, 0x008d01e0, 0x00000000 },
+   { 0x00600001, 0x28c00021, 0x008d0200, 0x00000000 },
+   { 0x00000001, 0x244c0061, 0x00000000, 0x00243000 },
+   { 0x00600001, 0x28000021, 0x008d0440, 0x00000000 },
+   { 0x00600001, 0x28200021, 0x008d0460, 0x00000000 },
+   { 0x00600001, 0x28400021, 0x008d0560, 0x00000000 },
+   { 0x0d600031, 0x21801ca1, 0x00000800, 0x0e786000 },
+   { 0x00000040, 0x24880c21, 0x00000488, 0x00000002 },
+   { 0x00600001, 0x28000021, 0x008d0480, 0x00000000 },
+   { 0x00000001, 0x28200021, 0x00000180, 0x00000000 },
+   { 0x00000001, 0x28240021, 0x00000198, 0x00000000 },
+   { 0x00000001, 0x28280021, 0x00000188, 0x00000000 },
+   { 0x00000001, 0x282c0021, 0x00000574, 0x00000000 },
+   { 0x0a800031, 0x20001cac, 0x00000800, 0x040a0003 },
+   { 0x00000040, 0x24880c21, 0x00000488, 0x00000001 },
+   { 0x00600001, 0x28000021, 0x008d0480, 0x00000000 },
+   { 0x00600001, 0x28200021, 0x008d01a0, 0x00000000 },
+   { 0x00600001, 0x28400021, 0x008d01c0, 0x00000000 },
+   { 0x00600001, 0x28600021, 0x008d01e0, 0x00000000 },
+   { 0x00600001, 0x28800021, 0x008d0200, 0x00000000 },
+   { 0x0a800031, 0x20001cac, 0x00000800, 0x0a0a0403 },
+   { 0x00000040, 0x24880c21, 0x00000488, 0x00000008 },
+   { 0x00600001, 0x28000021, 0x008d0480, 0x00000000 },
+   { 0x00600001, 0x28200021, 0x008d0240, 0x00000000 },
+   { 0x0a800031, 0x20001cac, 0x00000800, 0x040a0203 },
+   { 0x00600001, 0x2e000021, 0x008d0000, 0x00000000 },
+   { 0x07800031, 0x24001ca8, 0x00000e00, 0x82000010 },
diff --git a/src/shaders/vme/intra_frame_haswell.asm b/src/shaders/vme/intra_frame_haswell.asm
new file mode 100644
index 0000000..64efd55
--- /dev/null
+++ b/src/shaders/vme/intra_frame_haswell.asm
@@ -0,0 +1,160 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * This program is licensed under the terms and conditions of the
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at
+ * http://www.opensource.org/licenses/eclipse-1.0.php.
+ *
+ */
+// Module name: IntraFrame.asm
+//
+// Perform intra prediction estimation for an intra frame
+//
+
+//
+//  Now, begin source code....
+//
+
+/*
+ * __START
+ */
+__INTRA_START:
+mov  (16) tmp_reg0.0<1>:UD      0x0:UD {align1};
+mov  (16) tmp_reg2.0<1>:UD      0x0:UD {align1};
+mov  (16) tmp_reg4.0<1>:UD      0x0:UD {align1} ;
+mov  (16) tmp_reg6.0<1>:UD      0x0:UD {align1} ;
+
+shl  (2) read0_header.0<1>:D    orig_xy_ub<2,2,1>:UB 4:UW {align1};    /* (x, y) * 16 */
+add  (1) read0_header.0<1>:D    read0_header.0<0,1,0>:D -8:W {align1};     /* X offset */
+add  (1) read0_header.4<1>:D    read0_header.4<0,1,0>:D -1:W {align1};     /* Y offset */ 
+mov  (1) read0_header.8<1>:UD   BLOCK_32X1 {align1};
+mov  (1) read0_header.20<1>:UB  thread_id_ub {align1};                  /* dispatch id */
+
+shl  (2) read1_header.0<1>:D    orig_xy_ub<2,2,1>:UB 4:UW {align1};    /* (x, y) * 16 */
+add  (1) read1_header.0<1>:D    read1_header.0<0,1,0>:D -4:W {align1};     /* X offset */
+mov  (1) read1_header.8<1>:UD   BLOCK_4X16 {align1};
+mov  (1) read1_header.20<1>:UB  thread_id_ub {align1};                  /* dispatch id */
+        
+shl  (2) vme_m0.8<1>:UW         orig_xy_ub<2,2,1>:UB 4:UW {align1};    /* (x, y) * 16 */
+mov  (1) vme_m0.20<1>:UB        thread_id_ub {align1};                  /* dispatch id */
+
+mul  (1) obw_m0.8<1>:UD         w_in_mb_uw<0,1,0>:UW orig_y_ub<0,1,0>:UB {align1};
+add  (1) obw_m0.8<1>:UD         obw_m0.8<0,1,0>:UD orig_x_ub<0,1,0>:UB {align1};
+mul  (1) obw_m0.8<1>:UD         obw_m0.8<0,1,0>:UD 0x02:UD {align1};
+mov  (1) obw_m0.20<1>:UB        thread_id_ub {align1};                  /* dispatch id */
+        
+/*
+ * Media Read Message -- fetch Luma neighbor edge pixels 
+ */
+/* ROW */
+mov  (8) msg_reg0.0<1>:UD       read0_header.0<8,8,1>:UD {align1};        
+send (8) msg_ind INEP_ROW<1>:UB null read(BIND_IDX_INEP, 0, 0, 4) mlen 1 rlen 1 {align1};
+
+/* COL */
+mov  (8) msg_reg0.0<1>:UD       read1_header.0<8,8,1>:UD {align1};                
+send (8) msg_ind INEP_COL0<1>:UB null read(BIND_IDX_INEP, 0, 0, 4) mlen 1 rlen 2 {align1};
+        
+/* m2, get the MV/Mb cost passed by constant buffer 
+when creating EU thread by MEDIA_OBJECT */       
+mov (8) vme_msg_2<1>:UD         r1.0<8,8,1>:UD {align1};
+
+/* m3 */
+mov (8) vme_msg_3<1>:UD		0x0:UD {align1};	        
+
+/* m4 */
+mov  (1) INEP_ROW.0<1>:UD       0x0:UD {align1};
+and  (1) INEP_ROW.4<1>:UD       INEP_ROW.4<0,1,0>:UD            0xFF000000:UD {align1};
+mov  (8) vme_msg_4<1>:UD         INEP_ROW.0<8,8,1>:UD {align1};
+
+/* m5 */        
+mov  (8) vme_msg_5<1>:UD         0x0:UD {align1};
+mov (16) vme_msg_5.0<1>:UB       INEP_COL0.3<32,8,4>:UB {align1};
+mov  (1) vme_msg_5.16<1>:UD      INTRA_PREDICTORE_MODE {align1};
+
+/* the penalty for Intra mode */
+mov  (1) vme_msg_5.28<1>:UD	0x010101:UD {align1};
+
+
+/* m6 */
+
+mov (8) vme_msg_6<1>:UD		0x0:UD {align1};	        
+
+/*
+ * VME message
+ */
+/* m0 */        
+mov  (8) vme_msg_0.0<1>:UD      vme_m0.0<8,8,1>:UD {align1};
+mov  (1) tmp_reg0.0<1>:UW	LUMA_INTRA_MODE:UW {align1};
+/* Use the Luma mode */
+mov  (1) vme_msg_4.5<1>:UB	tmp_reg0.0<0,1,0>:UB {align1};
+
+/* m1 */
+mov  (1) intra_flag<1>:UW       0x0:UW {align1}                     ;
+and.z.f0.0 (1) null<1>:UW transform_8x8_ub<0,1,0>:UB 1:UW {align1};
+(f0.0) mov  (1) intra_part_mask_ub<1>:UB  LUMA_INTRA_8x8_DISABLE {align1};
+
+/* assign MB intra struct from the thread payload*/
+mov (1) mb_intra_struct_ub<1>:UB input_mb_intra_ub<0,1,0>:UB {align1}; 
+ 
+/* Disable the DC HAAR component when calculating the HAAR SATD block */
+mov  (1) tmp_reg0.0<1>:UW	DC_HARR_DISABLE:UW		{align1};
+mov  (1) vme_m1.30<1>:UB	tmp_reg0.0<0,1,0>:UB  {align1};
+
+/* m0, m1: copy the prepared vme_m0/vme_m1 into the message registers */
+mov  (8) vme_msg_0.0<1>:UD      vme_m0.0<8,8,1>:UD {align1};
+mov  (8) vme_msg_1<1>:UD        vme_m1.0<8,8,1>:UD {align1};
+
+/* after verification it will be passed by using payload */
+send (8)
+        vme_msg_ind
+        vme_wb<1>:UD
+        null
+        cre(
+                BIND_IDX_VME,
+                VME_SIC_MESSAGE_TYPE
+        )
+        mlen sic_vme_msg_length
+        rlen vme_wb_length
+        {align1};
+/*
+ * Oword Block Write message
+ */
+mov  (8) msg_reg0.0<1>:UD       obw_m0<8,8,1>:UD {align1};
+        
+mov  (1) msg_reg1.0<1>:UD       vme_wb.0<0,1,0>:UD      {align1};
+mov  (1) msg_reg1.4<1>:UD       vme_wb.16<0,1,0>:UD     {align1};
+mov  (1) msg_reg1.8<1>:UD       vme_wb.20<0,1,0>:UD     {align1};
+mov  (1) msg_reg1.12<1>:UD      vme_wb.24<0,1,0>:UD     {align1};
+
+/* Distortion, Intra (17-16), */
+mov  (1) msg_reg1.16<1>:UW      vme_wb.12<0,1,0>:UW     {align1};
+
+mov  (1) msg_reg1.20<1>:UD      vme_wb.8<0,1,0>:UD     {align1};
+/* VME clock counts */
+mov  (1) msg_reg1.24<1>:UD      vme_wb.28<0,1,0>:UD     {align1};
+
+mov  (1) msg_reg1.28<1>:UD      obw_m0.8<0,1,0>:UD     {align1};
+
+/* bind index 3, write 2 oword (32bytes), msg type: 8(OWord Block Write) */
+send (16)
+        msg_ind
+        obw_wb
+        null
+        data_port(
+                OBW_CACHE_TYPE,
+                OBW_MESSAGE_TYPE,
+                OBW_CONTROL_2,
+                OBW_BIND_IDX,
+                OBW_WRITE_COMMIT_CATEGORY,
+                OBW_HEADER_PRESENT
+        )
+        mlen 2
+        rlen obw_wb_length
+        {align1};
+
+__EXIT: 
+/*
+ * kill thread
+ */        
+mov  (8) ts_msg_reg0<1>:UD         r0<8,8,1>:UD {align1};
+send (16) ts_msg_ind acc0<1>UW null thread_spawner(0, 0, 1) mlen 1 rlen 0 {align1 EOT};
diff --git a/src/shaders/vme/intra_frame_haswell.g75a b/src/shaders/vme/intra_frame_haswell.g75a
new file mode 100644
index 0000000..a690fdd
--- /dev/null
+++ b/src/shaders/vme/intra_frame_haswell.g75a
@@ -0,0 +1,2 @@
+#include "vme75.inc"
+#include "intra_frame_haswell.asm"
diff --git a/src/shaders/vme/intra_frame_haswell.g75b b/src/shaders/vme/intra_frame_haswell.g75b
new file mode 100644
index 0000000..5ae7a99
--- /dev/null
+++ b/src/shaders/vme/intra_frame_haswell.g75b
@@ -0,0 +1,57 @@
+   { 0x00800001, 0x24000061, 0x00000000, 0x00000000 },
+   { 0x00800001, 0x24400061, 0x00000000, 0x00000000 },
+   { 0x00800001, 0x24800061, 0x00000000, 0x00000000 },
+   { 0x00800001, 0x24c00061, 0x00000000, 0x00000000 },
+   { 0x00200009, 0x24002e25, 0x004500a0, 0x00040004 },
+   { 0x00000040, 0x24003ca5, 0x00000400, 0xfff8fff8 },
+   { 0x00000040, 0x24043ca5, 0x00000404, 0xffffffff },
+   { 0x00000001, 0x240800e1, 0x00000000, 0x0000001f },
+   { 0x00000001, 0x24140231, 0x00000014, 0x00000000 },
+   { 0x00200009, 0x24202e25, 0x004500a0, 0x00040004 },
+   { 0x00000040, 0x24203ca5, 0x00000420, 0xfffcfffc },
+   { 0x00000001, 0x242800e1, 0x00000000, 0x000f0003 },
+   { 0x00000001, 0x24340231, 0x00000014, 0x00000000 },
+   { 0x00200009, 0x24482e29, 0x004500a0, 0x00040004 },
+   { 0x00000001, 0x24540231, 0x00000014, 0x00000000 },
+   { 0x00000041, 0x24884521, 0x000000a2, 0x000000a1 },
+   { 0x00000040, 0x24884421, 0x00000488, 0x000000a0 },
+   { 0x00000041, 0x24880c21, 0x00000488, 0x00000002 },
+   { 0x00000001, 0x24940231, 0x00000014, 0x00000000 },
+   { 0x00600001, 0x28000021, 0x008d0400, 0x00000000 },
+   { 0x04600031, 0x23801cb1, 0x00000800, 0x02190004 },
+   { 0x00600001, 0x28000021, 0x008d0420, 0x00000000 },
+   { 0x04600031, 0x23a01cb1, 0x00000800, 0x02290004 },
+   { 0x00600001, 0x28400021, 0x008d0020, 0x00000000 },
+   { 0x00600001, 0x28600061, 0x00000000, 0x00000000 },
+   { 0x00000001, 0x23800061, 0x00000000, 0x00000000 },
+   { 0x00000005, 0x23840c21, 0x00000384, 0xff000000 },
+   { 0x00600001, 0x28800021, 0x008d0380, 0x00000000 },
+   { 0x00600001, 0x28a00061, 0x00000000, 0x00000000 },
+   { 0x00800001, 0x28a00231, 0x00cf03a3, 0x00000000 },
+   { 0x00000001, 0x28b00061, 0x00000000, 0x11111111 },
+   { 0x00000001, 0x28bc0061, 0x00000000, 0x00010101 },
+   { 0x00600001, 0x28c00061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x28000021, 0x008d0440, 0x00000000 },
+   { 0x00000001, 0x24000169, 0x00000000, 0x00010001 },
+   { 0x00000001, 0x28850231, 0x00000400, 0x00000000 },
+   { 0x00000001, 0x247c0169, 0x00000000, 0x00000000 },
+   { 0x01000005, 0x20002e28, 0x000000a4, 0x00010001 },
+   { 0x00010001, 0x247c00f1, 0x00000000, 0x00000002 },
+   { 0x00000001, 0x247d0231, 0x000000a5, 0x00000000 },
+   { 0x00000001, 0x24000169, 0x00000000, 0x00200020 },
+   { 0x00000001, 0x247e0231, 0x00000400, 0x00000000 },
+   { 0x00600001, 0x28000021, 0x008d0440, 0x00000000 },
+   { 0x00600001, 0x28200021, 0x008d0460, 0x00000000 },
+   { 0x0d600031, 0x21801ca1, 0x00000800, 0x0e782000 },
+   { 0x00600001, 0x28000021, 0x008d0480, 0x00000000 },
+   { 0x00000001, 0x28200021, 0x00000180, 0x00000000 },
+   { 0x00000001, 0x28240021, 0x00000190, 0x00000000 },
+   { 0x00000001, 0x28280021, 0x00000194, 0x00000000 },
+   { 0x00000001, 0x282c0021, 0x00000198, 0x00000000 },
+   { 0x00000001, 0x28300129, 0x0000018c, 0x00000000 },
+   { 0x00000001, 0x28340021, 0x00000188, 0x00000000 },
+   { 0x00000001, 0x28380021, 0x0000019c, 0x00000000 },
+   { 0x00000001, 0x283c0021, 0x00000488, 0x00000000 },
+   { 0x0a800031, 0x20001cac, 0x00000800, 0x040a0203 },
+   { 0x00600001, 0x2e000021, 0x008d0000, 0x00000000 },
+   { 0x07800031, 0x24001ca8, 0x00000e00, 0x82000010 },
diff --git a/src/shaders/vme/vme75.inc b/src/shaders/vme/vme75.inc
new file mode 100644
index 0000000..d48daa0
--- /dev/null
+++ b/src/shaders/vme/vme75.inc
@@ -0,0 +1,268 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * This program is licensed under the terms and conditions of the
+ * Eclipse Public License (EPL), version 1.0.  The full text of the EPL is at
+ * http://www.opensource.org/licenses/eclipse-1.0.php.
+ *
+ */
+// Module name: ME_header.inc
+//
+// Global symbols define
+//
+
+/*
+ * Constant
+ */
+define(`VME_MESSAGE_TYPE_INTER',        `1')
+define(`VME_MESSAGE_TYPE_INTRA',        `2')
+define(`VME_MESSAGE_TYPE_MIXED',        `3')
+        
+define(`VME_SIC_MESSAGE_TYPE',        `1')
+define(`VME_IME_MESSAGE_TYPE',        `2')
+define(`VME_FBR_MESSAGE_TYPE',        `3')
+
+define(`BLOCK_32X1',                    `0x0000001F')
+define(`BLOCK_4X16',                    `0x000F0003')
+define(`BLOCK_8X4',                     `0x00070003')
+        
+define(`LUMA_INTRA_16x16_DISABLE',      `0x1')
+define(`LUMA_INTRA_8x8_DISABLE',        `0x2')
+define(`LUMA_INTRA_4x4_DISABLE',        `0x4')
+
+define(`INTRA_PRED_AVAIL_FLAG_AE',      `0x60')
+define(`INTRA_PRED_AVAIL_FLAG_B',       `0x10')
+define(`INTRA_PRED_AVAIL_FLAG_C',       `0x8')
+define(`INTRA_PRED_AVAIL_FLAG_D',       `0x4')
+
+define(`BIND_IDX_VME',                  `0')
+define(`BIND_IDX_VME_REF0',             `1')
+define(`BIND_IDX_VME_REF1',             `2')
+define(`BIND_IDX_OUTPUT',               `3')
+define(`BIND_IDX_INEP',                 `4')
+
+define(`SUB_PEL_MODE_INTEGER',          `0x00000000')
+define(`SUB_PEL_MODE_HALF',             `0x00001000')
+define(`SUB_PEL_MODE_QUARTER',          `0x00003000')
+
+define(`INTER_SAD_NONE',                `0x00000000')
+define(`INTER_SAD_HAAR',                `0x00200000')
+
+define(`INTRA_SAD_NONE',                `0x00000000')
+define(`INTRA_SAD_HAAR',                `0x00800000')
+
+define(`INTER_PART_MASK',               `0x00000000')
+
+define(`SEARCH_CTRL_SINGLE',            `0x00000000')
+define(`SEARCH_CTRL_DUAL_START',        `0x00000100')
+define(`SEARCH_CTRL_DUAL_RECORD',       `0x00000300')
+define(`SEARCH_CTRL_DUAL_REFERENCE',    `0x00000700')
+
+define(`REF_REGION_SIZE',               `0x2830:UW')
+
+define(`BI_SUB_MB_PART_MASK',           `0x0c000000')
+define(`MAX_NUM_MV',                    `0x00000020')
+define(`FB_PRUNING_ENABLE',             `0x40000000')
+
+define(`SEARCH_PATH_LEN',               `0x00003030')
+define(`START_CENTER',                  `0x30000000')
+
+define(`ADAPTIVE_SEARCH_ENABLE',        `0x00000002') 
+define(`INTRA_PREDICTORE_MODE',         `0x11111111:UD')
+
+define(`INTER_VME_OUTPUT_IN_OWS',       `10')
+define(`INTER_VME_OUTPUT_MV_IN_OWS',    `8')
+
+define(`INTRAMBFLAG_MASK',              `0x00002000')
+define(`MVSIZE_UW_BASE',                `0x0040')
+define(`MFC_MV32_BIT_SHIFT',            `5')
+define(`CBP_DC_YUV_UW',                 `0x000E')
+
+define(`DC_HARR_ENABLE',                `0x0000')
+define(`DC_HARR_DISABLE',		`0x0020')
+
+define(`MV32_BIT_MASK',                 `0x0020')
+define(`MV32_BIT_SHIFT',                `5')
+
+define(`OBW_CACHE_TYPE',                `10')
+
+
+define(`OBW_MESSAGE_TYPE',              `8')
+
+define(`OBW_BIND_IDX',                  `BIND_IDX_OUTPUT')
+
+define(`OBW_CONTROL_0',                 `0')    /* 1 OWord, low 128 bits */
+define(`OBW_CONTROL_1',                 `1')    /* 1 OWord, high 128 bits */
+define(`OBW_CONTROL_2',                 `2')    /* 2 OWords */
+define(`OBW_CONTROL_3',                 `3')    /* 4 OWords */
+define(`OBW_CONTROL_8',                 `4')    /* 8 OWords */
+
+define(`FBR_BME_ENABLE',                 `0x00000000')
+define(`FBR_BME_DISABLE',                `0x00040000')
+
+define(`OBW_WRITE_COMMIT_CATEGORY',     `0')    /* category on Ivybridge */
+
+
+define(`OBW_HEADER_PRESENT',            `1')
+
+/* GRF registers
+ * r0 header
+ * r1~r4 constant buffer (reserved)
+ * r5 inline data
+ * r6~r11 reserved
+ * r12~r22 write back of VME messages (vme_wb0 .. vme_ime_wb10)
+ * Oword Block Write has no write back (obw_wb is null, length 0)
+ */
+/*
+ * GRF 0 -- header       
+ */        
+define(`thread_id_ub',          `r0.20<0,1,0>:UB')  /* thread id in payload */
+
+/*
+ * GRF 1~4 -- Constant Buffer (reserved)
+ */
+        
+/*
+ * GRF 5 -- inline data
+ */        
+define(`inline_reg0',           `r5')
+define(`w_in_mb_uw',            `inline_reg0.2')
+define(`orig_xy_ub',            `inline_reg0.0')
+define(`orig_x_ub',             `inline_reg0.0')    /* in macroblock */    
+define(`orig_y_ub',             `inline_reg0.1')
+define(`transform_8x8_ub',      `inline_reg0.4')
+define(`slice_edge_ub',         `inline_reg0.4')
+define(`num_macroblocks',       `inline_reg0.6')
+define(`input_mb_intra_ub',     `inline_reg0.5')
+
+/*
+ * GRF 6~11 -- reserved
+ */
+
+/*
+ * GRF 12~22 -- write back for VME messages
+ */
+define(`vme_wb',                `r12')
+define(`vme_wb0',               `r12')
+define(`vme_wb1',               `r13')
+define(`vme_wb2',               `r14')
+define(`vme_wb3',               `r15')
+define(`vme_wb4',               `r16')
+define(`vme_wb5',               `r17')
+define(`vme_wb6',               `r18')
+define(`vme_ime_wb7',		`r19')
+define(`vme_ime_wb8',		`r20')
+define(`vme_ime_wb9',		`r21')
+define(`vme_ime_wb10',		`r22')
+
+
+/*
+ * Write back for the VME output (Oword Block Write) message -- unused, mapped to null
+ */
+define(`obw_wb',                `null<1>:W')
+define(`obw_wb_length',         `0')
+
+
+/*
+ * GRF 28~30 -- Intra Neighbor Edge Pixels
+ */
+define(`INEP_ROW',              `r28')
+define(`INEP_COL0',             `r29')
+define(`INEP_COL1',             `r30')
+        
+/*
+ * GRF 48~50 -- Chroma Neighbor Edge Pixels
+ */
+define(`CHROMA_ROW',              `r48')
+define(`CHROMA_COL',              `r49')
+
+/*
+ * temporary registers
+ */
+define(`tmp_reg0',              `r32')
+define(`read0_header',          `tmp_reg0')
+define(`tmp_reg1',              `r33')
+define(`read1_header',          `tmp_reg1')
+define(`tmp_reg2',              `r34')
+define(`vme_m0',                `tmp_reg2')
+define(`tmp_reg3',              `r35')                                
+define(`vme_m1',                `tmp_reg3')
+define(`intra_flag',            `vme_m1.28')
+define(`intra_part_mask_ub',    `vme_m1.28')        
+define(`mb_intra_struct_ub',    `vme_m1.29')
+define(`tmp_reg4',              `r36')
+define(`obw_m0',                `tmp_reg4')
+define(`tmp_reg5',              `r37')
+define(`obw_m1',                `tmp_reg5')
+define(`tmp_reg6',              `r38')
+define(`obw_m2',                `tmp_reg6')
+define(`tmp_reg7',              `r39')
+define(`obw_m3',                `tmp_reg7')
+define(`tmp_reg8',              `r40')
+define(`obw_m4',                `tmp_reg8')
+define(`tmp_reg9',              `r41')
+define(`tmp_x_w',               `tmp_reg9.0')
+define(`tmp_rega',              `r42')
+define(`tmp_ud0',               `tmp_rega.0')
+define(`tmp_ud1',               `tmp_rega.4')
+define(`tmp_ud2',               `tmp_rega.8')
+define(`tmp_ud3',               `tmp_rega.12')
+define(`tmp_uw0',               `tmp_rega.0')
+define(`tmp_uw1',               `tmp_rega.2')
+define(`tmp_uw2',               `tmp_rega.4')
+define(`tmp_uw3',               `tmp_rega.6')
+define(`tmp_uw4',               `tmp_rega.8')
+define(`tmp_uw5',               `tmp_rega.10')
+define(`tmp_uw6',               `tmp_rega.12')
+define(`tmp_uw7',               `tmp_rega.14')
+
+define(`vme_m2',                `r43')
+/*
+ * Message registers (mapped onto GRF r64~r73, plus r112 for the thread spawner)
+ */        
+
+define(`msg_ind',               `64')
+define(`msg_reg0',              `r64')
+define(`msg_reg1',              `r65')
+define(`msg_reg2',              `r66')
+define(`msg_reg3',              `r67')
+define(`msg_reg4',              `r68')
+define(`msg_reg5',              `r69')
+define(`msg_reg6',              `r70')
+define(`msg_reg7',              `r71')
+define(`msg_reg8',              `r72')
+define(`msg_reg9',              `r73')
+
+define(`ts_msg_ind',               `112')
+define(`ts_msg_reg0',               `r112')
+/*
+ * VME message payload
+ */
+
+define(`vme_intra_wb_length',   `1')
+define(`vme_wb_length',		`7')
+define(`sic_vme_msg_length',	`7')
+define(`fbr_vme_msg_length',	`7')
+define(`ime_vme_msg_length',	`5')
+
+define(`vme_msg_ind',           `msg_ind')
+define(`vme_msg_0',             `msg_reg0')
+define(`vme_msg_1',             `msg_reg1')
+define(`vme_msg_2',             `msg_reg2')
+
+define(`vme_msg_3',             `msg_reg3')
+define(`vme_msg_4',             `msg_reg4')
+
+
+define(`vme_msg_5',             `msg_reg5')
+define(`vme_msg_6',             `msg_reg6')
+define(`vme_msg_7',             `msg_reg7')
+define(`vme_msg_8',             `msg_reg8')
+define(`vme_msg_9',             `msg_reg9')
+
+define(`BIND_IDX_CBCR',			`6')
+
+
+define(`LUMA_CHROMA_MODE',      `0x0')
+define(`LUMA_INTRA_MODE',	`0x1')
+define(`LUMA_INTRA_DISABLE',	`0x2')
diff --git a/src/sysdeps.h b/src/sysdeps.h
new file mode 100644
index 0000000..a713d20
--- /dev/null
+++ b/src/sysdeps.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2012 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef SYSDEPS_H
+#define SYSDEPS_H
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+#include <stdint.h>
+#include <assert.h>
+
+#endif /* SYSDEPS_H */
diff --git a/src/va_backend_compat.h b/src/va_backend_compat.h
new file mode 100644
index 0000000..f5c9f75
--- /dev/null
+++ b/src/va_backend_compat.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2012 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef VA_BACKEND_COMPAT_H
+#define VA_BACKEND_COMPAT_H
+
+#include <va/va_backend.h>
+
+#if VA_CHECK_VERSION(0,33,0)
+# include <va/va_drmcommon.h>
+
+# define VA_CHECK_DRM_AUTH_TYPE(ctx, type) \
+    (((struct drm_state *)(ctx)->drm_state)->auth_type == (type))
+
+#else
+# include <va/va_dricommon.h>
+
+# define VA_CHECK_DRM_AUTH_TYPE(ctx, type) \
+    (((struct dri_state *)(ctx)->dri_state)->driConnectedFlag == (type))
+
+# define drm_state              dri_state
+# define VA_DRM_AUTH_DRI1       VA_DRI1
+# define VA_DRM_AUTH_DRI2       VA_DRI2
+# define VA_DRM_AUTH_CUSTOM     VA_DUMMY
+#endif
+
+#endif /* VA_BACKEND_COMPAT_H */
diff --git a/src/wayland-drm-client-protocol.h b/src/wayland-drm-client-protocol.h
new file mode 100644
index 0000000..cba188e
--- /dev/null
+++ b/src/wayland-drm-client-protocol.h
@@ -0,0 +1,213 @@
+/* 
+ * Copyright © 2008-2011 Kristian Høgsberg
+ * Copyright © 2010-2011 Intel Corporation
+ * 
+ * Permission to use, copy, modify, distribute, and sell this
+ * software and its documentation for any purpose is hereby granted
+ * without fee, provided that the above copyright notice appear in
+ * all copies and that both that copyright notice and this permission
+ * notice appear in supporting documentation, and that the name of
+ * the copyright holders not be used in advertising or publicity
+ * pertaining to distribution of the software without specific,
+ * written prior permission.  The copyright holders make no
+ * representations about the suitability of this software for any
+ * purpose.  It is provided "as is" without express or implied
+ * warranty.
+ * 
+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
+ * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
+ * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ * ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
+ * THIS SOFTWARE.
+ */
+
+#ifndef DRM_CLIENT_PROTOCOL_H
+#define DRM_CLIENT_PROTOCOL_H
+
+#ifdef  __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+#include <stddef.h>
+#include "wayland-client.h"
+
+struct wl_client;
+struct wl_resource;
+
+struct wl_drm;
+
+extern const struct wl_interface wl_drm_interface;
+
+#ifndef WL_DRM_ERROR_ENUM
+#define WL_DRM_ERROR_ENUM
+enum wl_drm_error { /* values of the wl_drm "error" enum declared in wayland-drm.xml */
+	WL_DRM_ERROR_AUTHENTICATE_FAIL = 0,
+	WL_DRM_ERROR_INVALID_FORMAT = 1,
+	WL_DRM_ERROR_INVALID_NAME = 2,
+};
+#endif /* WL_DRM_ERROR_ENUM */
+
+#ifndef WL_DRM_FORMAT_ENUM
+#define WL_DRM_FORMAT_ENUM
+enum wl_drm_format { /* codes match the #defines in drm_fourcc.h (see wayland-drm.xml) */
+	WL_DRM_FORMAT_C8 = 0x20203843,
+	WL_DRM_FORMAT_RGB332 = 0x38424752,
+	WL_DRM_FORMAT_BGR233 = 0x38524742,
+	WL_DRM_FORMAT_XRGB4444 = 0x32315258,
+	WL_DRM_FORMAT_XBGR4444 = 0x32314258,
+	WL_DRM_FORMAT_RGBX4444 = 0x32315852,
+	WL_DRM_FORMAT_BGRX4444 = 0x32315842,
+	WL_DRM_FORMAT_ARGB4444 = 0x32315241,
+	WL_DRM_FORMAT_ABGR4444 = 0x32314241,
+	WL_DRM_FORMAT_RGBA4444 = 0x32314152,
+	WL_DRM_FORMAT_BGRA4444 = 0x32314142,
+	WL_DRM_FORMAT_XRGB1555 = 0x35315258,
+	WL_DRM_FORMAT_XBGR1555 = 0x35314258,
+	WL_DRM_FORMAT_RGBX5551 = 0x35315852,
+	WL_DRM_FORMAT_BGRX5551 = 0x35315842,
+	WL_DRM_FORMAT_ARGB1555 = 0x35315241,
+	WL_DRM_FORMAT_ABGR1555 = 0x35314241,
+	WL_DRM_FORMAT_RGBA5551 = 0x35314152,
+	WL_DRM_FORMAT_BGRA5551 = 0x35314142,
+	WL_DRM_FORMAT_RGB565 = 0x36314752,
+	WL_DRM_FORMAT_BGR565 = 0x36314742,
+	WL_DRM_FORMAT_RGB888 = 0x34324752,
+	WL_DRM_FORMAT_BGR888 = 0x34324742,
+	WL_DRM_FORMAT_XRGB8888 = 0x34325258,
+	WL_DRM_FORMAT_XBGR8888 = 0x34324258,
+	WL_DRM_FORMAT_RGBX8888 = 0x34325852,
+	WL_DRM_FORMAT_BGRX8888 = 0x34325842,
+	WL_DRM_FORMAT_ARGB8888 = 0x34325241,
+	WL_DRM_FORMAT_ABGR8888 = 0x34324241,
+	WL_DRM_FORMAT_RGBA8888 = 0x34324152,
+	WL_DRM_FORMAT_BGRA8888 = 0x34324142,
+	WL_DRM_FORMAT_XRGB2101010 = 0x30335258,
+	WL_DRM_FORMAT_XBGR2101010 = 0x30334258,
+	WL_DRM_FORMAT_RGBX1010102 = 0x30335852,
+	WL_DRM_FORMAT_BGRX1010102 = 0x30335842,
+	WL_DRM_FORMAT_ARGB2101010 = 0x30335241,
+	WL_DRM_FORMAT_ABGR2101010 = 0x30334241,
+	WL_DRM_FORMAT_RGBA1010102 = 0x30334152,
+	WL_DRM_FORMAT_BGRA1010102 = 0x30334142,
+	WL_DRM_FORMAT_YUYV = 0x56595559,
+	WL_DRM_FORMAT_YVYU = 0x55595659,
+	WL_DRM_FORMAT_UYVY = 0x59565955,
+	WL_DRM_FORMAT_VYUY = 0x59555956,
+	WL_DRM_FORMAT_AYUV = 0x56555941,
+	WL_DRM_FORMAT_NV12 = 0x3231564e,
+	WL_DRM_FORMAT_NV21 = 0x3132564e,
+	WL_DRM_FORMAT_NV16 = 0x3631564e,
+	WL_DRM_FORMAT_NV61 = 0x3136564e,
+	WL_DRM_FORMAT_YUV410 = 0x39565559,
+	WL_DRM_FORMAT_YVU410 = 0x39555659,
+	WL_DRM_FORMAT_YUV411 = 0x31315559,
+	WL_DRM_FORMAT_YVU411 = 0x31315659,
+	WL_DRM_FORMAT_YUV420 = 0x32315559,
+	WL_DRM_FORMAT_YVU420 = 0x32315659,
+	WL_DRM_FORMAT_YUV422 = 0x36315559,
+	WL_DRM_FORMAT_YVU422 = 0x36315659,
+	WL_DRM_FORMAT_YUV444 = 0x34325559,
+	WL_DRM_FORMAT_YVU444 = 0x34325659,
+};
+#endif /* WL_DRM_FORMAT_ENUM */
+
+struct wl_drm_listener { /* one callback per wl_drm event declared in wayland-drm.xml */
+	/**
+	 * device - path of the DRM device used by the server
+	 * @name: device path; the client should create local buffers from it
+	 */
+	void (*device)(void *data,
+		       struct wl_drm *wl_drm,
+		       const char *name);
+	/**
+	 * format - a buffer format supported by the compositor
+	 * @format: format code, presumably one of enum wl_drm_format
+	 */
+	void (*format)(void *data,
+		       struct wl_drm *wl_drm,
+		       uint32_t format);
+	/**
+	 * authenticated - raised if the authenticate request succeeded
+	 */
+	void (*authenticated)(void *data,
+			      struct wl_drm *wl_drm);
+};
+
+static inline int /* forwards listener and data to wl_proxy_add_listener() and returns its result */
+wl_drm_add_listener(struct wl_drm *wl_drm,
+		    const struct wl_drm_listener *listener, void *data)
+{
+	return wl_proxy_add_listener((struct wl_proxy *) wl_drm,
+				     (void (**)(void)) listener, data); /* the struct is handed over as a table of function pointers */
+}
+
+#define WL_DRM_AUTHENTICATE	0 /* request codes passed to wl_proxy_marshal(); same order as the requests in wayland-drm.xml */
+#define WL_DRM_CREATE_BUFFER	1
+#define WL_DRM_CREATE_PLANAR_BUFFER	2
+
+static inline void /* forwards user_data to wl_proxy_set_user_data() on the wl_drm proxy */
+wl_drm_set_user_data(struct wl_drm *wl_drm, void *user_data)
+{
+	wl_proxy_set_user_data((struct wl_proxy *) wl_drm, user_data);
+}
+
+static inline void * /* returns whatever wl_proxy_get_user_data() reports for the wl_drm proxy */
+wl_drm_get_user_data(struct wl_drm *wl_drm)
+{
+	return wl_proxy_get_user_data((struct wl_proxy *) wl_drm);
+}
+
+static inline void /* hands the wl_drm proxy to wl_proxy_destroy(); no request is marshalled here */
+wl_drm_destroy(struct wl_drm *wl_drm)
+{
+	wl_proxy_destroy((struct wl_proxy *) wl_drm);
+}
+
+static inline void /* sends the "authenticate" request; per wayland-drm.xml, id is the magic from drmGetMagic() */
+wl_drm_authenticate(struct wl_drm *wl_drm, uint32_t id)
+{
+	wl_proxy_marshal((struct wl_proxy *) wl_drm,
+			 WL_DRM_AUTHENTICATE, id);
+}
+
+static inline struct wl_buffer * /* sends "create_buffer" for the named DRM buffer; returns the new wl_buffer proxy, or NULL */
+wl_drm_create_buffer(struct wl_drm *wl_drm, uint32_t name, int32_t width, int32_t height, uint32_t stride, uint32_t format)
+{
+	struct wl_proxy *id;
+
+	id = wl_proxy_create((struct wl_proxy *) wl_drm,
+			     &wl_buffer_interface);
+	if (!id)
+		return NULL; /* proxy creation failed: no request is sent */
+
+	wl_proxy_marshal((struct wl_proxy *) wl_drm,
+			 WL_DRM_CREATE_BUFFER, id, name, width, height, stride, format); /* id fills the request's new_id argument */
+
+	return (struct wl_buffer *) id;
+}
+
+static inline struct wl_buffer * /* sends "create_planar_buffer" with three offset/stride pairs; returns the new wl_buffer proxy, or NULL */
+wl_drm_create_planar_buffer(struct wl_drm *wl_drm, uint32_t name, int32_t width, int32_t height, uint32_t format, int32_t offset0, int32_t stride0, int32_t offset1, int32_t stride1, int32_t offset2, int32_t stride2)
+{
+	struct wl_proxy *id;
+
+	id = wl_proxy_create((struct wl_proxy *) wl_drm,
+			     &wl_buffer_interface);
+	if (!id)
+		return NULL; /* proxy creation failed: no request is sent */
+
+	wl_proxy_marshal((struct wl_proxy *) wl_drm,
+			 WL_DRM_CREATE_PLANAR_BUFFER, id, name, width, height, format, offset0, stride0, offset1, stride1, offset2, stride2); /* id fills the request's new_id argument */
+
+	return (struct wl_buffer *) id;
+}
+
+#ifdef  __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/wayland/Makefile.am b/src/wayland/Makefile.am
new file mode 100644
index 0000000..614d8a4
--- /dev/null
+++ b/src/wayland/Makefile.am
@@ -0,0 +1,28 @@
+# Copyright (C) 2012 Intel Corporation. All Rights Reserved.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sub license, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+# 
+# The above copyright notice and this permission notice (including the
+# next paragraph) shall be included in all copies or substantial portions
+# of the Software.
+# 
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+# IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+# ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+EXTRA_DIST = \
+	wayland-drm.xml		\
+	$(NULL)
+
+# Extra clean files so that maintainer-clean removes *everything*
+MAINTAINERCLEANFILES = Makefile.in
diff --git a/src/wayland/wayland-drm.xml b/src/wayland/wayland-drm.xml
new file mode 100644
index 0000000..265d4f8
--- /dev/null
+++ b/src/wayland/wayland-drm.xml
@@ -0,0 +1,155 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<protocol name="drm">
+
+  <copyright>
+    Copyright © 2008-2011 Kristian Høgsberg
+    Copyright © 2010-2011 Intel Corporation
+
+    Permission to use, copy, modify, distribute, and sell this
+    software and its documentation for any purpose is hereby granted
+    without fee, provided that the above copyright notice appear in
+    all copies and that both that copyright notice and this permission
+    notice appear in supporting documentation, and that the name of
+    the copyright holders not be used in advertising or publicity
+    pertaining to distribution of the software without specific,
+    written prior permission.  The copyright holders make no
+    representations about the suitability of this software for any
+    purpose.  It is provided "as is" without express or implied
+    warranty.
+
+    THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
+    SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+    FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
+    SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+    WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
+    AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+    ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
+    THIS SOFTWARE.
+  </copyright>
+
+  <!-- drm support. This object is created by the server and published
+       using the display's global event. -->
+  <interface name="wl_drm" version="1">
+    <enum name="error">
+      <entry name="authenticate_fail" value="0"/>
+      <entry name="invalid_format" value="1"/>
+      <entry name="invalid_name" value="2"/>
+    </enum>
+
+    <enum name="format">
+      <!-- The drm format codes match the #defines in drm_fourcc.h.
+           The formats actually supported by the compositor will be
+           reported by the format event. -->
+      <entry name="c8" value="0x20203843"/>
+      <entry name="rgb332" value="0x38424752"/>
+      <entry name="bgr233" value="0x38524742"/>
+      <entry name="xrgb4444" value="0x32315258"/>
+      <entry name="xbgr4444" value="0x32314258"/>
+      <entry name="rgbx4444" value="0x32315852"/>
+      <entry name="bgrx4444" value="0x32315842"/>
+      <entry name="argb4444" value="0x32315241"/>
+      <entry name="abgr4444" value="0x32314241"/>
+      <entry name="rgba4444" value="0x32314152"/>
+      <entry name="bgra4444" value="0x32314142"/>
+      <entry name="xrgb1555" value="0x35315258"/>
+      <entry name="xbgr1555" value="0x35314258"/>
+      <entry name="rgbx5551" value="0x35315852"/>
+      <entry name="bgrx5551" value="0x35315842"/>
+      <entry name="argb1555" value="0x35315241"/>
+      <entry name="abgr1555" value="0x35314241"/>
+      <entry name="rgba5551" value="0x35314152"/>
+      <entry name="bgra5551" value="0x35314142"/>
+      <entry name="rgb565" value="0x36314752"/>
+      <entry name="bgr565" value="0x36314742"/>
+      <entry name="rgb888" value="0x34324752"/>
+      <entry name="bgr888" value="0x34324742"/>
+      <entry name="xrgb8888" value="0x34325258"/>
+      <entry name="xbgr8888" value="0x34324258"/>
+      <entry name="rgbx8888" value="0x34325852"/>
+      <entry name="bgrx8888" value="0x34325842"/>
+      <entry name="argb8888" value="0x34325241"/>
+      <entry name="abgr8888" value="0x34324241"/>
+      <entry name="rgba8888" value="0x34324152"/>
+      <entry name="bgra8888" value="0x34324142"/>
+      <entry name="xrgb2101010" value="0x30335258"/>
+      <entry name="xbgr2101010" value="0x30334258"/>
+      <entry name="rgbx1010102" value="0x30335852"/>
+      <entry name="bgrx1010102" value="0x30335842"/>
+      <entry name="argb2101010" value="0x30335241"/>
+      <entry name="abgr2101010" value="0x30334241"/>
+      <entry name="rgba1010102" value="0x30334152"/>
+      <entry name="bgra1010102" value="0x30334142"/>
+      <entry name="yuyv" value="0x56595559"/>
+      <entry name="yvyu" value="0x55595659"/>
+      <entry name="uyvy" value="0x59565955"/>
+      <entry name="vyuy" value="0x59555956"/>
+      <entry name="ayuv" value="0x56555941"/>
+      <entry name="nv12" value="0x3231564e"/>
+      <entry name="nv21" value="0x3132564e"/>
+      <entry name="nv16" value="0x3631564e"/>
+      <entry name="nv61" value="0x3136564e"/>
+      <entry name="yuv410" value="0x39565559"/>
+      <entry name="yvu410" value="0x39555659"/>
+      <entry name="yuv411" value="0x31315559"/>
+      <entry name="yvu411" value="0x31315659"/>
+      <entry name="yuv420" value="0x32315559"/>
+      <entry name="yvu420" value="0x32315659"/>
+      <entry name="yuv422" value="0x36315559"/>
+      <entry name="yvu422" value="0x36315659"/>
+      <entry name="yuv444" value="0x34325559"/>
+      <entry name="yvu444" value="0x34325659"/>
+    </enum>
+
+    <!-- Call this request with the magic received from drmGetMagic().
+         It will be passed on to the drmAuthMagic() or
+         DRIAuthConnection() call.  This authentication must be
+         completed before create_buffer could be used. -->
+    <request name="authenticate">
+      <arg name="id" type="uint"/>
+    </request>
+
+    <!-- Create a wayland buffer for the named DRM buffer.  The DRM
+         surface must have a name using the flink ioctl -->
+    <request name="create_buffer">
+      <arg name="id" type="new_id" interface="wl_buffer"/>
+      <arg name="name" type="uint"/>
+      <arg name="width" type="int"/>
+      <arg name="height" type="int"/>
+      <arg name="stride" type="uint"/>
+      <arg name="format" type="uint"/>
+    </request>
+
+    <!-- Create a wayland buffer for the named DRM buffer.  The DRM
+         surface must have a name using the flink ioctl -->
+    <request name="create_planar_buffer">
+      <arg name="id" type="new_id" interface="wl_buffer"/>
+      <arg name="name" type="uint"/>
+      <arg name="width" type="int"/>
+      <arg name="height" type="int"/>
+      <arg name="format" type="uint"/>
+      <arg name="offset0" type="int"/>
+      <arg name="stride0" type="int"/>
+      <arg name="offset1" type="int"/>
+      <arg name="stride1" type="int"/>
+      <arg name="offset2" type="int"/>
+      <arg name="stride2" type="int"/>
+    </request>
+
+    <!-- Notification of the path of the drm device which is used by
+         the server.  The client should use this device for creating
+         local buffers.  Only buffers created from this device should
+         be passed to the server using this drm object's
+         create_buffer request. -->
+    <event name="device">
+      <arg name="name" type="string"/>
+    </event>
+
+    <event name="format">
+      <arg name="format" type="uint"/>
+    </event>
+
+    <!-- Raised if the authenticate request succeeded -->
+    <event name="authenticated"/>
+  </interface>
+
+</protocol>

-- 
intel-vaapi-driver packaging



More information about the pkg-multimedia-commits mailing list