[SCM] intel-vaapi-driver/upstream: Imported Upstream version 1.0.19
mfv-guest at users.alioth.debian.org
mfv-guest at users.alioth.debian.org
Tue Jan 15 15:26:45 UTC 2013
The following commit has been merged in the upstream branch:
commit c4f72d5a025d8139d20e333ee60891bf21d14ad4
Author: Matteo F. Vescovi <mfv.debian at gmail.com>
Date: Tue Jan 15 16:23:52 2013 +0100
Imported Upstream version 1.0.19
diff --git a/NEWS b/NEWS
index edb7d75..1db2047 100644
--- a/NEWS
+++ b/NEWS
@@ -1,15 +1,41 @@
-libva-driver-intel NEWS -- summary of changes. 2012-02-DD
+libva-intel-driver NEWS -- summary of changes. 2012-11-09
Copyright (C) 2009-2011 Intel Corporation
-Version 1.0.16 - DD.Feb.2012
-* Fix MPEG-2 decoding of interlaced streams (SNB, IVB)
+Version 1.0.19 - 09.Nov.2012
+* Add support for Haswell
+* Add raw DRM support (Dmitry Ermilov)
+* Add Wayland support
+* Add support for display rotation attribute
+* Support 4K encoding on IVB and HSW
+* Drop explicit dependency on X11 and libva-x11
+* Fix VC-1 decoding when VSTRANSFORM is 0
+* Fix SIGSEGV caused by use-after-free of the bufmgr (Stéphane Marchesin)
+* Fix thread safety issue (Gautam)
+* Fix vaUnlockSurface() for libva trace
+
+Version 1.0.18 - 02.Aug.2012
+* Add JPEG decoding on Ivy Bridge
+* Add support for a new Ivy Bridge chip
+* Add support for vaSyncSurface() and vaQuerySurfaceStatus() (Dmitry Ermilov)
+* Fix decoding of MPEG-2 videos with implicit IQ matrices
+* Fix concurrent creation of VA objects (MT safety)
+* Fix decoding of large resolution videos (up to 4K on IVB)
+
+Version 1.0.17 - 02.Apr.2012
+* Add support for IMC1/IMC3 surface formats
* Fix rendering of interlaced surfaces
+* Fix MPEG-2 decoding of interlaced streams (SNB, IVB)
+* Fix H.264 weighted prediction indicator (SNB)
+* Fix and simplify calculation of H.264 macroblock bit offset (ILK, SNB, IVB)
+
+Version 1.0.16 - 14.Feb.2012
* Fix VC-1 bitplane buffer size (SNB, IVB)
* Fix VC-1 motion vector modes for Ivy Bridge
-* Fix weighted prediction indicator for Sandy Bridge
* Fix MFX_QM_STATE for H.264 flat scaling lists (IVB)
* Fix and simplify AVC_REF_IDX_STATE setup (ILK, SNB, IVB)
-* Fix and simplify first macroblock bit offset calculation (ILK, SNB, IVB)
+* Fix memory leak of encoder buffers
+* Fix check for internal VA surface format prior to rendering
+* Add support for B43 chipset (Alexander Inyukhin)
Version 1.0.15 - 28.Oct.2011
* Add auto-generated Debian packaging
diff --git a/README b/README
index d681d42..09ee0b3 100644
--- a/README
+++ b/README
@@ -1,5 +1,5 @@
- libva-driver-intel
+ libva-intel-driver
VA driver for Intel G45 & HD Graphics family
Copyright (C) 2009-2011 Intel Corporation
@@ -14,7 +14,7 @@ Please read the COPYING file available in this package.
Overview
--------
-libva-driver-intel is the VA-API implementation for Intel G45 chipsets
+libva-intel-driver is the VA-API implementation for Intel G45 chipsets
and Intel HD Graphics for Intel Core processor family.
Platform definitions:
@@ -36,4 +36,4 @@ VC-1 D SNB+
Requirements
------------
-libva >= 1.0.14
+libva >= 1.0.16
diff --git a/configure.ac b/configure.ac
index d2481d0..7a9e097 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1,7 +1,7 @@
# intel-driver package version number
m4_define([intel_driver_major_version], [1])
m4_define([intel_driver_minor_version], [0])
-m4_define([intel_driver_micro_version], [18])
+m4_define([intel_driver_micro_version], [19])
m4_define([intel_driver_pre_version], [0])
m4_define([intel_driver_version],
[intel_driver_major_version.intel_driver_minor_version.intel_driver_micro_version])
@@ -11,15 +11,16 @@ m4_append([intel_driver_version], intel_driver_pre_version, [.pre])
# libva minimum version requirement
m4_define([libva_package_version], [1.0.14])
+m4_define([va_api_version], [0.32.0])
# libdrm minimum version requirement
m4_define([libdrm_version], [2.4.23])
AC_PREREQ([2.57])
AC_INIT([intel_driver], [intel_driver_version], [haihao.xiang at intel.com],
- [libva-driver-intel])
+ [libva-intel-driver])
AC_CONFIG_SRCDIR([Makefile.am])
-AM_INIT_AUTOMAKE
+AM_INIT_AUTOMAKE([1.9 tar-ustar])
AM_CONFIG_HEADER([src/config.h])
@@ -40,6 +41,21 @@ m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])], [
AC_SUBST(AM_DEFAULT_VERBOSITY)
])
+AC_ARG_ENABLE(drm,
+ [AC_HELP_STRING([--enable-drm],
+ [build with VA/DRM API support @<:@default=yes@:>@])],
+ [], [enable_drm="yes"])
+
+AC_ARG_ENABLE(x11,
+ [AC_HELP_STRING([--enable-x11],
+ [build with VA/X11 API support @<:@default=yes@:>@])],
+ [], [enable_x11="yes"])
+
+AC_ARG_ENABLE([wayland],
+ [AC_HELP_STRING([--enable-wayland],
+ [build with VA/Wayland API support @<:@default=yes@:>@])],
+ [], [enable_wayland="yes"])
+
AC_DISABLE_STATIC
AC_PROG_LIBTOOL
AC_PROG_CC
@@ -58,13 +74,29 @@ PKG_CHECK_MODULES([DRM], [libdrm >= $LIBDRM_VERSION])
AC_SUBST(LIBDRM_VERSION)
dnl Check for gen4asm
-PKG_CHECK_MODULES(GEN4ASM, [intel-gen4asm >= 1.2], [gen4asm=yes], [gen4asm=no])
+PKG_CHECK_MODULES(GEN4ASM, [intel-gen4asm >= 1.3], [gen4asm=yes], [gen4asm=no])
AM_CONDITIONAL(HAVE_GEN4ASM, test x$gen4asm = xyes)
AC_PATH_PROG([GEN4ASM], [intel-gen4asm])
dnl Check for VA-API
-PKG_CHECK_MODULES(LIBVA_DEPS, [libva])
-PKG_CHECK_MODULES(LIBVA_X11_DEPS, [libva-x11])
+PKG_CHECK_MODULES(LIBVA_DEPS, [libva >= va_api_version])
+
+dnl Check for VA/DRM API
+USE_DRM="$enable_drm"
+if test "$USE_DRM" = "yes"; then
+ PKG_CHECK_MODULES(LIBVA_DRM_DEPS, [libva-drm],
+ [AC_DEFINE([HAVE_VA_DRM], [1], [Defined to 1 if VA/DRM API is enabled])],
+ [USE_DRM="no"])
+
+ # Check for <drm_fourcc.h>
+ if test "$USE_DRM" = "yes"; then
+ saved_CPPFLAGS="$CPPFLAGS"
+ CPPFLAGS="$CPPFLAGS $DRM_CFLAGS"
+ AC_CHECK_HEADERS([drm_fourcc.h], [:], [USE_DRM="no"])
+ CPPFLAGS="$saved_CPPFLAGS"
+ fi
+fi
+AM_CONDITIONAL(USE_DRM, test "$USE_DRM" = "yes")
VA_VERSION=`$PKG_CONFIG --modversion libva`
VA_MAJOR_VERSION=`echo "$VA_VERSION" | cut -d'.' -f1`
@@ -80,6 +112,15 @@ VA_DRIVER_INIT_FUNC="__vaDriverInit_${VA_MAJOR_VERSION}_${VA_MINOR_VERSION}"
AC_DEFINE_UNQUOTED([VA_DRIVER_INIT_FUNC], [$VA_DRIVER_INIT_FUNC],
[Define driver entry-point])
+dnl Check for VA/X11 API
+USE_X11="$enable_x11"
+if test "$USE_X11" = "yes"; then
+ PKG_CHECK_MODULES(LIBVA_X11_DEPS, [libva-x11],
+ [AC_DEFINE([HAVE_VA_X11], [1], [Defined to 1 if VA/X11 API is enabled])],
+ [USE_X11="no"])
+fi
+AM_CONDITIONAL(USE_X11, test "$USE_X11" = "yes")
+
dnl Check for VA-API drivers path
AC_MSG_CHECKING([for VA drivers path])
LIBVA_DRIVERS_PATH=`$PKG_CONFIG libva --variable driverdir`
@@ -89,25 +130,60 @@ fi
AC_MSG_RESULT([$LIBVA_DRIVERS_PATH])
AC_SUBST(LIBVA_DRIVERS_PATH)
+# Check for EGL
+if test "$enable_wayland" = "yes"; then
+ enable_egl="yes"
+fi
+
+USE_EGL="no"
+if test "$enable_egl" = "yes"; then
+ PKG_CHECK_MODULES([EGL], [egl], [USE_EGL="yes"], [USE_EGL="no"])
+ saved_CPPFLAGS="$CPPFLAGS"
+ saved_LIBS="$LIBS"
+ CPPFLAGS="$CPPFLAGS $EGL_CFLAGS"
+ LIBS="$LIBS $EGL_LIBS"
+ AC_CHECK_HEADERS([EGL/egl.h], [:], [USE_EGL="no"])
+ AC_CHECK_LIB([EGL], [eglGetDisplay], [:], [USE_EGL="no"])
+ CPPFLAGS="$saved_CPPFLAGS"
+ LIBS="$saved_LIBS"
+fi
+AM_CONDITIONAL(USE_EGL, test "$USE_EGL" = "yes")
+
+# Check for Wayland
+USE_WAYLAND="no"
+if test "$enable_wayland" = "yes"; then
+ PKG_CHECK_MODULES([WAYLAND], [wayland-client], [USE_WAYLAND="yes"], [:])
+ PKG_CHECK_MODULES([LIBVA_WAYLAND_DEPS], [libva-wayland],
+ [AC_DEFINE([HAVE_VA_WAYLAND], [1], [Defined to 1 if VA/Wayland API is enabled])],
+ [USE_WAYLAND="no"])
+fi
+AM_CONDITIONAL(USE_WAYLAND, test "$USE_WAYLAND" = "yes")
+
+m4_ifdef([WAYLAND_SCANNER_RULES],
+ [WAYLAND_SCANNER_RULES(['$(top_srcdir)/src/wayland'])],
+ [wayland_scanner_rules=""; AC_SUBST(wayland_scanner_rules)])
+
dnl Check for JPEG decoding API
-AC_CACHE_CHECK([for JPEG decoding API], ac_cv_have_jpeg_decoding_api, [
- saved_CFLAGS="$CFLAGS"
- CFLAGS="$CFLAGS $LIBVA_DEPS_CFLAGS"
+AC_CACHE_CHECK([for JPEG decoding API], ac_cv_have_va_jpeg_decode, [
+ saved_CPPFLAGS="$CPPFLAGS"
+ CPPFLAGS="$CPPFLAGS $LIBVA_DEPS_CFLAGS"
saved_LIBS="$LIBS"
- LIBS="$CFLAGS $LIBVA_DEPS_LIBS"
- AC_TRY_COMPILE(
- [#include <va/va.h>],
- [VAPictureParameterBufferJPEG pic_param;
- VASliceParameterBufferJPEG slice_param;
- VAIQMatrixBufferJPEG iq_matrix;],
- [ac_cv_have_jpeg_decoding_api="yes"],
- [ac_cv_have_jpeg_decoding_api="no"]
+ LIBS="$LIBS $LIBVA_DEPS_LIBS"
+ AC_COMPILE_IFELSE(
+ [AC_LANG_PROGRAM(
+ [[#include <va/va.h>]],
+ [[VAPictureParameterBufferJPEGBaseline pic_param;
+ VASliceParameterBufferJPEGBaseline slice_param;
+ VAHuffmanTableBufferJPEGBaseline huffman_table;
+ VAIQMatrixBufferJPEGBaseline iq_matrix;]])],
+ [ac_cv_have_va_jpeg_decode="yes"],
+ [ac_cv_have_va_jpeg_decode="no"]
)
- CFLAGS="$saved_CFLAGS"
+ CPPFLAGS="$saved_CPPFLAGS"
LIBS="$saved_LIBS"
])
-if test "$ac_cv_have_jpeg_decoding_api" = "yes"; then
- AC_DEFINE(HAVE_JPEG_DECODING, 1,
+if test "$ac_cv_have_va_jpeg_decode" = "yes"; then
+ AC_DEFINE(HAVE_VA_JPEG_DECODE, 1,
[Defined to 1 if VA-API exposes JPEG decoding])
fi
@@ -122,14 +198,23 @@ AC_OUTPUT([
src/shaders/mpeg2/Makefile
src/shaders/mpeg2/vld/Makefile
src/shaders/post_processing/Makefile
+ src/shaders/post_processing/gen5_6/Makefile
+ src/shaders/post_processing/gen7/Makefile
src/shaders/render/Makefile
src/shaders/vme/Makefile
+ src/wayland/Makefile
])
dnl Print summary
+BACKENDS=""
+AS_IF([test "$USE_DRM" = "yes"], [BACKENDS="$BACKENDS drm"])
+AS_IF([test "$USE_X11" = "yes"], [BACKENDS="$BACKENDS x11"])
+AS_IF([test "$USE_WAYLAND" = "yes"], [BACKENDS="$BACKENDS wayland"])
+
echo
echo $PACKAGE configuration summary:
echo
echo VA-API version ................... : $VA_VERSION_STR
echo VA-API drivers path .............. : $LIBVA_DRIVERS_PATH
+echo Windowing systems ................ : $BACKENDS
echo
diff --git a/debian.upstream/Makefile.am b/debian.upstream/Makefile.am
index 679dab6..9ef70b6 100644
--- a/debian.upstream/Makefile.am
+++ b/debian.upstream/Makefile.am
@@ -3,7 +3,7 @@ DEBIANFILES = \
compat \
control.in \
copyright \
- libva-driver-intel.install \
+ libva-intel-driver.install \
rules \
$(NULL)
diff --git a/debian.upstream/changelog.in b/debian.upstream/changelog.in
index 70310b5..dff4a96 100644
--- a/debian.upstream/changelog.in
+++ b/debian.upstream/changelog.in
@@ -1,4 +1,4 @@
-libva-driver-intel (@PACKAGE_VERSION@-1) unstable; urgency=low
+libva-intel-driver (@PACKAGE_VERSION@-1) unstable; urgency=low
* Autogenerated package, see NEWS file for ChangeLog.
diff --git a/debian.upstream/control.in b/debian.upstream/control.in
index 88ee928..08b1029 100644
--- a/debian.upstream/control.in
+++ b/debian.upstream/control.in
@@ -1,4 +1,4 @@
-Source: libva-driver-intel
+Source: libva-intel-driver
Section: libs
Priority: optional
Maintainer: Gwenole Beauchesne <gwenole.beauchesne at intel.com>
@@ -8,7 +8,7 @@ Build-Depends: debhelper (>= 5),
libva-dev (>= @LIBVA_PACKAGE_VERSION@)
Standards-Version: 3.7.2
-Package: libva-driver-intel
+Package: libva-intel-driver
Section: libs
Architecture: any
Depends: libva1 (>= @LIBVA_PACKAGE_VERSION@),
@@ -17,10 +17,10 @@ Description: VA driver for Intel G45 & HD Graphics family
Video decode & encode driver for Intel G45 chipsets and Intel HD
Graphics for Intel Core processor family.
-Package: libva-driver-intel-dbg
+Package: libva-intel-driver-dbg
Section: libdevel
Architecture: any
-Depends: libva-driver-intel (= ${Source-Version})
+Depends: libva-intel-driver (= ${Source-Version})
Description: VA driver for Intel G45 & HD Graphics family (debug symbols)
Video decode & encode driver for Intel G45 chipsets and Intel HD
Graphics for Intel Core processor family.
diff --git a/debian.upstream/libva-driver-intel.install b/debian.upstream/libva-intel-driver.install
similarity index 100%
rename from debian.upstream/libva-driver-intel.install
rename to debian.upstream/libva-intel-driver.install
diff --git a/src/Makefile.am b/src/Makefile.am
index 1e4bbf2..cbe0795 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -40,17 +40,21 @@ driver_ldflags = \
$(NULL)
driver_libs = \
- -lpthread \
+ -lpthread -ldl \
$(DRM_LIBS) -ldrm_intel \
$(LIBVA_DEPS_LIBS) \
- $(LIBVA_X11_DEPS_LIBS) \
$(NULL)
source_c = \
+ dso_utils.c \
gen6_mfc.c \
gen6_mfd.c \
gen6_vme.c \
gen7_mfd.c \
+ gen75_mfd.c \
+ gen75_vme.c \
+ gen75_mfc.c \
+ gen75_vpp_vebox.c \
i965_avc_bsd.c \
i965_avc_hw_scoreboard.c\
i965_avc_ildb.c \
@@ -70,10 +74,12 @@ source_c = \
$(NULL)
source_h = \
+ dso_utils.h \
gen6_mfc.h \
gen6_mfd.h \
gen6_vme.h \
gen7_mfd.h \
+ gen75_vpp_vebox.h \
i965_avc_bsd.h \
i965_avc_hw_scoreboard.h\
i965_avc_ildb.h \
@@ -95,6 +101,8 @@ source_h = \
intel_driver.h \
intel_memman.h \
object_heap.h \
+ sysdeps.h \
+ va_backend_compat.h \
$(NULL)
i965_drv_video_la_LTLIBRARIES = i965_drv_video.la
@@ -105,5 +113,22 @@ i965_drv_video_la_LIBADD = $(driver_libs)
i965_drv_video_la_SOURCES = $(source_c)
noinst_HEADERS = $(source_h)
+if USE_X11
+source_c += i965_output_dri.c
+source_h += i965_output_dri.h
+endif
+
+if USE_WAYLAND
+source_c += i965_output_wayland.c
+source_h += i965_output_wayland.h
+driver_cflags += $(WAYLAND_CFLAGS)
+endif
+
+# Wayland protocol
+i965_output_wayland.c: $(protocol_source_h)
+@wayland_scanner_rules@
+
+DIST_SUBDIRS = $(SUBDIRS) wayland
+
# Extra clean files so that maintainer-clean removes *everything*
MAINTAINERCLEANFILES = Makefile.in config.h.in
diff --git a/src/dso_utils.c b/src/dso_utils.c
new file mode 100644
index 0000000..8fdea11
--- /dev/null
+++ b/src/dso_utils.c
@@ -0,0 +1,110 @@
+/*
+ * Copyright (C) 2012 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#define _GNU_SOURCE 1
+#include <stdio.h>
+#include <stdlib.h>
+#include <dlfcn.h>
+#include "dso_utils.h"
+
+struct dso_handle {
+ void *handle;
+};
+
+/* Opens the named shared library */
+struct dso_handle *
+dso_open(const char *path)
+{
+ struct dso_handle *h;
+
+ h = calloc(1, sizeof(*h));
+ if (!h)
+ return NULL;
+
+ if (path) {
+ h->handle = dlopen(path, RTLD_LAZY|RTLD_LOCAL);
+ if (!h->handle)
+ goto error;
+ }
+ else
+ h->handle = RTLD_DEFAULT;
+ return h;
+
+error:
+ dso_close(h);
+ return NULL;
+}
+
+/* Closes the library and disposes of any allocated data */
+void
+dso_close(struct dso_handle *h)
+{
+ if (!h)
+ return;
+
+ if (h->handle) {
+ if (h->handle != RTLD_DEFAULT)
+ dlclose(h->handle);
+ h->handle = NULL;
+ }
+ free(h);
+}
+
+/* Load symbol into the supplied location */
+static bool
+get_symbol(struct dso_handle *h, void *func_vptr, const char *name)
+{
+ dso_generic_func func, * const func_ptr = func_vptr;
+ const char *error;
+
+ dlerror();
+ func = (dso_generic_func)dlsym(h->handle, name);
+ error = dlerror();
+ if (error) {
+ fprintf(stderr, "error: failed to resolve %s(): %s\n", name, error);
+ return false;
+ }
+ *func_ptr = func;
+ return true;
+}
+
+/* Loads symbols into the supplied vtable */
+bool
+dso_get_symbols(
+ struct dso_handle *h,
+ void *vtable,
+ unsigned int vtable_length,
+ const struct dso_symbol *symbols
+)
+{
+ const struct dso_symbol *s;
+
+ for (s = symbols; s->name != NULL; s++) {
+ if (s->offset + sizeof(dso_generic_func) > vtable_length)
+ return false;
+ if (!get_symbol(h, ((char *)vtable) + s->offset, s->name))
+ return false;
+ }
+ return true;
+}
diff --git a/src/dso_utils.h b/src/dso_utils.h
new file mode 100644
index 0000000..9b8eba7
--- /dev/null
+++ b/src/dso_utils.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright (C) 2012 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef DSO_UTILS_H
+#define DSO_UTILS_H
+
+#include <stdbool.h>
+
+/** Generic pointer to function. */
+typedef void (*dso_generic_func)(void);
+
+/** Library handle (opaque). */
+struct dso_handle;
+
+/** Symbol lookup table. */
+struct dso_symbol {
+ /** Symbol name */
+ const char *name;
+ /** Offset into the supplied vtable where symbol is to be loaded. */
+ unsigned int offset;
+};
+
+/**
+ * Opens the named shared library.
+ *
+ * @param[in] path the library name, or NULL to look up symbols in the already loaded libraries
+ * @return the newly allocated library handle, or NULL on failure
+ */
+struct dso_handle *
+dso_open(const char *path);
+
+/** Closes the library and disposes of any allocated data. */
+void
+dso_close(struct dso_handle *h);
+
+/**
+ * Loads symbols into the supplied vtable.
+ *
+ * @param[in] h the DSO handle
+ * @param[in] vtable the function table to fill in
+ * @param[in] vtable_length the size (in bytes) of the function table
+ * @param[in] symbols the NULL terminated array of symbols to lookup
+ * @return true on success, false otherwise
+ **/
+bool
+dso_get_symbols(
+ struct dso_handle *h,
+ void *vtable,
+ unsigned int vtable_length,
+ const struct dso_symbol *symbols
+);
+
+#endif /* DSO_UTILS_H */
diff --git a/src/gen6_mfc.c b/src/gen6_mfc.c
index 863d4ec..a479f80 100644
--- a/src/gen6_mfc.c
+++ b/src/gen6_mfc.c
@@ -39,9 +39,12 @@
#include "i965_encoder.h"
static void
-gen6_mfc_pipe_mode_select(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
+gen6_mfc_pipe_mode_select(VADriverContextP ctx,
+ struct gen6_encoder_context *gen6_encoder_context,
+ struct intel_batchbuffer *batch)
{
- struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
+ if (batch == NULL)
+ batch = gen6_encoder_context->base.batch;
BEGIN_BCS_BATCH(batch, 4);
@@ -73,9 +76,11 @@ gen6_mfc_pipe_mode_select(VADriverContextP ctx, struct gen6_encoder_context *gen
static void
gen7_mfc_pipe_mode_select(VADriverContextP ctx,
int standard_select,
- struct gen6_encoder_context *gen6_encoder_context)
+ struct gen6_encoder_context *gen6_encoder_context,
+ struct intel_batchbuffer *batch)
{
- struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
+ if (batch == NULL)
+ batch = gen6_encoder_context->base.batch;
assert(standard_select == MFX_FORMAT_MPEG2 ||
standard_select == MFX_FORMAT_AVC);
@@ -107,11 +112,15 @@ gen7_mfc_pipe_mode_select(VADriverContextP ctx,
}
static void
-gen6_mfc_surface_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
+gen6_mfc_surface_state(VADriverContextP ctx,
+ struct gen6_encoder_context *gen6_encoder_context,
+ struct intel_batchbuffer *batch)
{
- struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
+ if (batch == NULL)
+ batch = gen6_encoder_context->base.batch;
+
BEGIN_BCS_BATCH(batch, 6);
OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
@@ -135,11 +144,15 @@ gen6_mfc_surface_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_e
}
static void
-gen7_mfc_surface_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
+gen7_mfc_surface_state(VADriverContextP ctx,
+ struct gen6_encoder_context *gen6_encoder_context,
+ struct intel_batchbuffer *batch)
{
- struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
+ if (batch == NULL)
+ batch = gen6_encoder_context->base.batch;
+
BEGIN_BCS_BATCH(batch, 6);
OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
@@ -163,12 +176,16 @@ gen7_mfc_surface_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_e
}
static void
-gen6_mfc_pipe_buf_addr_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
+gen6_mfc_pipe_buf_addr_state(VADriverContextP ctx,
+ struct gen6_encoder_context *gen6_encoder_context,
+ struct intel_batchbuffer *batch)
{
- struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
int i;
+ if (batch == NULL)
+ batch = gen6_encoder_context->base.batch;
+
BEGIN_BCS_BATCH(batch, 24);
OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (24 - 2));
@@ -206,11 +223,15 @@ gen6_mfc_pipe_buf_addr_state(VADriverContextP ctx, struct gen6_encoder_context *
}
static void
-gen6_mfc_ind_obj_base_addr_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
+gen6_mfc_ind_obj_base_addr_state(VADriverContextP ctx,
+ struct gen6_encoder_context *gen6_encoder_context,
+ struct intel_batchbuffer *batch)
{
- struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
+ if (batch == NULL)
+ batch = gen6_encoder_context->base.batch;
+
BEGIN_BCS_BATCH(batch, 11);
OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
@@ -231,11 +252,15 @@ gen6_mfc_ind_obj_base_addr_state(VADriverContextP ctx, struct gen6_encoder_conte
}
static void
-gen7_mfc_ind_obj_base_addr_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
+gen7_mfc_ind_obj_base_addr_state(VADriverContextP ctx,
+ struct gen6_encoder_context *gen6_encoder_context,
+ struct intel_batchbuffer *batch)
{
- struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
+ if (batch == NULL)
+ batch = gen6_encoder_context->base.batch;
+
BEGIN_BCS_BATCH(batch, 11);
OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
@@ -256,11 +281,15 @@ gen7_mfc_ind_obj_base_addr_state(VADriverContextP ctx, struct gen6_encoder_conte
}
static void
-gen6_mfc_bsp_buf_base_addr_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
+gen6_mfc_bsp_buf_base_addr_state(VADriverContextP ctx,
+ struct gen6_encoder_context *gen6_encoder_context,
+ struct intel_batchbuffer *batch)
{
- struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
+ if (batch == NULL)
+ batch = gen6_encoder_context->base.batch;
+
BEGIN_BCS_BATCH(batch, 4);
OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));
@@ -274,14 +303,17 @@ gen6_mfc_bsp_buf_base_addr_state(VADriverContextP ctx, struct gen6_encoder_conte
}
static void
-gen6_mfc_avc_img_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
+gen6_mfc_avc_img_state(VADriverContextP ctx,
+ struct gen6_encoder_context *gen6_encoder_context,
+ struct intel_batchbuffer *batch)
{
- struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
-
int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
+ if (batch == NULL)
+ batch = gen6_encoder_context->base.batch;
+
BEGIN_BCS_BATCH(batch, 13);
OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (13 - 2));
OUT_BCS_BATCH(batch,
@@ -328,14 +360,17 @@ gen6_mfc_avc_img_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_e
}
static void
-gen7_mfc_avc_img_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
+gen7_mfc_avc_img_state(VADriverContextP ctx,
+ struct gen6_encoder_context *gen6_encoder_context,
+ struct intel_batchbuffer *batch)
{
- struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
-
int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
+ if (batch == NULL)
+ batch = gen6_encoder_context->base.batch;
+
BEGIN_BCS_BATCH(batch, 16);
OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
OUT_BCS_BATCH(batch,
@@ -385,49 +420,16 @@ gen7_mfc_avc_img_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_e
ADVANCE_BCS_BATCH(batch);
}
-static void gen6_mfc_avc_directmode_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
-{
- struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
- int i;
-
- BEGIN_BCS_BATCH(batch, 69);
-
- OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));
- //TODO: reference DMV
- for(i = 0; i < 16; i++){
- OUT_BCS_BATCH(batch, 0);
- OUT_BCS_BATCH(batch, 0);
- }
-
- //TODO: current DMV just for test
-#if 0
- OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[0].bo,
- I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
- 0);
-#else
- //drm_intel_bo_pin(mfc_context->direct_mv_buffers[0].bo, 0x1000);
- //OUT_BCS_BATCH(batch, mfc_context->direct_mv_buffers[0].bo->offset);
- OUT_BCS_BATCH(batch, 0);
-#endif
-
-
- OUT_BCS_BATCH(batch, 0);
-
- //TODO: POL list
- for(i = 0; i < 34; i++) {
- OUT_BCS_BATCH(batch, 0);
- }
-
- ADVANCE_BCS_BATCH(batch);
-}
-
static void gen6_mfc_avc_slice_state(VADriverContextP ctx,
int intra_slice,
- struct gen6_encoder_context *gen6_encoder_context)
+ struct gen6_encoder_context *gen6_encoder_context,
+ struct intel_batchbuffer *batch)
{
- struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
+ if (batch == NULL)
+ batch = gen6_encoder_context->base.batch;
+
BEGIN_BCS_BATCH(batch, 11);;
OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2) );
@@ -470,11 +472,15 @@ static void gen6_mfc_avc_slice_state(VADriverContextP ctx,
ADVANCE_BCS_BATCH(batch);
}
-static void gen6_mfc_avc_qm_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
+static void gen6_mfc_avc_qm_state(VADriverContextP ctx,
+ struct gen6_encoder_context *gen6_encoder_context,
+ struct intel_batchbuffer *batch)
{
- struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
int i;
+ if (batch == NULL)
+ batch = gen6_encoder_context->base.batch;
+
BEGIN_BCS_BATCH(batch, 58);
OUT_BCS_BATCH(batch, MFX_AVC_QM_STATE | 56);
@@ -486,11 +492,15 @@ static void gen6_mfc_avc_qm_state(VADriverContextP ctx, struct gen6_encoder_cont
ADVANCE_BCS_BATCH(batch);
}
-static void gen6_mfc_avc_fqm_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
+static void gen6_mfc_avc_fqm_state(VADriverContextP ctx,
+ struct gen6_encoder_context *gen6_encoder_context,
+ struct intel_batchbuffer *batch)
{
- struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
int i;
+ if (batch == NULL)
+ batch = gen6_encoder_context->base.batch;
+
BEGIN_BCS_BATCH(batch, 113);
OUT_BCS_BATCH(batch, MFC_AVC_FQM_STATE | (113 - 2));
@@ -506,11 +516,14 @@ gen7_mfc_qm_state(VADriverContextP ctx,
int qm_type,
unsigned int *qm,
int qm_length,
- struct gen6_encoder_context *gen6_encoder_context)
+ struct gen6_encoder_context *gen6_encoder_context,
+ struct intel_batchbuffer *batch)
{
- struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
unsigned int qm_buffer[16];
+ if (batch == NULL)
+ batch = gen6_encoder_context->base.batch;
+
assert(qm_length <= 16);
assert(sizeof(*qm) == 4);
memcpy(qm_buffer, qm, qm_length * 4);
@@ -522,7 +535,9 @@ gen7_mfc_qm_state(VADriverContextP ctx,
ADVANCE_BCS_BATCH(batch);
}
-static void gen7_mfc_avc_qm_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
+static void gen7_mfc_avc_qm_state(VADriverContextP ctx,
+ struct gen6_encoder_context *gen6_encoder_context,
+ struct intel_batchbuffer *batch)
{
unsigned int qm[16] = {
0x10101010, 0x10101010, 0x10101010, 0x10101010,
@@ -531,10 +546,10 @@ static void gen7_mfc_avc_qm_state(VADriverContextP ctx, struct gen6_encoder_cont
0x10101010, 0x10101010, 0x10101010, 0x10101010
};
- gen7_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 12, gen6_encoder_context);
- gen7_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 12, gen6_encoder_context);
- gen7_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 16, gen6_encoder_context);
- gen7_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 16, gen6_encoder_context);
+ gen7_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 12, gen6_encoder_context, batch);
+ gen7_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 12, gen6_encoder_context, batch);
+ gen7_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 16, gen6_encoder_context, batch);
+ gen7_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 16, gen6_encoder_context, batch);
}
static void
@@ -542,11 +557,14 @@ gen7_mfc_fqm_state(VADriverContextP ctx,
int fqm_type,
unsigned int *fqm,
int fqm_length,
- struct gen6_encoder_context *gen6_encoder_context)
+ struct gen6_encoder_context *gen6_encoder_context,
+ struct intel_batchbuffer *batch)
{
- struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
unsigned int fqm_buffer[32];
+ if (batch == NULL)
+ batch = gen6_encoder_context->base.batch;
+
assert(fqm_length <= 32);
assert(sizeof(*fqm) == 4);
memcpy(fqm_buffer, fqm, fqm_length * 4);
@@ -558,7 +576,9 @@ gen7_mfc_fqm_state(VADriverContextP ctx,
ADVANCE_BCS_BATCH(batch);
}
-static void gen7_mfc_avc_fqm_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
+static void gen7_mfc_avc_fqm_state(VADriverContextP ctx,
+ struct gen6_encoder_context *gen6_encoder_context,
+ struct intel_batchbuffer *batch)
{
unsigned int qm[32] = {
0x10001000, 0x10001000, 0x10001000, 0x10001000,
@@ -571,17 +591,21 @@ static void gen7_mfc_avc_fqm_state(VADriverContextP ctx, struct gen6_encoder_con
0x10001000, 0x10001000, 0x10001000, 0x10001000
};
- gen7_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 24, gen6_encoder_context);
- gen7_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 24, gen6_encoder_context);
- gen7_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 32, gen6_encoder_context);
- gen7_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 32, gen6_encoder_context);
+ gen7_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 24, gen6_encoder_context, batch);
+ gen7_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 24, gen6_encoder_context, batch);
+ gen7_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 32, gen6_encoder_context, batch);
+ gen7_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 32, gen6_encoder_context, batch);
}
-static void gen6_mfc_avc_ref_idx_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
+static void gen6_mfc_avc_ref_idx_state(VADriverContextP ctx,
+ struct gen6_encoder_context *gen6_encoder_context,
+ struct intel_batchbuffer *batch)
{
- struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
int i;
+ if (batch == NULL)
+ batch = gen6_encoder_context->base.batch;
+
BEGIN_BCS_BATCH(batch, 10);
OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
@@ -595,32 +619,16 @@ static void gen6_mfc_avc_ref_idx_state(VADriverContextP ctx, struct gen6_encoder
ADVANCE_BCS_BATCH(batch);
}
-static void
-gen6_mfc_avc_insert_object(VADriverContextP ctx, int flush_data, struct gen6_encoder_context *gen6_encoder_context)
-{
- struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
-
- BEGIN_BCS_BATCH(batch, 4);
-
- OUT_BCS_BATCH(batch, MFC_AVC_INSERT_OBJECT | (4 -2 ) );
- OUT_BCS_BATCH(batch, (32<<8) |
- (1 << 3) |
- (1 << 2) |
- (flush_data << 1) |
- (1<<0) );
- OUT_BCS_BATCH(batch, 0x00000003);
- OUT_BCS_BATCH(batch, 0xABCD1234);
-
- ADVANCE_BCS_BATCH(batch);
-}
-
static int
gen6_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int end_mb, int qp,unsigned int *msg,
- struct gen6_encoder_context *gen6_encoder_context)
+ struct gen6_encoder_context *gen6_encoder_context,
+ struct intel_batchbuffer *batch)
{
- struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
int len_in_dwords = 11;
+ if (batch == NULL)
+ batch = gen6_encoder_context->base.batch;
+
BEGIN_BCS_BATCH(batch, len_in_dwords);
OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
@@ -651,11 +659,13 @@ gen6_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int end_mb, in
}
static int gen6_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, int qp, unsigned int offset,
- struct gen6_encoder_context *gen6_encoder_context)
+ struct gen6_encoder_context *gen6_encoder_context, struct intel_batchbuffer *batch)
{
- struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
int len_in_dwords = 11;
+ if (batch == NULL)
+ batch = gen6_encoder_context->base.batch;
+
BEGIN_BCS_BATCH(batch, len_in_dwords);
OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
@@ -696,12 +706,16 @@ static int gen6_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int
return len_in_dwords;
}
-static void gen6_mfc_init(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
+static void gen6_mfc_init(VADriverContextP ctx,
+ struct encode_state *encode_state,
+ struct gen6_encoder_context *gen6_encoder_context)
{
struct i965_driver_data *i965 = i965_driver_data(ctx);
struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
dri_bo *bo;
int i;
+ VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param->buffer;
+ int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
/*Encode common setup for MFC*/
dri_bo_unreference(mfc_context->post_deblocking_output.bo);
@@ -730,7 +744,7 @@ static void gen6_mfc_init(VADriverContextP ctx, struct gen6_encoder_context *gen
dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
bo = dri_bo_alloc(i965->intel.bufmgr,
"Buffer",
- 128 * 64,
+ width_in_mbs * 64,
64);
assert(bo);
mfc_context->intra_row_store_scratch_buffer.bo = bo;
@@ -738,7 +752,7 @@ static void gen6_mfc_init(VADriverContextP ctx, struct gen6_encoder_context *gen
dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
bo = dri_bo_alloc(i965->intel.bufmgr,
"Buffer",
- 49152, /* 6 * 128 * 64 */
+ 4 * width_in_mbs * 64, /* 4 * width_in_mbs * 64 */
64);
assert(bo);
mfc_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
@@ -746,7 +760,7 @@ static void gen6_mfc_init(VADriverContextP ctx, struct gen6_encoder_context *gen
dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
bo = dri_bo_alloc(i965->intel.bufmgr,
"Buffer",
- 12288, /* 1.5 * 128 * 64 */
+ 128 * width_in_mbs, /* 2 * width_in_mbs * 64 */
0x1000);
assert(bo);
mfc_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
@@ -757,7 +771,7 @@ void gen6_mfc_avc_pipeline_programing(VADriverContextP ctx,
struct gen6_encoder_context *gen6_encoder_context)
{
struct i965_driver_data *i965 = i965_driver_data(ctx);
- struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
+ struct intel_batchbuffer *main_batch = gen6_encoder_context->base.batch;
struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param->buffer;
@@ -768,8 +782,9 @@ void gen6_mfc_avc_pipeline_programing(VADriverContextP ctx,
int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
int x,y;
+ struct intel_batchbuffer *batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, width_in_mbs * height_in_mbs * 12 * 4 + 0x800);
- intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
+ intel_batchbuffer_start_atomic_bcs(batch, width_in_mbs * height_in_mbs * 12 * 4 + 0x700);
if (is_intra) {
dri_bo_map(vme_context->vme_output.bo , 1);
@@ -785,39 +800,39 @@ void gen6_mfc_avc_pipeline_programing(VADriverContextP ctx,
intel_batchbuffer_emit_mi_flush(batch);
if (IS_GEN7(i965->intel.device_id)) {
- gen7_mfc_pipe_mode_select(ctx, MFX_FORMAT_AVC, gen6_encoder_context);
- gen7_mfc_surface_state(ctx, gen6_encoder_context);
- gen7_mfc_ind_obj_base_addr_state(ctx, gen6_encoder_context);
+ gen7_mfc_pipe_mode_select(ctx, MFX_FORMAT_AVC, gen6_encoder_context, batch);
+ gen7_mfc_surface_state(ctx, gen6_encoder_context, batch);
+ gen7_mfc_ind_obj_base_addr_state(ctx, gen6_encoder_context, batch);
} else {
- gen6_mfc_pipe_mode_select(ctx, gen6_encoder_context);
- gen6_mfc_surface_state(ctx, gen6_encoder_context);
- gen6_mfc_ind_obj_base_addr_state(ctx, gen6_encoder_context);
+ gen6_mfc_pipe_mode_select(ctx, gen6_encoder_context, batch);
+ gen6_mfc_surface_state(ctx, gen6_encoder_context, batch);
+ gen6_mfc_ind_obj_base_addr_state(ctx, gen6_encoder_context, batch);
}
- gen6_mfc_pipe_buf_addr_state(ctx, gen6_encoder_context);
- gen6_mfc_bsp_buf_base_addr_state(ctx, gen6_encoder_context);
+ gen6_mfc_pipe_buf_addr_state(ctx, gen6_encoder_context, batch);
+ gen6_mfc_bsp_buf_base_addr_state(ctx, gen6_encoder_context, batch);
if (IS_GEN7(i965->intel.device_id)) {
- gen7_mfc_avc_img_state(ctx, gen6_encoder_context);
- gen7_mfc_avc_qm_state(ctx, gen6_encoder_context);
- gen7_mfc_avc_fqm_state(ctx, gen6_encoder_context);
+ gen7_mfc_avc_img_state(ctx, gen6_encoder_context, batch);
+ gen7_mfc_avc_qm_state(ctx, gen6_encoder_context, batch);
+ gen7_mfc_avc_fqm_state(ctx, gen6_encoder_context, batch);
} else {
- gen6_mfc_avc_img_state(ctx, gen6_encoder_context);
- gen6_mfc_avc_qm_state(ctx, gen6_encoder_context);
- gen6_mfc_avc_fqm_state(ctx, gen6_encoder_context);
+ gen6_mfc_avc_img_state(ctx, gen6_encoder_context, batch);
+ gen6_mfc_avc_qm_state(ctx, gen6_encoder_context, batch);
+ gen6_mfc_avc_fqm_state(ctx, gen6_encoder_context, batch);
}
- gen6_mfc_avc_ref_idx_state(ctx, gen6_encoder_context);
- gen6_mfc_avc_slice_state(ctx, is_intra, gen6_encoder_context);
+ gen6_mfc_avc_ref_idx_state(ctx, gen6_encoder_context, batch);
+ gen6_mfc_avc_slice_state(ctx, is_intra, gen6_encoder_context, batch);
emit_new_state = 0;
}
if (is_intra) {
assert(msg);
- object_len_in_bytes = gen6_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, gen6_encoder_context);
+ object_len_in_bytes = gen6_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, gen6_encoder_context, batch);
msg += 4;
} else {
- object_len_in_bytes = gen6_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp, offset, gen6_encoder_context);
+ object_len_in_bytes = gen6_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp, offset, gen6_encoder_context, batch);
offset += 64;
}
@@ -832,8 +847,30 @@ void gen6_mfc_avc_pipeline_programing(VADriverContextP ctx,
if (is_intra)
dri_bo_unmap(vme_context->vme_output.bo);
-
+
+ intel_batchbuffer_align(batch, 8);
+
+ BEGIN_BCS_BATCH(batch, 2);
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
+ ADVANCE_BCS_BATCH(batch);
+
intel_batchbuffer_end_atomic(batch);
+
+ /* chain to the main batch buffer */
+ intel_batchbuffer_start_atomic_bcs(main_batch, 0x100);
+ intel_batchbuffer_emit_mi_flush(main_batch);
+ BEGIN_BCS_BATCH(main_batch, 2);
+ OUT_BCS_BATCH(main_batch, MI_BATCH_BUFFER_START | (1 << 8));
+ OUT_BCS_RELOC(main_batch,
+ batch->buffer,
+ I915_GEM_DOMAIN_COMMAND, 0,
+ 0);
+ ADVANCE_BCS_BATCH(main_batch);
+ intel_batchbuffer_end_atomic(main_batch);
+
+ // end programing
+ intel_batchbuffer_free(batch);
}
static VAStatus gen6_mfc_avc_prepare(VADriverContextP ctx,
@@ -921,7 +958,7 @@ gen6_mfc_avc_encode_picture(VADriverContextP ctx,
struct encode_state *encode_state,
struct gen6_encoder_context *gen6_encoder_context)
{
- gen6_mfc_init(ctx, gen6_encoder_context);
+ gen6_mfc_init(ctx, encode_state, gen6_encoder_context);
gen6_mfc_avc_prepare(ctx, encode_state, gen6_encoder_context);
gen6_mfc_run(ctx, encode_state, gen6_encoder_context);
gen6_mfc_stop(ctx, encode_state, gen6_encoder_context);
diff --git a/src/gen6_mfc.h b/src/gen6_mfc.h
index 75bcf63..22cd62b 100644
--- a/src/gen6_mfc.h
+++ b/src/gen6_mfc.h
@@ -103,4 +103,12 @@ gen6_mfc_pipeline(VADriverContextP ctx,
Bool gen6_mfc_context_init(VADriverContextP ctx, struct gen6_mfc_context *mfc_context);
Bool gen6_mfc_context_destroy(struct gen6_mfc_context *mfc_context);
+VAStatus
+gen75_mfc_pipeline(VADriverContextP ctx,
+ VAProfile profile,
+ struct encode_state *encode_state,
+ struct gen6_encoder_context *gen6_encoder_context);
+Bool gen75_mfc_context_init(VADriverContextP ctx, struct gen6_mfc_context *mfc_context);
+Bool gen75_mfc_context_destroy(struct gen6_mfc_context *mfc_context);
+
#endif /* _GEN6_MFC_BCS_H_ */
diff --git a/src/gen6_mfd.c b/src/gen6_mfd.c
old mode 100644
new mode 100755
index c4bec7b..9c110c6
--- a/src/gen6_mfd.c
+++ b/src/gen6_mfd.c
@@ -26,14 +26,13 @@
*
*/
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <assert.h>
+#ifndef HAVE_GEN_AVC_SURFACE
+#define HAVE_GEN_AVC_SURFACE 1
+#endif
+#include "sysdeps.h"
#include "intel_batchbuffer.h"
#include "intel_driver.h"
-
#include "i965_defines.h"
#include "i965_drv_video.h"
#include "i965_decoder_utils.h"
@@ -167,37 +166,20 @@ gen6_mfd_avc_frame_store_index(VADriverContextP ctx,
}
}
-static void
-gen6_mfd_free_avc_surface(void **data)
-{
- struct gen6_avc_surface *gen6_avc_surface = *data;
-
- if (!gen6_avc_surface)
- return;
-
- dri_bo_unreference(gen6_avc_surface->dmv_top);
- gen6_avc_surface->dmv_top = NULL;
- dri_bo_unreference(gen6_avc_surface->dmv_bottom);
- gen6_avc_surface->dmv_bottom = NULL;
-
- free(gen6_avc_surface);
- *data = NULL;
-}
-
static void
gen6_mfd_init_avc_surface(VADriverContextP ctx,
VAPictureParameterBufferH264 *pic_param,
struct object_surface *obj_surface)
{
struct i965_driver_data *i965 = i965_driver_data(ctx);
- struct gen6_avc_surface *gen6_avc_surface = obj_surface->private_data;
+ GenAvcSurface *gen6_avc_surface = obj_surface->private_data;
int height_in_mbs;
- obj_surface->free_private_data = gen6_mfd_free_avc_surface;
+ obj_surface->free_private_data = gen_free_avc_surface;
height_in_mbs = ((pic_param->picture_height_in_mbs_minus1 + 1) & 0xff); /* frame height */
if (!gen6_avc_surface) {
- gen6_avc_surface = calloc(sizeof(struct gen6_avc_surface), 1);
+ gen6_avc_surface = calloc(sizeof(GenAvcSurface), 1);
assert((obj_surface->size & 0x3f) == 0);
obj_surface->private_data = gen6_avc_surface;
}
@@ -413,27 +395,6 @@ gen6_mfd_bsp_buf_base_addr_state(VADriverContextP ctx,
}
static void
-gen6_mfd_aes_state(VADriverContextP ctx,
- struct decode_state *decode_state,
- int standard_select)
-{
- /* FIXME */
-}
-
-static void
-gen6_mfd_wait(VADriverContextP ctx,
- struct decode_state *decode_state,
- int standard_select,
- struct gen6_mfd_context *gen6_mfd_context)
-{
- struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
-
- BEGIN_BCS_BATCH(batch, 1);
- OUT_BCS_BATCH(batch, MFX_WAIT | (1 << 8));
- ADVANCE_BCS_BATCH(batch);
-}
-
-static void
gen6_mfd_avc_img_state(VADriverContextP ctx,
struct decode_state *decode_state,
struct gen6_mfd_context *gen6_mfd_context)
@@ -575,7 +536,7 @@ gen6_mfd_avc_directmode_state(VADriverContextP ctx,
struct i965_driver_data *i965 = i965_driver_data(ctx);
struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
struct object_surface *obj_surface;
- struct gen6_avc_surface *gen6_avc_surface;
+ GenAvcSurface *gen6_avc_surface;
VAPictureH264 *va_pic;
int i, j;
@@ -1641,9 +1602,19 @@ gen6_mfd_vc1_pic_state(VADriverContextP ctx,
if (picture_type == GEN6_VC1_I_PICTURE || picture_type == GEN6_VC1_BI_PICTURE) /* I picture */
trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx2;
- else
+ else {
trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx1;
-
+ /*
+ * 8.3.6.2.1 Transform Type Selection
+ * If variable-sized transform coding is not enabled,
+ * then the 8x8 transform shall be used for all blocks.
+ * it is also MFX_VC1_PIC_STATE requirement.
+ */
+ if (pic_param->transform_fields.bits.variable_sized_transform_flag == 0) {
+ pic_param->transform_fields.bits.mb_level_transform_type_flag = 1;
+ pic_param->transform_fields.bits.frame_level_transform_type = 0;
+ }
+ }
if (picture_type == GEN6_VC1_B_PICTURE) {
struct gen6_vc1_surface *gen6_vc1_surface = NULL;
@@ -2031,7 +2002,7 @@ gen6_dec_hw_context_init(VADriverContextP ctx, VAProfile profile)
gen6_mfd_context->base.destroy = gen6_mfd_context_destroy;
gen6_mfd_context->base.run = gen6_mfd_decode_picture;
- gen6_mfd_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER);
+ gen6_mfd_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
for (i = 0; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface); i++) {
gen6_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
diff --git a/src/gen6_mfd.h b/src/gen6_mfd.h
index 6e20364..de131d6 100644
--- a/src/gen6_mfd.h
+++ b/src/gen6_mfd.h
@@ -35,13 +35,6 @@
#include <intel_bufmgr.h>
#include "i965_decoder.h"
-struct gen6_avc_surface
-{
- dri_bo *dmv_top;
- dri_bo *dmv_bottom;
- int dmv_bottom_flag;
-};
-
#define GEN6_VC1_I_PICTURE 0
#define GEN6_VC1_P_PICTURE 1
#define GEN6_VC1_B_PICTURE 2
diff --git a/src/gen6_vme.c b/src/gen6_vme.c
index 2ffbd43..9fe8cd9 100644
--- a/src/gen6_vme.c
+++ b/src/gen6_vme.c
@@ -627,11 +627,9 @@ static VAStatus gen6_vme_constant_setup(VADriverContextP ctx,
struct gen6_encoder_context *gen6_encoder_context)
{
struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
- unsigned char *constant_buffer;
dri_bo_map(vme_context->curbe.bo, 1);
assert(vme_context->curbe.bo->virtual);
- constant_buffer = vme_context->curbe.bo->virtual;
/*TODO copy buffer into CURB*/
@@ -672,19 +670,26 @@ static VAStatus gen6_vme_vme_state_setup(VADriverContextP ctx,
return VA_STATUS_SUCCESS;
}
-static void gen6_vme_pipeline_select(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
+static void gen6_vme_pipeline_select(VADriverContextP ctx,
+ struct gen6_encoder_context *gen6_encoder_context,
+ struct intel_batchbuffer *batch)
{
- struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
+ if (batch == NULL)
+ batch = gen6_encoder_context->base.batch;
BEGIN_BATCH(batch, 1);
OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
ADVANCE_BATCH(batch);
}
-static void gen6_vme_state_base_address(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
+static void gen6_vme_state_base_address(VADriverContextP ctx,
+ struct gen6_encoder_context *gen6_encoder_context,
+ struct intel_batchbuffer *batch)
{
struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
- struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
+
+ if (batch == NULL)
+ batch = gen6_encoder_context->base.batch;
BEGIN_BATCH(batch, 10);
@@ -709,11 +714,15 @@ static void gen6_vme_state_base_address(VADriverContextP ctx, struct gen6_encode
ADVANCE_BATCH(batch);
}
-static void gen6_vme_vfe_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
+static void gen6_vme_vfe_state(VADriverContextP ctx,
+ struct gen6_encoder_context *gen6_encoder_context,
+ struct intel_batchbuffer *batch)
{
- struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
+ if (batch == NULL)
+ batch = gen6_encoder_context->base.batch;
+
BEGIN_BATCH(batch, 8);
OUT_BATCH(batch, CMD_MEDIA_VFE_STATE | 6); /*Gen6 CMD_MEDIA_STATE_POINTERS = CMD_MEDIA_STATE */
@@ -732,11 +741,15 @@ static void gen6_vme_vfe_state(VADriverContextP ctx, struct gen6_encoder_context
}
-static void gen6_vme_curbe_load(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
+static void gen6_vme_curbe_load(VADriverContextP ctx,
+ struct gen6_encoder_context *gen6_encoder_context,
+ struct intel_batchbuffer *batch)
{
- struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
+ if (batch == NULL)
+ batch = gen6_encoder_context->base.batch;
+
BEGIN_BATCH(batch, 4);
OUT_BATCH(batch, CMD_MEDIA_CURBE_LOAD | 2);
@@ -748,11 +761,15 @@ static void gen6_vme_curbe_load(VADriverContextP ctx, struct gen6_encoder_contex
ADVANCE_BATCH(batch);
}
-static void gen6_vme_idrt(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
+static void gen6_vme_idrt(VADriverContextP ctx,
+ struct gen6_encoder_context *gen6_encoder_context,
+ struct intel_batchbuffer *batch)
{
- struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
+ if (batch == NULL)
+ batch = gen6_encoder_context->base.batch;
+
BEGIN_BATCH(batch, 4);
OUT_BATCH(batch, CMD_MEDIA_INTERFACE_LOAD | 2);
@@ -767,14 +784,17 @@ static int gen6_vme_media_object(VADriverContextP ctx,
struct encode_state *encode_state,
int mb_x, int mb_y,
int kernel,
- struct gen6_encoder_context *gen6_encoder_context)
+ struct gen6_encoder_context *gen6_encoder_context,
+ struct intel_batchbuffer *batch)
{
struct i965_driver_data *i965 = i965_driver_data(ctx);
- struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
struct object_surface *obj_surface = SURFACE(encode_state->current_render_target);
int mb_width = ALIGN(obj_surface->orig_width, 16) / 16;
int len_in_dowrds = 6 + 1;
+ if (batch == NULL)
+ batch = gen6_encoder_context->base.batch;
+
BEGIN_BATCH(batch, len_in_dowrds);
OUT_BATCH(batch, CMD_MEDIA_OBJECT | (len_in_dowrds - 2));
@@ -844,7 +864,8 @@ static void gen6_vme_pipeline_programing(VADriverContextP ctx,
struct encode_state *encode_state,
struct gen6_encoder_context *gen6_encoder_context)
{
- struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ struct intel_batchbuffer *main_batch = gen6_encoder_context->base.batch;
VAEncSliceParameterBuffer *pSliceParameter = (VAEncSliceParameterBuffer *)encode_state->slice_params[0]->buffer;
VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param->buffer;
int is_intra = pSliceParameter->slice_flags.bits.is_intra;
@@ -852,8 +873,9 @@ static void gen6_vme_pipeline_programing(VADriverContextP ctx,
int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
int emit_new_state = 1, object_len_in_bytes;
int x, y;
+ struct intel_batchbuffer *batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_RENDER, width_in_mbs * height_in_mbs * 8 * 4 + 0x200);
- intel_batchbuffer_start_atomic(batch, 0x1000);
+ intel_batchbuffer_start_atomic(batch, width_in_mbs * height_in_mbs * 8 * 4 + 0x100);
for(y = 0; y < height_in_mbs; y++){
for(x = 0; x < width_in_mbs; x++){
@@ -863,19 +885,19 @@ static void gen6_vme_pipeline_programing(VADriverContextP ctx,
intel_batchbuffer_emit_mi_flush(batch);
/*Step2: State command PIPELINE_SELECT*/
- gen6_vme_pipeline_select(ctx, gen6_encoder_context);
+ gen6_vme_pipeline_select(ctx, gen6_encoder_context, batch);
/*Step3: State commands configuring pipeline states*/
- gen6_vme_state_base_address(ctx, gen6_encoder_context);
- gen6_vme_vfe_state(ctx, gen6_encoder_context);
- gen6_vme_curbe_load(ctx, gen6_encoder_context);
- gen6_vme_idrt(ctx, gen6_encoder_context);
+ gen6_vme_state_base_address(ctx, gen6_encoder_context, batch);
+ gen6_vme_vfe_state(ctx, gen6_encoder_context, batch);
+ gen6_vme_curbe_load(ctx, gen6_encoder_context, batch);
+ gen6_vme_idrt(ctx, gen6_encoder_context, batch);
emit_new_state = 0;
}
/*Step4: Primitive commands*/
- object_len_in_bytes = gen6_vme_media_object(ctx, encode_state, x, y, is_intra ? VME_INTRA_SHADER : VME_INTER_SHADER, gen6_encoder_context);
+ object_len_in_bytes = gen6_vme_media_object(ctx, encode_state, x, y, is_intra ? VME_INTRA_SHADER : VME_INTER_SHADER, gen6_encoder_context, batch);
if (intel_batchbuffer_check_free_space(batch, object_len_in_bytes) == 0) {
assert(0);
@@ -887,7 +909,29 @@ static void gen6_vme_pipeline_programing(VADriverContextP ctx,
}
}
- intel_batchbuffer_end_atomic(batch);
+ intel_batchbuffer_align(batch, 8);
+
+ BEGIN_BATCH(batch, 2);
+ OUT_BATCH(batch, 0);
+ OUT_BATCH(batch, MI_BATCH_BUFFER_END);
+ ADVANCE_BATCH(batch);
+
+ intel_batchbuffer_end_atomic(batch);
+
+ /* chain to the main batch buffer */
+ intel_batchbuffer_start_atomic(main_batch, 0x100);
+ intel_batchbuffer_emit_mi_flush(main_batch);
+ BEGIN_BATCH(main_batch, 2);
+ OUT_BATCH(main_batch, MI_BATCH_BUFFER_START | (2 << 6));
+ OUT_RELOC(main_batch,
+ batch->buffer,
+ I915_GEM_DOMAIN_COMMAND, 0,
+ 0);
+ ADVANCE_BATCH(main_batch);
+ intel_batchbuffer_end_atomic(main_batch);
+
+ // end programing
+ intel_batchbuffer_free(batch);
}
static VAStatus gen6_vme_prepare(VADriverContextP ctx,
diff --git a/src/gen6_vme.h b/src/gen6_vme.h
index 800898c..4d540c9 100644
--- a/src/gen6_vme.h
+++ b/src/gen6_vme.h
@@ -35,6 +35,7 @@
#include <intel_bufmgr.h>
+#define INTRA_VME_OUTPUT_IN_BYTES 16 /* in bytes */
#define MAX_INTERFACE_DESC_GEN6 32
#define MAX_MEDIA_SURFACES_GEN6 34
@@ -77,6 +78,7 @@ struct gen6_vme_context
} vme_output;
struct i965_kernel vme_kernels[GEN6_VME_KERNEL_NUMBER];
+ void *vme_state_message;
};
VAStatus gen6_vme_pipeline(VADriverContextP ctx,
@@ -86,4 +88,11 @@ VAStatus gen6_vme_pipeline(VADriverContextP ctx,
Bool gen6_vme_context_init(VADriverContextP ctx, struct gen6_vme_context *vme_context);
Bool gen6_vme_context_destroy(struct gen6_vme_context *vme_context);
+VAStatus gen75_vme_pipeline(VADriverContextP ctx,
+ VAProfile profile,
+ struct encode_state *encode_state,
+ struct gen6_encoder_context *gen6_encoder_context);
+
+Bool gen75_vme_context_init(VADriverContextP ctx, struct gen6_vme_context *vme_context);
+Bool gen75_vme_context_destroy(struct gen6_vme_context *vme_context);
#endif /* _GEN6_VME_H_ */
diff --git a/src/gen75_mfc.c b/src/gen75_mfc.c
new file mode 100644
index 0000000..0f2c62e
--- /dev/null
+++ b/src/gen75_mfc.c
@@ -0,0 +1,1183 @@
+/*
+ * Copyright © 2010-2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Zhao Yakui <yakui.zhao at intel.com>
+ * Xiang Haihao <haihao.xiang at intel.com>
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+
+#include "assert.h"
+#include "intel_batchbuffer.h"
+#include "i965_defines.h"
+#include "i965_structs.h"
+#include "i965_drv_video.h"
+#include "i965_encoder.h"
+
+#define B0_STEP_REV 2 /* lowest intel.revision value for which IS_STEPPING_BPLUS() is true */
+#define IS_STEPPING_BPLUS(i965) ((i965->intel.revision) >= B0_STEP_REV) /* used below to pick the *_bplus command layouts */
+/*
+ * Emit the 5-dword MFX_PIPE_MODE_SELECT command with the encoding-mode bit
+ * set and post-deblocking output enabled.
+ *
+ * standard_select: codec value placed in the low bits of DW1; asserted to be
+ * MFX_FORMAT_MPEG2 or MFX_FORMAT_AVC.
+ * batch: batch buffer to write into; when NULL, the encoder
+ * context's own base.batch is used instead.
+ */
+static void
+gen75_mfc_pipe_mode_select(VADriverContextP ctx,
+ int standard_select,
+ struct gen6_encoder_context *gen6_encoder_context,
+ struct intel_batchbuffer *batch)
+{
+ if (batch == NULL)
+ batch = gen6_encoder_context->base.batch;
+
+ assert(standard_select == MFX_FORMAT_MPEG2 ||
+ standard_select == MFX_FORMAT_AVC);
+
+ BEGIN_BCS_BATCH(batch, 5);
+ OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2)); /* header carries (dword count - 2) */
+ OUT_BCS_BATCH(batch,
+ (MFX_LONG_MODE << 17) | /* Must be long format for encoder */
+ (MFD_MODE_VLD << 15) | /* VLD mode */
+ (0 << 10) | /* disable Stream-Out */
+ (1 << 9) | /* Post Deblocking Output */
+ (0 << 8) | /* Pre Deblocking Output */
+ (0 << 5) | /* not in stitch mode */
+ (1 << 4) | /* encoding mode */
+ (standard_select << 0)); /* standard select: avc or mpeg2 */
+ OUT_BCS_BATCH(batch,
+ (0 << 7) | /* expand NOA bus flag */
+ (0 << 6) | /* disable slice-level clock gating */
+ (0 << 5) | /* disable clock gating for NOA */
+ (0 << 4) | /* terminate if AVC motion and POC table error occurs */
+ (0 << 3) | /* terminate if AVC mbdata error occurs */
+ (0 << 2) | /* terminate if AVC CABAC/CAVLC decode error occurs */
+ (0 << 1) |
+ (0 << 0));
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+
+ ADVANCE_BCS_BATCH(batch);
+}
+
+#define INTER_MODE_MASK 0x03
+#define INTER_8X8 0x03
+#define SUBMB_SHAPE_MASK 0x00FF00
+
+#define INTER_MV8 (4 << 20)
+#define INTER_MV32 (6 << 20)
+
+/*
+ * Emit the 6-dword MFX_SURFACE_STATE command from the values stored in
+ * mfc_context->surface_state (width, height, w_pitch, h_pitch).
+ *
+ * The surface format is hard-coded here: MFX_SURFACE_PLANAR_420_8 with
+ * interleaved U/V, tiled, Y-major tile walk.
+ *
+ * batch: batch buffer to write into; when NULL, the encoder context's own
+ * base.batch is used instead.
+ */
+static void
+gen75_mfc_surface_state(VADriverContextP ctx,
+ struct gen6_encoder_context *gen6_encoder_context,
+ struct intel_batchbuffer *batch)
+{
+ struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
+
+ if (batch == NULL)
+ batch = gen6_encoder_context->base.batch;
+
+ BEGIN_BCS_BATCH(batch, 6);
+
+ OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2)); /* header carries (dword count - 2) */
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch,
+ ((mfc_context->surface_state.height - 1) << 18) | /* height is programmed minus one */
+ ((mfc_context->surface_state.width - 1) << 4)); /* width is programmed minus one */
+ OUT_BCS_BATCH(batch,
+ (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
+ (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
+ (0 << 22) | /* surface object control state, FIXME??? */
+ ((mfc_context->surface_state.w_pitch - 1) << 3) | /* pitch */
+ (0 << 2) | /* must be 0 for interleave U/V */
+ (1 << 1) | /* must be tiled */
+ (I965_TILEWALK_YMAJOR << 0)); /* tile walk, TILEWALK_YMAJOR */
+ OUT_BCS_BATCH(batch,
+ (0 << 16) | /* must be 0 for interleave U/V */
+ (mfc_context->surface_state.h_pitch)); /* y offset for U(cb) */
+ OUT_BCS_BATCH(batch, 0);
+ ADVANCE_BCS_BATCH(batch);
+}
+/*
+ * Emit the 61-dword layout of MFX_PIPE_BUF_ADDR_STATE. This is the variant
+ * gen75_mfc_pipe_buf_addr_state() switches to when IS_STEPPING_BPLUS() is
+ * true.
+ *
+ * In this layout each buffer takes three dwords (the address followed by two
+ * dwords written as 0 here) and each reference picture slot takes two.
+ * Addresses are supplied for the post-deblocking output (only if its bo is
+ * non-NULL), the uncompressed source picture, the intra row-store scratch
+ * buffer, the deblocking-filter row-store scratch buffer and every non-NULL
+ * reference surface; all remaining entries are written as 0.
+ *
+ * batch: batch buffer to write into; when NULL, the encoder context's own
+ * base.batch is used instead.
+ */
+static void
+gen75_mfc_pipe_buf_addr_state_bplus(VADriverContextP ctx,
+ struct gen6_encoder_context *gen6_encoder_context,
+ struct intel_batchbuffer *batch)
+{
+ struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
+ int i;
+
+ if (batch == NULL)
+ batch = gen6_encoder_context->base.batch;
+
+ BEGIN_BCS_BATCH(batch, 61);
+
+ OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
+
+ /* the DW1-3 is for pre_deblocking */
+ OUT_BCS_BATCH(batch, 0); /* pre output addr */
+
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+ /* the DW4-6 is for the post_deblocking */
+
+ if (mfc_context->post_deblocking_output.bo)
+ OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo,
+ I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+ 0); /* post output addr */
+ else
+ OUT_BCS_BATCH(batch, 0);
+
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+
+ /* the DW7-9 is for the uncompressed_picture */
+ OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo,
+ I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+ 0); /* uncompressed data */
+
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+
+ /* the DW10-12 is for the mb status */
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+
+ /* the DW13-15 is for the intra_row_store_scratch */
+ OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo,
+ I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+ 0);
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+
+ /* the DW16-18 is for the deblocking filter */
+ OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo,
+ I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+ 0);
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+
+ /* the DW 19-50 is for Reference pictures*/
+ for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
+ if ( mfc_context->reference_surfaces[i].bo != NULL) {
+ OUT_BCS_RELOC(batch, mfc_context->reference_surfaces[i].bo,
+ I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+ 0);
+ } else {
+ OUT_BCS_BATCH(batch, 0);
+ }
+ OUT_BCS_BATCH(batch, 0); /* second dword of this reference slot */
+ }
+ OUT_BCS_BATCH(batch, 0); /* DW51, assuming reference_surfaces has 16 entries as the "DW 19-50" note implies -- TODO confirm */
+
+ /* The DW 52-54 is for the MB status buffer */
+ OUT_BCS_BATCH(batch, 0);
+
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+
+ /* the DW 55-57 is the ILDB buffer */
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+
+ /* the DW 58-60 is the second ILDB buffer */
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+ ADVANCE_BCS_BATCH(batch);
+}
+/*
+ * Emit MFX_PIPE_BUF_ADDR_STATE.
+ *
+ * When IS_STEPPING_BPLUS() is true the work is handed entirely to
+ * gen75_mfc_pipe_buf_addr_state_bplus(); otherwise the 25-dword layout is
+ * written, in which every buffer address and every reference picture slot
+ * takes a single dword.
+ *
+ * batch: batch buffer to write into; when NULL, the encoder context's own
+ * base.batch is used instead.
+ *
+ * NOTE(review): unlike the bplus variant, post_deblocking_output.bo is
+ * relocated here without a NULL check -- confirm it is always allocated
+ * before this function runs.
+ */
+static void
+gen75_mfc_pipe_buf_addr_state(VADriverContextP ctx,
+ struct gen6_encoder_context *gen6_encoder_context,
+ struct intel_batchbuffer *batch)
+{
+ struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
+ int i;
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+
+ if (batch == NULL)
+ batch = gen6_encoder_context->base.batch;
+
+ if (IS_STEPPING_BPLUS(i965)) {
+ gen75_mfc_pipe_buf_addr_state_bplus(ctx, gen6_encoder_context, batch);
+ return;
+ }
+
+ BEGIN_BCS_BATCH(batch, 25);
+
+ OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (25 - 2));
+
+ OUT_BCS_BATCH(batch, 0); /* pre output addr */
+
+ OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo,
+ I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+ 0); /* post output addr */
+
+ OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo,
+ I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+ 0); /* uncompressed data */
+
+ OUT_BCS_BATCH(batch, 0); /* StreamOut data*/
+ OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo,
+ I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+ 0); /* intra row-store scratch buffer */
+ OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo,
+ I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+ 0); /* deblocking-filter row-store scratch buffer */
+ /* 7..22 Reference pictures*/
+ for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
+ if ( mfc_context->reference_surfaces[i].bo != NULL) {
+ OUT_BCS_RELOC(batch, mfc_context->reference_surfaces[i].bo,
+ I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+ 0);
+ } else {
+ OUT_BCS_BATCH(batch, 0); /* empty reference slot */
+ }
+ }
+ OUT_BCS_BATCH(batch, 0); /* no block status */
+
+ OUT_BCS_BATCH(batch, 0);
+
+ ADVANCE_BCS_BATCH(batch);
+}
+
+/*
+ * Emit the 26-dword layout of MFX_IND_OBJ_BASE_ADDR_STATE.
+ *
+ * Two buffer addresses are programmed: the indirect MV object base is
+ * vme_context->vme_output.bo, and the indirect PAK-BSE object base is
+ * mfc_context->mfc_indirect_pak_bse_object.bo. Every other entry is written
+ * as 0, apart from the 0x80000000 dword in the MV object group.
+ *
+ * batch: batch buffer to write into; when NULL, the encoder context's own
+ * base.batch is used instead.
+ */
+static void
+gen75_mfc_ind_obj_base_addr_state_bplus(VADriverContextP ctx,
+ struct gen6_encoder_context *gen6_encoder_context,
+ struct intel_batchbuffer *batch)
+{
+ struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
+ struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
+
+ if (batch == NULL)
+ batch = gen6_encoder_context->base.batch;
+
+ BEGIN_BCS_BATCH(batch, 26);
+
+ OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
+ /* the DW1-3 is for the MFX indirect bitstream offset */
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+ /* the DW4-5 is the MFX upper bound */
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+
+ /* the DW6-10 is for MFX Indirect MV Object Base Address */
+ OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
+ OUT_BCS_BATCH(batch, 0);
+
+ /* the DW11-15 is for MFX IT-COFF. Not used on encoder */
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+
+ /* the DW16-20 is for MFX indirect DBLK. Not used on encoder */
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+
+ /* the DW21-25 is for MFC Indirect PAK-BSE Object Base Address for Encoder*/
+ OUT_BCS_RELOC(batch,
+ mfc_context->mfc_indirect_pak_bse_object.bo,
+ I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+ 0);
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0x00000000);
+
+ ADVANCE_BCS_BATCH(batch);
+}
+
+static void
+gen75_mfc_ind_obj_base_addr_state(VADriverContextP ctx,
+ struct gen6_encoder_context *gen6_encoder_context,
+ struct intel_batchbuffer *batch)
+{
+ struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
+ struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ /* Emit MFX_IND_OBJ_BASE_ADDR_STATE: the 26-dword variant when IS_STEPPING_BPLUS() is true, otherwise the 11-dword layout below. */
+ if (batch == NULL)
+ batch = gen6_encoder_context->base.batch; /* no batch supplied: fall back to the encoder context's base batch */
+
+ if (IS_STEPPING_BPLUS(i965)) {
+ gen75_mfc_ind_obj_base_addr_state_bplus(ctx, gen6_encoder_context, batch);
+ return;
+ }
+
+ BEGIN_BCS_BATCH(batch, 11);
+
+ OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+ /* MFX Indirect MV Object Base Address */
+ OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* relocation to the VME output buffer */
+ OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+ /* MFC Indirect PAK-BSE Object Base Address for Encoder */
+ OUT_BCS_RELOC(batch,
+ mfc_context->mfc_indirect_pak_bse_object.bo,
+ I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+ 0);
+ OUT_BCS_BATCH(batch, 0x00000000); /* emitted as 0 - NOTE(review): the original "must set, up to 2G" remark here looks copied from the MV entry above; confirm */
+
+ ADVANCE_BCS_BATCH(batch);
+}
+
+static void
+gen75_mfc_bsp_buf_base_addr_state_bplus(VADriverContextP ctx,
+ struct gen6_encoder_context *gen6_encoder_context,
+ struct intel_batchbuffer *batch)
+{
+ struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
+ /* Emit the 10-dword form of MFX_BSP_BUF_BASE_ADDR_STATE; gen75_mfc_bsp_buf_base_addr_state() delegates here when IS_STEPPING_BPLUS() is true. */
+ if (batch == NULL)
+ batch = gen6_encoder_context->base.batch; /* no batch supplied: fall back to the encoder context's base batch */
+
+ BEGIN_BCS_BATCH(batch, 10);
+
+ OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
+ OUT_BCS_RELOC(batch, mfc_context->bsd_mpc_row_store_scratch_buffer.bo,
+ I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+ 0); /* DW1: relocation to the BSD/MPC row store scratch buffer */
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+
+ /* DW4-6 are for the MPR Row Store Scratch Buffer Base Address (all written as 0) */
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+
+ /* DW7-9 are for the Bitplane Read Buffer Base Address (all written as 0) */
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+
+ ADVANCE_BCS_BATCH(batch);
+}
+
+static void
+gen75_mfc_bsp_buf_base_addr_state(VADriverContextP ctx,
+ struct gen6_encoder_context *gen6_encoder_context,
+ struct intel_batchbuffer *batch)
+{
+ struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ /* Emit MFX_BSP_BUF_BASE_ADDR_STATE: the 10-dword variant when IS_STEPPING_BPLUS() is true, otherwise the 4-dword layout below. */
+ if (batch == NULL)
+ batch = gen6_encoder_context->base.batch; /* no batch supplied: fall back to the encoder context's base batch */
+
+ if (IS_STEPPING_BPLUS(i965)) {
+ gen75_mfc_bsp_buf_base_addr_state_bplus(ctx, gen6_encoder_context, batch);
+ return;
+ }
+
+
+ BEGIN_BCS_BATCH(batch, 4);
+
+ OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));
+ OUT_BCS_RELOC(batch, mfc_context->bsd_mpc_row_store_scratch_buffer.bo,
+ I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+ 0); /* DW1: relocation to the BSD/MPC row store scratch buffer */
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+
+ ADVANCE_BCS_BATCH(batch);
+}
+
+static void
+gen75_mfc_avc_img_state(VADriverContextP ctx,
+ struct gen6_encoder_context *gen6_encoder_context,
+ struct intel_batchbuffer *batch)
+{
+ struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
+ int width_in_mbs = (mfc_context->surface_state.width + 15) / 16; /* width rounded up to whole 16-pixel macroblocks */
+ int height_in_mbs = (mfc_context->surface_state.height + 15) / 16; /* height rounded up to whole 16-pixel macroblocks */
+ /* Emit the 16-dword MFX_AVC_IMG_STATE command; apart from the frame size in macroblocks every field is a fixed constant. */
+ if (batch == NULL)
+ batch = gen6_encoder_context->base.batch; /* no batch supplied: fall back to the encoder context's base batch */
+
+ BEGIN_BCS_BATCH(batch, 16);
+ OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
+ OUT_BCS_BATCH(batch,
+ ((width_in_mbs * height_in_mbs) & 0xFFFF)); /* only the low 16 bits of the macroblock count are emitted */
+ OUT_BCS_BATCH(batch,
+ ((height_in_mbs - 1) << 16) |
+ ((width_in_mbs - 1) << 0));
+ OUT_BCS_BATCH(batch,
+ (0 << 24) | /* Second Chroma QP Offset */
+ (0 << 16) | /* Chroma QP Offset */
+ (0 << 14) | /* Max-bit conformance Intra flag */
+ (0 << 13) | /* Max Macroblock size conformance Inter flag */
+ (0 << 12) | /* FIXME: Weighted_Pred_Flag */
+ (0 << 10) | /* FIXME: Weighted_BiPred_Idc */
+ (0 << 8) | /* FIXME: Image Structure */
+ (0 << 0) ); /* Current Decoded Image Frame Store ID, reserved in Encode mode */
+ OUT_BCS_BATCH(batch,
+ (0 << 16) | /* Minimum Frame size */
+ (0 << 15) | /* Disable reading of Macroblock Status Buffer */
+ (0 << 14) | /* Load BitStream Pointer only once, 1 slice 1 frame */
+ (0 << 13) | /* CABAC 0 word insertion test enable */
+ (1 << 12) | /* MVUnpackedEnable, compliant to DXVA */
+ (1 << 10) | /* Chroma Format IDC, 4:2:0 */
+ (0 << 8) | /* FIXME: MbMvFormatFlag */
+ (1 << 7) | /* 0:CAVLC encoding mode,1:CABAC */
+ (0 << 6) | /* Only valid for VLD decoding mode */
+ (0 << 5) | /* Constrained Intra Prediction Flag, from PPS */
+ (0 << 4) | /* Direct 8x8 inference flag */
+ (0 << 3) | /* Only 8x8 IDCT Transform Mode Flag */
+ (1 << 2) | /* Frame MB only flag */
+ (0 << 1) | /* MBAFF mode is in active */
+ (0 << 0)); /* Field picture flag */
+ OUT_BCS_BATCH(batch, 0); /* Mainly about MB rate control and debug, just ignoring */
+ OUT_BCS_BATCH(batch, /* Inter and Intra Conformance Max size limit */
+ (0xBB8 << 16) | /* InterMbMaxSz */
+ (0xEE8) ); /* IntraMbMaxSz */
+ OUT_BCS_BATCH(batch, 0); /* Reserved */
+ OUT_BCS_BATCH(batch, 0); /* Slice QP Delta for bitrate control */
+ OUT_BCS_BATCH(batch, 0); /* Slice QP Delta for bitrate control */
+ OUT_BCS_BATCH(batch, 0x8C000000); /* NOTE(review): undocumented constant - confirm against the hardware documentation */
+ OUT_BCS_BATCH(batch, 0x00010000); /* NOTE(review): undocumented constant - confirm against the hardware documentation */
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+
+ ADVANCE_BCS_BATCH(batch);
+}
+
+
+static void
+gen75_mfc_avc_directmode_state_bplus(VADriverContextP ctx,
+ struct gen6_encoder_context *gen6_encoder_context,
+ struct intel_batchbuffer *batch)
+{
+ struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
+ int i;
+ /* Emit the 71-dword form of MFX_AVC_DIRECTMODE_STATE; gen75_mfc_avc_directmode_state() delegates here when IS_STEPPING_BPLUS() is true. */
+ if (batch == NULL)
+ batch = gen6_encoder_context->base.batch; /* no batch supplied: fall back to the encoder context's base batch */
+
+ BEGIN_BCS_BATCH(batch, 71);
+
+ OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
+
+ /* Reference frames and Current frames */
+ /* DW1-32 are for the direct MV buffers of the references */
+ for(i = 0; i < NUM_MFC_DMV_BUFFERS - 2; i += 2) { /* visits every second buffer slot and emits two dwords per visited slot */
+ if ( mfc_context->direct_mv_buffers[i].bo != NULL) {
+ OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[i].bo,
+ I915_GEM_DOMAIN_INSTRUCTION, 0,
+ 0);
+ OUT_BCS_BATCH(batch, 0);
+ } else {
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+ }
+ }
+ OUT_BCS_BATCH(batch, 0);
+
+ /* DW34-36 are the MV buffer for the current picture */
+ OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo,
+ I915_GEM_DOMAIN_INSTRUCTION, 0,
+ 0);
+
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+
+ /* POL list */
+ for(i = 0; i < 32; i++) {
+ OUT_BCS_BATCH(batch, i/2); /* integer division: each consecutive pair of dwords carries the same value 0..15 */
+ }
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+
+ ADVANCE_BCS_BATCH(batch);
+}
+
+static void gen75_mfc_avc_directmode_state(VADriverContextP ctx,
+ struct gen6_encoder_context *gen6_encoder_context,
+ struct intel_batchbuffer *batch)
+{
+ struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
+ int i;
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ /* Emit MFX_AVC_DIRECTMODE_STATE: the 71-dword variant when IS_STEPPING_BPLUS() is true, otherwise the 69-dword layout below. */
+ if (batch == NULL)
+ batch = gen6_encoder_context->base.batch; /* no batch supplied: fall back to the encoder context's base batch */
+
+ if (IS_STEPPING_BPLUS(i965)) {
+ gen75_mfc_avc_directmode_state_bplus(ctx, gen6_encoder_context, batch);
+ return;
+ }
+
+ BEGIN_BCS_BATCH(batch, 69);
+
+ OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));
+ //TODO: reference DMV
+ for (i = 0; i < NUM_MFC_DMV_BUFFERS - 2; i++){ /* one dword per slot: a relocation when the buffer exists, otherwise 0 */
+ if (mfc_context->direct_mv_buffers[i].bo)
+ OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[i].bo,
+ I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+ 0);
+ else
+ OUT_BCS_BATCH(batch, 0);
+ }
+
+ //TODO: current DMV just for test
+#if 0
+ OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[0].bo,
+ I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+ 0);
+#else
+ //drm_intel_bo_pin(mfc_context->direct_mv_buffers[0].bo, 0x1000);
+ //OUT_BCS_BATCH(batch, mfc_context->direct_mv_buffers[0].bo->offset);
+ OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo,
+ I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+ 0);
+#endif
+ /* Active path: the slot at index NUM_MFC_DMV_BUFFERS - 2 is emitted after the reference slots. */
+
+ OUT_BCS_BATCH(batch, 0);
+
+ //TODO: POL list
+ for(i = 0; i < 34; i++) { /* all 34 remaining dwords are written as 0 */
+ OUT_BCS_BATCH(batch, 0);
+ }
+
+ ADVANCE_BCS_BATCH(batch);
+}
+
+static void gen75_mfc_avc_slice_state(VADriverContextP ctx,
+ int intra_slice,
+ struct gen6_encoder_context *gen6_encoder_context,
+ struct intel_batchbuffer *batch)
+{
+ struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
+ /* Emit the 11-dword MFX_AVC_SLICE_STATE command; intra_slice (non-zero) selects the I-slice values, zero the P-slice values. */
+ if (batch == NULL)
+ batch = gen6_encoder_context->base.batch; /* no batch supplied: fall back to the encoder context's base batch */
+
+ BEGIN_BCS_BATCH(batch, 11);;
+
+ OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2) );
+
+ if ( intra_slice )
+ OUT_BCS_BATCH(batch, 2); /*Slice Type: I Slice*/
+ else
+ OUT_BCS_BATCH(batch, 0); /*Slice Type: P Slice*/
+
+ if ( intra_slice )
+ OUT_BCS_BATCH(batch, 0); /*no reference frames and pred_weight_table*/
+ else
+ OUT_BCS_BATCH(batch, 0x00010000); /*1 reference frame*/
+
+ OUT_BCS_BATCH(batch, (0<<24) | /*Enable deblocking operation*/
+ (26<<16) | /*Slice Quantization Parameter (hard-coded to 26 here)*/
+ 0x0202 );
+ OUT_BCS_BATCH(batch, 0); /*First MB X&Y , the position of current slice*/
+ OUT_BCS_BATCH(batch, ( ((mfc_context->surface_state.height+15)/16) << 16) ); /* frame height in macroblocks, placed in bits 16 and up */
+
+ OUT_BCS_BATCH(batch,
+ (0<<31) | /*RateControlCounterEnable = disable*/
+ (1<<30) | /*ResetRateControlCounter*/
+ (2<<28) | /*RC Trigger Mode = Loose Rate Control*/
+ (1<<19) | /*IsLastSlice*/
+ (0<<18) | /*BitstreamOutputFlag Compressed BitStream Output Disable Flag 0:enable 1:disable*/
+ (0<<17) | /*HeaderPresentFlag*/
+ (1<<16) | /*SliceData PresentFlag*/
+ (0<<15) | /*TailPresentFlag*/
+ (1<<13) | /*RBSP NAL TYPE*/
+ (0<<12) ); /*CabacZeroWordInsertionEnable*/
+
+ /* Offset into the PAK-BSE object, as recorded in mfc_indirect_pak_bse_object.offset. */
+ OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset);
+
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+
+ ADVANCE_BCS_BATCH(batch);
+}
+
+static void
+gen75_mfc_qm_state(VADriverContextP ctx,
+ int qm_type,
+ unsigned int *qm,
+ int qm_length,
+ struct gen6_encoder_context *gen6_encoder_context,
+ struct intel_batchbuffer *batch)
+{
+ unsigned int qm_buffer[16] = { 0 }; /* zero-filled: the command always carries 16 dwords, even when qm_length is smaller */
+ /* Emit one 18-dword MFX_QM_STATE command of type qm_type; qm supplies qm_length (at most 16) dwords and the remainder is sent as 0. */
+ if (batch == NULL)
+ batch = gen6_encoder_context->base.batch; /* no batch supplied: fall back to the encoder context's base batch */
+
+ assert(qm_length >= 0 && qm_length <= 16);
+ assert(sizeof(*qm) == 4);
+ memcpy(qm_buffer, qm, qm_length * 4);
+
+ BEGIN_BCS_BATCH(batch, 18);
+ OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
+ OUT_BCS_BATCH(batch, qm_type << 0);
+ intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
+ ADVANCE_BCS_BATCH(batch);
+}
+
+static void gen75_mfc_avc_qm_state(VADriverContextP ctx,
+ struct gen6_encoder_context *gen6_encoder_context,
+ struct intel_batchbuffer *batch)
+{
+ /* One table, every dword 0x10101010, shared by all four matrix types (4x4: 12 dwords, 8x8: 16). */
+ unsigned int flat_qm[16];
+ int n;
+
+ for (n = 0; n < 16; n++)
+ flat_qm[n] = 0x10101010;
+
+ gen75_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, flat_qm, 12, gen6_encoder_context, batch);
+ gen75_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, flat_qm, 12, gen6_encoder_context, batch);
+ gen75_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, flat_qm, 16, gen6_encoder_context, batch);
+ gen75_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, flat_qm, 16, gen6_encoder_context, batch);
+}
+
+static void
+gen75_mfc_fqm_state(VADriverContextP ctx,
+ int fqm_type,
+ unsigned int *fqm,
+ int fqm_length,
+ struct gen6_encoder_context *gen6_encoder_context,
+ struct intel_batchbuffer *batch)
+{
+ unsigned int fqm_buffer[32] = { 0 }; /* zero-filled: the command always carries 32 dwords, even when fqm_length is smaller */
+ /* Emit one 34-dword MFX_FQM_STATE command of type fqm_type; fqm supplies fqm_length (at most 32) dwords and the remainder is sent as 0. */
+ if (batch == NULL)
+ batch = gen6_encoder_context->base.batch; /* no batch supplied: fall back to the encoder context's base batch */
+
+ assert(fqm_length >= 0 && fqm_length <= 32);
+ assert(sizeof(*fqm) == 4);
+ memcpy(fqm_buffer, fqm, fqm_length * 4);
+
+ BEGIN_BCS_BATCH(batch, 34);
+ OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2));
+ OUT_BCS_BATCH(batch, fqm_type << 0);
+ intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
+ ADVANCE_BCS_BATCH(batch);
+}
+
+static void gen75_mfc_avc_fqm_state(VADriverContextP ctx,
+ struct gen6_encoder_context *gen6_encoder_context,
+ struct intel_batchbuffer *batch)
+{
+ /*
+  * One shared table serves all four matrix types: every dword is
+  * 0x10001000.  The 4x4 types pass the first 24 dwords of it, the
+  * 8x8 types all 32.
+  */
+ unsigned int flat_fqm[32];
+ int n;
+
+ for (n = 0; n < 32; n++)
+ flat_fqm[n] = 0x10001000;
+
+ gen75_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, flat_fqm, 24, gen6_encoder_context, batch);
+ gen75_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, flat_fqm, 24, gen6_encoder_context, batch);
+ gen75_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, flat_fqm, 32, gen6_encoder_context, batch);
+ gen75_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, flat_fqm, 32, gen6_encoder_context, batch);
+}
+
+static void gen75_mfc_avc_ref_idx_state(VADriverContextP ctx,
+ struct gen6_encoder_context *gen6_encoder_context,
+ struct intel_batchbuffer *batch)
+{
+ int i;
+ /* Emit the 10-dword MFX_AVC_REF_IDX_STATE command with fixed contents (list L0, a single reference entry). */
+ if (batch == NULL)
+ batch = gen6_encoder_context->base.batch; /* no batch supplied: fall back to the encoder context's base batch */
+
+ BEGIN_BCS_BATCH(batch, 10);
+
+ OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8); /* 8 is the same length field as (10 - 2) */
+ OUT_BCS_BATCH(batch, 0); //Select L0
+
+ OUT_BCS_BATCH(batch, 0x80808000); //Only 1 reference
+ for(i = 0; i < 7; i++) { /* the remaining seven dwords are all 0x80808080 */
+ OUT_BCS_BATCH(batch, 0x80808080);
+ }
+
+ ADVANCE_BCS_BATCH(batch);
+}
+
+static int
+gen75_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int end_mb, int qp,unsigned int *msg,
+ struct gen6_encoder_context *gen6_encoder_context,
+ struct intel_batchbuffer *batch)
+{
+ int len_in_dwords = 12; /* command length; also the return value (in dwords, not bytes) */
+ /* Emit one MFC_AVC_PAK_OBJECT for an intra macroblock at (x, y), built from msg[0..3]; returns the number of dwords emitted. */
+ unsigned int intra_msg;
+#define INTRA_MSG_FLAG (1 << 13) /* bit 13, always set in the intra word below */
+#define INTRA_MBTYPE_MASK (0x1F0000) /* bits 16-20 of msg[0] */
+
+ if (batch == NULL)
+ batch = gen6_encoder_context->base.batch; /* no batch supplied: fall back to the encoder context's base batch */
+
+ BEGIN_BCS_BATCH(batch, len_in_dwords);
+
+ intra_msg = msg[0] & 0xC0FF; /* keep bits 0-7 and 14-15 of msg[0] */
+ intra_msg |= INTRA_MSG_FLAG;
+ intra_msg |= ((msg[0] & INTRA_MBTYPE_MASK) >> 8); /* move bits 16-20 of msg[0] down to bits 8-12 */
+ OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch,
+ (0 << 24) | /* PackedMvNum, Debug*/
+ (0 << 20) | /* No motion vector */
+ (1 << 19) | /* CbpDcY */
+ (1 << 18) | /* CbpDcU */
+ (1 << 17) | /* CbpDcV */
+ intra_msg);
+
+ OUT_BCS_BATCH(batch, (0xFFFF<<16) | (y << 8) | x); /* Code Block Pattern for Y, plus MB position: y in bits 8 and up, x in the low bits */
+ OUT_BCS_BATCH(batch, 0x000F000F); /* Code Block Pattern */
+ OUT_BCS_BATCH(batch, (0 << 27) | (end_mb << 26) | qp); /* Last MB flag in bit 26, qp in the low bits */
+
+ /*Stuff for Intra MB*/
+ OUT_BCS_BATCH(batch, msg[1]); /* We using Intra16x16 no 4x4 predmode*/
+ OUT_BCS_BATCH(batch, msg[2]);
+ OUT_BCS_BATCH(batch, msg[3]&0xFC); /* only bits 2-7 of msg[3] are passed through */
+
+ OUT_BCS_BATCH(batch, 0x00000); /*MaxSizeInWord and TargetSizeInWord*/
+ OUT_BCS_BATCH(batch, 0);
+
+ ADVANCE_BCS_BATCH(batch);
+
+ return len_in_dwords;
+}
+
+static int gen75_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, int qp,
+ unsigned int offset, unsigned int *msg, struct gen6_encoder_context *gen6_encoder_context,
+ struct intel_batchbuffer *batch)
+{
+ int len_in_dwords = 12; /* command length; also the return value (in dwords, not bytes) */
+ unsigned int inter_msg; /* scratch value, reused for several different dwords below */
+ /* Emit one MFC_AVC_PAK_OBJECT for an inter macroblock at (x, y), built from msg[0..1] and the caller's offset; returns the number of dwords emitted. */
+ if (batch == NULL)
+ batch = gen6_encoder_context->base.batch; /* no batch supplied: fall back to the encoder context's base batch */
+
+ BEGIN_BCS_BATCH(batch, len_in_dwords);
+
+ OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
+
+ inter_msg = 32;
+ /* MV quantity: 32 by default, 128 for INTER_8X8 mode with any SUBMB_SHAPE_MASK bit set in msg[1] */
+ if ((msg[0] & INTER_MODE_MASK) == INTER_8X8) {
+ if (msg[1] & SUBMB_SHAPE_MASK)
+ inter_msg = 128;
+ }
+ OUT_BCS_BATCH(batch, inter_msg); /* 32 MV*/
+
+ OUT_BCS_BATCH(batch, offset); /* passed through unchanged from the caller */
+ inter_msg = msg[0] & (0x1F00FFFF); /* keep bits 0-15 and 24-28 of msg[0] */
+ inter_msg |= INTER_MV8;
+ if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) &&
+ (msg[1] & SUBMB_SHAPE_MASK)) {
+ inter_msg |= INTER_MV32; /* same condition as the 128 case above */
+ }
+
+ OUT_BCS_BATCH(batch, inter_msg);
+
+ OUT_BCS_BATCH(batch, (0xFFFF<<16) | (y << 8) | x); /* Code Block Pattern for Y, plus MB position: y in bits 8 and up, x in the low bits */
+ OUT_BCS_BATCH(batch, 0x000F000F); /* Code Block Pattern */
+ OUT_BCS_BATCH(batch, (0 << 27) | (end_mb << 26) | qp); /* Last MB flag in bit 26, qp in the low bits */
+
+ /*Stuff for Inter MB*/
+ inter_msg = msg[1] >> 8;
+ OUT_BCS_BATCH(batch, inter_msg);
+ OUT_BCS_BATCH(batch, 0x0);
+ OUT_BCS_BATCH(batch, 0x0);
+
+ OUT_BCS_BATCH(batch, 0x00000000); /*MaxSizeInWord and TargetSizeInWord*/
+
+ OUT_BCS_BATCH(batch, 0x0);
+
+ ADVANCE_BCS_BATCH(batch);
+
+ return len_in_dwords;
+}
+
+static void gen75_mfc_init(VADriverContextP ctx,
+ struct encode_state *encode_state,
+ struct gen6_encoder_context *gen6_encoder_context)
+{
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
+ VAEncSequenceParameterBufferH264 *seq_param = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param->buffer;
+ const int mbs_per_row = seq_param->picture_width_in_mbs;
+ dri_bo *scratch;
+ int ref;
+
+ /*
+  * Release the buffer objects still held by the MFC context, then
+  * allocate fresh row-store scratch buffers sized from the sequence width.
+  */
+ dri_bo_unreference(mfc_context->post_deblocking_output.bo);
+ dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
+ dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
+ dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo);
+ mfc_context->post_deblocking_output.bo = NULL;
+ mfc_context->pre_deblocking_output.bo = NULL;
+ mfc_context->uncompressed_picture_source.bo = NULL;
+ mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
+
+ /* Reference surfaces are only unreferenced when a buffer is actually set. */
+ for (ref = 0; ref < MAX_MFC_REFERENCE_SURFACES; ref++) {
+ dri_bo *old = mfc_context->reference_surfaces[ref].bo;
+
+ mfc_context->reference_surfaces[ref].bo = NULL;
+ if (old != NULL)
+ dri_bo_unreference(old);
+ }
+
+ /* Intra row store: 64 bytes per macroblock column. */
+ dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
+ scratch = dri_bo_alloc(i965->intel.bufmgr, "Buffer",
+ mbs_per_row * 64, 64);
+ assert(scratch);
+ mfc_context->intra_row_store_scratch_buffer.bo = scratch;
+
+ /* Deblocking filter row store: 4 * 64 bytes per macroblock column. */
+ dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
+ scratch = dri_bo_alloc(i965->intel.bufmgr, "Buffer",
+ 4 * mbs_per_row * 64, 64);
+ assert(scratch);
+ mfc_context->deblocking_filter_row_store_scratch_buffer.bo = scratch;
+
+ /* BSD/MPC row store: 2 * 64 bytes per macroblock column; 0x1000 is passed as the last allocator argument. */
+ dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
+ scratch = dri_bo_alloc(i965->intel.bufmgr, "Buffer",
+ 2 * mbs_per_row * 64, 0x1000);
+ assert(scratch);
+ mfc_context->bsd_mpc_row_store_scratch_buffer.bo = scratch;
+}
+
+#define INTRA_RDO_OFFSET 4 /* index, in 32-bit words, of the intra cost word inside a per-macroblock VME record */
+#define INTER_RDO_OFFSET 54 /* index, in 32-bit words, of the inter cost word inside a per-macroblock VME record */
+#define INTER_MSG_OFFSET 52 /* number of 32-bit words skipped to reach the inter message inside a record */
+#define INTER_MV_OFFSET 224 /* byte offset added to a record's byte offset for the inter PAK object */
+#define RDO_MASK 0xFFFF /* only the low 16 bits of a cost word are compared */
+
+static void gen75_mfc_avc_pipeline_programing(VADriverContextP ctx,
+ struct encode_state *encode_state,
+ struct gen6_encoder_context *gen6_encoder_context)
+{
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ struct intel_batchbuffer *main_batch = gen6_encoder_context->base.batch;
+ struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
+ struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
+ VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param->buffer;
+ VAEncSliceParameterBuffer *pSliceParameter = (VAEncSliceParameterBuffer *)encode_state->slice_params[0]->buffer; /* FIXME: multi slices */
+ unsigned int *msg = NULL, offset = 0;
+ unsigned char *msg_ptr = NULL;
+ int emit_new_state = 1, object_len_in_dwords; /* the PAK helpers return their length in dwords */
+ int is_intra = pSliceParameter->slice_flags.bits.is_intra;
+ int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
+ int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
+ int x,y, mb_index;
+ int inter_rdo, intra_rdo;
+ struct intel_batchbuffer *batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, width_in_mbs * height_in_mbs * 12 * 4 + 0x800);
+ /* Build a second-level BSD batch holding the MFX state commands plus one PAK object per macroblock, then chain it from the main batch. */
+ intel_batchbuffer_start_atomic_bcs(batch, width_in_mbs * height_in_mbs * 12 * 4 + 0x700);
+ /* Map the VME output: one record of vme_output.size_block bytes per macroblock. */
+ dri_bo_map(vme_context->vme_output.bo , 1);
+ msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
+ if (is_intra) { /* both branches start at record 0; msg is recomputed for every macroblock in the loop below */
+ msg = (unsigned int *) (msg_ptr + 0 * vme_context->vme_output.size_block);
+ } else {
+ msg = (unsigned int *) (msg_ptr + 0 * vme_context->vme_output.size_block);
+ offset = 0;
+ }
+
+ for (y = 0; y < height_in_mbs; y++) {
+ for (x = 0; x < width_in_mbs; x++) {
+ int last_mb = (y == (height_in_mbs-1)) && ( x == (width_in_mbs-1) );
+ int qp = pSequenceParameter->initial_qp;
+ mb_index = (y * width_in_mbs) + x;
+ if (emit_new_state) { /* state is emitted before the first macroblock and again after every mid-frame flush */
+ intel_batchbuffer_emit_mi_flush(batch);
+
+ gen75_mfc_pipe_mode_select(ctx, MFX_FORMAT_AVC, gen6_encoder_context, batch);
+ gen75_mfc_surface_state(ctx, gen6_encoder_context, batch);
+ gen75_mfc_ind_obj_base_addr_state(ctx, gen6_encoder_context, batch);
+
+ gen75_mfc_pipe_buf_addr_state(ctx, gen6_encoder_context, batch);
+ gen75_mfc_bsp_buf_base_addr_state(ctx, gen6_encoder_context, batch);
+
+ gen75_mfc_avc_img_state(ctx, gen6_encoder_context, batch);
+ gen75_mfc_avc_qm_state(ctx, gen6_encoder_context, batch);
+ gen75_mfc_avc_fqm_state(ctx, gen6_encoder_context, batch);
+ gen75_mfc_avc_directmode_state(ctx, gen6_encoder_context, batch);
+
+ gen75_mfc_avc_ref_idx_state(ctx, gen6_encoder_context, batch);
+ gen75_mfc_avc_slice_state(ctx, is_intra, gen6_encoder_context, batch);
+ emit_new_state = 0;
+ }
+ /* VME record of the current macroblock. */
+ msg = (unsigned int *) (msg_ptr + mb_index * vme_context->vme_output.size_block);
+ if (is_intra) {
+ object_len_in_dwords = gen75_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, gen6_encoder_context, batch);
+ } else {
+ inter_rdo = msg[INTER_RDO_OFFSET] & RDO_MASK;
+ intra_rdo = msg[INTRA_RDO_OFFSET] & RDO_MASK;
+ if (intra_rdo < inter_rdo) { /* in a non-intra slice, code the macroblock as intra when its intra cost is strictly lower */
+ object_len_in_dwords = gen75_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, gen6_encoder_context, batch);
+ } else {
+ msg += INTER_MSG_OFFSET;
+ offset = mb_index * vme_context->vme_output.size_block + INTER_MV_OFFSET;
+ object_len_in_dwords = gen75_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp, offset, msg, gen6_encoder_context, batch);
+ }
+ }
+ if (intel_batchbuffer_check_free_space(batch, object_len_in_dwords * 4) == 0) { /* free space is checked in bytes: 4 per dword */
+ intel_batchbuffer_end_atomic(batch);
+ intel_batchbuffer_flush(batch);
+ emit_new_state = 1;
+ intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
+ }
+ }
+ }
+
+ dri_bo_unmap(vme_context->vme_output.bo);
+
+ intel_batchbuffer_align(batch, 8);
+ /* Terminate the second-level batch. */
+ BEGIN_BCS_BATCH(batch, 2);
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
+ ADVANCE_BCS_BATCH(batch);
+
+ intel_batchbuffer_end_atomic(batch);
+
+ /* chain to the main batch buffer */
+ intel_batchbuffer_start_atomic_bcs(main_batch, 0x100);
+ intel_batchbuffer_emit_mi_flush(main_batch);
+ BEGIN_BCS_BATCH(main_batch, 2);
+ OUT_BCS_BATCH(main_batch, MI_BATCH_BUFFER_START | (1 << 8));
+ OUT_BCS_RELOC(main_batch,
+ batch->buffer,
+ I915_GEM_DOMAIN_COMMAND, 0,
+ 0);
+ ADVANCE_BCS_BATCH(main_batch);
+ intel_batchbuffer_end_atomic(main_batch);
+
+ // end programing
+ intel_batchbuffer_free(batch);
+}
+
+static VAStatus gen75_mfc_avc_prepare(VADriverContextP ctx,
+ struct encode_state *encode_state,
+ struct gen6_encoder_context *gen6_encoder_context)
+{
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
+ struct object_surface *obj_surface;
+ struct object_buffer *obj_buffer;
+ dri_bo *bo;
+ VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param->buffer;
+ VAStatus vaStatus = VA_STATUS_SUCCESS;
+ /* Bind the picture's surfaces and coded buffer to the MFC context (taking a reference on each), then program the BCS pipeline. */
+ /*Setup all the input&output object*/
+ obj_surface = SURFACE(pPicParameter->reconstructed_picture);
+ assert(obj_surface);
+ i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
+ mfc_context->post_deblocking_output.bo = obj_surface->bo;
+ dri_bo_reference(mfc_context->post_deblocking_output.bo);
+ /* Frame geometry is taken from the reconstructed surface. */
+ mfc_context->surface_state.width = obj_surface->orig_width;
+ mfc_context->surface_state.height = obj_surface->orig_height;
+ mfc_context->surface_state.w_pitch = obj_surface->width;
+ mfc_context->surface_state.h_pitch = obj_surface->height;
+ /* The reference picture only fills slot 0, and only when it already has a buffer. */
+ obj_surface = SURFACE(pPicParameter->reference_picture);
+ assert(obj_surface);
+ if (obj_surface->bo != NULL) {
+ mfc_context->reference_surfaces[0].bo = obj_surface->bo;
+ dri_bo_reference(obj_surface->bo);
+ }
+ /* The current render target is the uncompressed source picture. */
+ obj_surface = SURFACE(encode_state->current_render_target);
+ assert(obj_surface && obj_surface->bo);
+ mfc_context->uncompressed_picture_source.bo = obj_surface->bo;
+ dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
+ /* Validate the coded buffer lookup before dereferencing it, as is done for the surfaces above. */
+ obj_buffer = BUFFER (pPicParameter->coded_buf); /* FIXME: fix this later */
+ assert(obj_buffer && obj_buffer->buffer_store && obj_buffer->buffer_store->bo);
+ bo = obj_buffer->buffer_store->bo;
+ mfc_context->mfc_indirect_pak_bse_object.bo = bo;
+ mfc_context->mfc_indirect_pak_bse_object.offset = ALIGN(sizeof(VACodedBufferSegment), 64); /* output starts after the segment header, rounded up to 64 bytes */
+ dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);
+
+ /*Programing bcs pipeline*/
+ gen75_mfc_avc_pipeline_programing(ctx, encode_state, gen6_encoder_context); //filling the pipeline
+
+ return vaStatus;
+}
+
+static VAStatus gen75_mfc_run(VADriverContextP ctx,
+ struct encode_state *encode_state,
+ struct gen6_encoder_context *gen6_encoder_context)
+{
+ /* Run the pipeline by flushing the encoder context's base batch
+  * buffer; ctx and encode_state are unused and success is always reported. */
+ intel_batchbuffer_flush(gen6_encoder_context->base.batch);
+
+ return VA_STATUS_SUCCESS;
+}
+
+static VAStatus gen75_mfc_stop(VADriverContextP ctx,
+ struct encode_state *encode_state,
+ struct gen6_encoder_context *gen6_encoder_context)
+{ /* Last step of gen75_mfc_avc_encode_picture(); with the block below compiled out it does nothing but report success. */
+#if 0 /* disabled debug code: would pass the reconstructed surface to my_debug() */
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
+
+ VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param->buffer;
+
+ struct object_surface *obj_surface = SURFACE(pPicParameter->reconstructed_picture);
+ //struct object_surface *obj_surface = SURFACE(pPicParameter->reference_picture[0]);
+ //struct object_surface *obj_surface = SURFACE(encode_state->current_render_target);
+ my_debug(obj_surface);
+
+#endif
+
+ return VA_STATUS_SUCCESS;
+}
+
+static VAStatus
+gen75_mfc_avc_encode_picture(VADriverContextP ctx,
+ struct encode_state *encode_state,
+ struct gen6_encoder_context *gen6_encoder_context)
+{
+ VAStatus vaStatus; /* init -> prepare -> run -> stop; the first failing status stops the sequence and is returned */
+ gen75_mfc_init(ctx, encode_state, gen6_encoder_context);
+ vaStatus = gen75_mfc_avc_prepare(ctx, encode_state, gen6_encoder_context);
+ if (vaStatus == VA_STATUS_SUCCESS) vaStatus = gen75_mfc_run(ctx, encode_state, gen6_encoder_context);
+ if (vaStatus == VA_STATUS_SUCCESS) vaStatus = gen75_mfc_stop(ctx, encode_state, gen6_encoder_context);
+ return vaStatus;
+}
+
+VAStatus
+gen75_mfc_pipeline(VADriverContextP ctx,
+ VAProfile profile,
+ struct encode_state *encode_state,
+ struct gen6_encoder_context *gen6_encoder_context)
+{
+ /*
+  * Dispatch an encode request on the VA profile.
+  *
+  * Returns the status of gen75_mfc_avc_encode_picture() for
+  * VAProfileH264Baseline, the only profile handled here, and
+  * VA_STATUS_ERROR_UNSUPPORTED_PROFILE for every other profile.
+  */
+
+ if (profile != VAProfileH264Baseline) {
+ /* FIXME: add for other profile */
+ return VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
+ }
+
+ return gen75_mfc_avc_encode_picture(ctx, encode_state, gen6_encoder_context);
+}
+
+Bool gen75_mfc_context_init(VADriverContextP ctx, struct gen6_mfc_context *mfc_context)
+{
+ int i;
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ dri_bo *bo;
+ /* Reset every direct-MV slot, then allocate the two buffers used by the encoder: slot 0 and slot NUM_MFC_DMV_BUFFERS - 2. */
+ for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
+ dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
+ mfc_context->direct_mv_buffers[i].bo = NULL;
+ }
+ bo = dri_bo_alloc(i965->intel.bufmgr,
+ "Buffer",
+ 68*8192, 64);
+ assert(bo); /* same allocation check as gen75_mfc_init() applies to its scratch buffers */
+ mfc_context->direct_mv_buffers[0].bo = bo;
+ bo = dri_bo_alloc(i965->intel.bufmgr,
+ "Buffer",
+ 68*8192, 64);
+ assert(bo); /* this slot is emitted unconditionally by the direct-mode state, so it must exist */
+ mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo = bo;
+ return True;
+}
+
+Bool gen75_mfc_context_destroy(struct gen6_mfc_context *mfc_context)
+{
+ int i;
+
+ dri_bo_unreference(mfc_context->post_deblocking_output.bo);
+ mfc_context->post_deblocking_output.bo = NULL;
+
+ dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
+ mfc_context->pre_deblocking_output.bo = NULL;
+
+ dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
+ mfc_context->uncompressed_picture_source.bo = NULL;
+
+ dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo);
+ mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
+
+ for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
+ dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
+ mfc_context->direct_mv_buffers[i].bo = NULL;
+ }
+
+ dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
+ mfc_context->intra_row_store_scratch_buffer.bo = NULL;
+
+ dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
+ mfc_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
+
+ dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
+ mfc_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
+
+ return True;
+}
diff --git a/src/gen75_mfd.c b/src/gen75_mfd.c
new file mode 100644
index 0000000..78cb73b
--- /dev/null
+++ b/src/gen75_mfd.c
@@ -0,0 +1,3402 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Xiang Haihao <haihao.xiang at intel.com>
+ * Zhao Yakui <yakui.zhao at intel.com>
+ *
+ */
+
+#ifndef HAVE_GEN_AVC_SURFACE
+#define HAVE_GEN_AVC_SURFACE 1
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+
+#include "config.h"
+#include "intel_batchbuffer.h"
+#include "intel_driver.h"
+
+#include "i965_defines.h"
+#include "i965_drv_video.h"
+#include "i965_decoder_utils.h"
+
+#include "gen7_mfd.h"
+
+/* Lowest intel.revision value that selects the "B+" command layouts below. */
+#define B0_STEP_REV 2
+/*
+ * Non-zero when the driver data's intel.revision is at least B0_STEP_REV.
+ * The argument is parenthesized so any pointer expression expands safely.
+ */
+#define IS_STEPPING_BPLUS(i965) (((i965)->intel.revision) >= B0_STEP_REV)
+
+static const uint32_t zigzag_direct[64] = { /* 64 indices in 0..63; by its name a zigzag scan order for an 8x8 block -- users are outside this chunk, confirm there */
+ 0, 1, 8, 16, 9, 2, 3, 10,
+ 17, 24, 32, 25, 18, 11, 4, 5,
+ 12, 19, 26, 33, 40, 48, 41, 34,
+ 27, 20, 13, 6, 7, 14, 21, 28,
+ 35, 42, 49, 56, 57, 50, 43, 36,
+ 29, 22, 15, 23, 30, 37, 44, 51,
+ 58, 59, 52, 45, 38, 31, 39, 46,
+ 53, 60, 61, 54, 47, 55, 62, 63
+};
+
+/*
+ * Bring gen7_mfd_context->reference_surface[] in line with the
+ * ReferenceFrames list of the current H.264 picture parameters.
+ *
+ *  1. Entries whose surface no longer appears in ReferenceFrames are
+ *     released (flags, bo and private data) and marked invalid.
+ *  2. Each newly listed reference picture gets a backing bo via
+ *     i965_check_alloc_surface_bo(), the lowest frame_store_id not in
+ *     use, and is stored in the first invalid entry.
+ *  3. Entries are swapped so that, where a matching entry exists,
+ *     entry i carries frame_store_id i.
+ *
+ * A surface id that SURFACE() can no longer resolve is tolerated: the
+ * stale slot is simply cleared, and an unresolved new reference is skipped
+ * (after the assert, so debug builds still stop there).
+ */
+static void
+gen75_mfd_avc_frame_store_index(VADriverContextP ctx,
+                                VAPictureParameterBufferH264 *pic_param,
+                                struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx); /* used by SURFACE() */
+    int i, j;
+
+    assert(ARRAY_ELEMS(gen7_mfd_context->reference_surface) == ARRAY_ELEMS(pic_param->ReferenceFrames));
+
+    /* Step 1: retire entries that are no longer referenced. */
+    for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
+        int found = 0;
+
+        if (gen7_mfd_context->reference_surface[i].surface_id == VA_INVALID_ID)
+            continue;
+
+        for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) {
+            VAPictureH264 *ref_pic = &pic_param->ReferenceFrames[j];
+            if (ref_pic->flags & VA_PICTURE_H264_INVALID)
+                continue;
+
+            if (gen7_mfd_context->reference_surface[i].surface_id == ref_pic->picture_id) {
+                found = 1;
+                break;
+            }
+        }
+
+        if (!found) {
+            struct object_surface *obj_surface = SURFACE(gen7_mfd_context->reference_surface[i].surface_id);
+
+            /* The id may no longer resolve; then only the slot needs clearing. */
+            if (obj_surface) {
+                obj_surface->flags &= ~SURFACE_REFERENCED;
+
+                if ((obj_surface->flags & SURFACE_ALL_MASK) == SURFACE_DISPLAYED) {
+                    dri_bo_unreference(obj_surface->bo);
+                    obj_surface->bo = NULL;
+                    obj_surface->flags &= ~SURFACE_REF_DIS_MASK;
+                }
+
+                if (obj_surface->free_private_data)
+                    obj_surface->free_private_data(&obj_surface->private_data);
+            }
+
+            gen7_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
+            gen7_mfd_context->reference_surface[i].frame_store_id = -1;
+        }
+    }
+
+    /* Step 2: register reference pictures that are not tracked yet. */
+    for (i = 0; i < ARRAY_ELEMS(pic_param->ReferenceFrames); i++) {
+        VAPictureH264 *ref_pic = &pic_param->ReferenceFrames[i];
+        int found = 0;
+
+        if (ref_pic->flags & VA_PICTURE_H264_INVALID)
+            continue;
+
+        for (j = 0; j < ARRAY_ELEMS(gen7_mfd_context->reference_surface); j++) {
+            if (gen7_mfd_context->reference_surface[j].surface_id == VA_INVALID_ID)
+                continue;
+
+            if (gen7_mfd_context->reference_surface[j].surface_id == ref_pic->picture_id) {
+                found = 1;
+                break;
+            }
+        }
+
+        if (!found) {
+            int frame_idx;
+            struct object_surface *obj_surface = SURFACE(ref_pic->picture_id);
+
+            assert(obj_surface);
+            /* With asserts compiled out, skip instead of dereferencing NULL. */
+            if (!obj_surface)
+                continue;
+
+            i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
+
+            /* Find the lowest frame_store_id no valid entry is using. */
+            for (frame_idx = 0; frame_idx < ARRAY_ELEMS(gen7_mfd_context->reference_surface); frame_idx++) {
+                for (j = 0; j < ARRAY_ELEMS(gen7_mfd_context->reference_surface); j++) {
+                    if (gen7_mfd_context->reference_surface[j].surface_id == VA_INVALID_ID)
+                        continue;
+
+                    if (gen7_mfd_context->reference_surface[j].frame_store_id == frame_idx)
+                        break;
+                }
+
+                if (j == ARRAY_ELEMS(gen7_mfd_context->reference_surface))
+                    break;
+            }
+
+            assert(frame_idx < ARRAY_ELEMS(gen7_mfd_context->reference_surface));
+
+            /* Store the picture in the first free entry. */
+            for (j = 0; j < ARRAY_ELEMS(gen7_mfd_context->reference_surface); j++) {
+                if (gen7_mfd_context->reference_surface[j].surface_id == VA_INVALID_ID) {
+                    gen7_mfd_context->reference_surface[j].surface_id = ref_pic->picture_id;
+                    gen7_mfd_context->reference_surface[j].frame_store_id = frame_idx;
+                    break;
+                }
+            }
+        }
+    }
+
+    /* Step 3: sort -- move the entry with frame_store_id i into position i. */
+    for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface) - 1; i++) {
+        if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
+            gen7_mfd_context->reference_surface[i].frame_store_id == i)
+            continue;
+
+        for (j = i + 1; j < ARRAY_ELEMS(gen7_mfd_context->reference_surface); j++) {
+            if (gen7_mfd_context->reference_surface[j].surface_id != VA_INVALID_ID &&
+                gen7_mfd_context->reference_surface[j].frame_store_id == i) {
+                VASurfaceID id = gen7_mfd_context->reference_surface[i].surface_id;
+                int frame_idx = gen7_mfd_context->reference_surface[i].frame_store_id;
+
+                gen7_mfd_context->reference_surface[i].surface_id = gen7_mfd_context->reference_surface[j].surface_id;
+                gen7_mfd_context->reference_surface[i].frame_store_id = gen7_mfd_context->reference_surface[j].frame_store_id;
+                gen7_mfd_context->reference_surface[j].surface_id = id;
+                gen7_mfd_context->reference_surface[j].frame_store_id = frame_idx;
+                break;
+            }
+        }
+    }
+}
+
+/*
+ * Make sure obj_surface carries a GenAvcSurface private record with the
+ * direct-MV buffer(s) needed for the current picture.
+ *
+ * - Installs gen_free_avc_surface as the surface's private-data destructor.
+ * - Allocates the zeroed private record on first use.
+ * - Recomputes dmv_bottom_flag (field picture without 8x8 direct inference).
+ * - Allocates dmv_top, and dmv_bottom when the flag is set, if missing;
+ *   each is width_in_mbs * height_in_mbs * 128 bytes, 0x1000-aligned.
+ *
+ * If the private record cannot be allocated the function returns with
+ * obj_surface->private_data still NULL instead of dereferencing NULL.
+ */
+static void
+gen75_mfd_init_avc_surface(VADriverContextP ctx,
+                           VAPictureParameterBufferH264 *pic_param,
+                           struct object_surface *obj_surface)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    GenAvcSurface *gen7_avc_surface = obj_surface->private_data;
+    int width_in_mbs, height_in_mbs;
+
+    obj_surface->free_private_data = gen_free_avc_surface;
+    width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
+    height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
+
+    if (!gen7_avc_surface) {
+        gen7_avc_surface = calloc(1, sizeof(GenAvcSurface));
+        assert(gen7_avc_surface);
+        /* Out of memory with asserts compiled out: nothing more can be set up. */
+        if (!gen7_avc_surface)
+            return;
+
+        assert((obj_surface->size & 0x3f) == 0);
+        obj_surface->private_data = gen7_avc_surface;
+    }
+
+    gen7_avc_surface->dmv_bottom_flag = (pic_param->pic_fields.bits.field_pic_flag &&
+                                         !pic_param->seq_fields.bits.direct_8x8_inference_flag);
+
+    if (gen7_avc_surface->dmv_top == NULL) {
+        gen7_avc_surface->dmv_top = dri_bo_alloc(i965->intel.bufmgr,
+                                                 "direct mv w/r buffer",
+                                                 width_in_mbs * height_in_mbs * 128,
+                                                 0x1000);
+        assert(gen7_avc_surface->dmv_top);
+    }
+
+    if (gen7_avc_surface->dmv_bottom_flag &&
+        gen7_avc_surface->dmv_bottom == NULL) {
+        gen7_avc_surface->dmv_bottom = dri_bo_alloc(i965->intel.bufmgr,
+                                                    "direct mv w/r buffer",
+                                                    width_in_mbs * height_in_mbs * 128,
+                                                    0x1000);
+        assert(gen7_avc_surface->dmv_bottom);
+    }
+}
+
+/*
+ * Emit the 5-DWord MFX_PIPE_MODE_SELECT command for decoding with the
+ * given codec standard.  ctx and decode_state are not used here.
+ */
+static void
+gen75_mfd_pipe_mode_select(VADriverContextP ctx,
+                           struct decode_state *decode_state,
+                           int standard_select,
+                           struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    unsigned int mode_dw;
+    /*
+     * DW2, all bits cleared:
+     *   bit 4: terminate if AVC motion and POC table error occurs
+     *   bit 3: terminate if AVC mbdata error occurs
+     *   bit 2: terminate if AVC CABAC/CAVLC decode error occurs
+     */
+    const unsigned int error_dw = 0;
+
+    assert(standard_select == MFX_FORMAT_MPEG2 ||
+           standard_select == MFX_FORMAT_AVC ||
+           standard_select == MFX_FORMAT_VC1 ||
+           standard_select == MFX_FORMAT_JPEG);
+
+    /* DW1, assembled from the low bits upwards. */
+    mode_dw = (standard_select << 0);                                       /* codec standard */
+    mode_dw |= (MFX_CODEC_DECODE << 4);                                     /* decoding mode */
+    mode_dw |= (0 << 5);                                                    /* not in stitch mode */
+    mode_dw |= (gen7_mfd_context->pre_deblocking_output.valid << 8);        /* Pre Deblocking Output */
+    mode_dw |= (gen7_mfd_context->post_deblocking_output.valid << 9);       /* Post Deblocking Output */
+    mode_dw |= (0 << 10);                                                   /* Stream-Out disabled */
+    mode_dw |= (MFD_MODE_VLD << 15);                                        /* VLD mode */
+    mode_dw |= (MFX_LONG_MODE << 17);                                       /* only long format is supported */
+
+    BEGIN_BCS_BATCH(batch, 5);
+    OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
+    OUT_BCS_BATCH(batch, mode_dw);
+    OUT_BCS_BATCH(batch, error_dw);
+    OUT_BCS_BATCH(batch, 0); /* pic status/error report id */
+    OUT_BCS_BATCH(batch, 0); /* reserved */
+    ADVANCE_BCS_BATCH(batch);
+}
+
+/*
+ * Emit the 6-DWord MFX_SURFACE_STATE command describing the current
+ * render target (decode_state->current_render_target).
+ */
+static void
+gen75_mfd_surface_state(VADriverContextP ctx,
+                        struct decode_state *decode_state,
+                        int standard_select,
+                        struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx); /* used by SURFACE() */
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    struct object_surface *obj_surface = SURFACE(decode_state->current_render_target);
+    unsigned int cb_row, cr_row;
+    /* Chroma is interleaved for every standard except JPEG. */
+    const int interleave_chroma = (standard_select != MFX_FORMAT_JPEG);
+
+    assert(obj_surface);
+
+    cb_row = obj_surface->y_cb_offset;
+    cr_row = obj_surface->y_cr_offset;
+
+    BEGIN_BCS_BATCH(batch, 6);
+    OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
+    OUT_BCS_BATCH(batch, 0);
+
+    /* DW2: original height and width, each minus one. */
+    OUT_BCS_BATCH(batch,
+                  ((obj_surface->orig_width - 1) << 4) |
+                  ((obj_surface->orig_height - 1) << 18));
+
+    /* DW3: format, pitch and tiling. */
+    OUT_BCS_BATCH(batch,
+                  (I965_TILEWALK_YMAJOR << 0) |             /* tile walk, must be 1 */
+                  (1 << 1) |                                /* must be tiled */
+                  (0 << 2) |                                /* must be 0 */
+                  ((obj_surface->width - 1) << 3) |         /* pitch */
+                  (0 << 22) |                               /* surface object control state, ignored */
+                  (interleave_chroma << 27) |               /* 0 for JPEG */
+                  (MFX_SURFACE_PLANAR_420_8 << 28));        /* 420 planar YUV surface */
+
+    /* DW4: U(Cb) plane -- X offset must be 0, Y offset in the low bits. */
+    OUT_BCS_BATCH(batch, (cb_row << 0) | (0 << 16));
+
+    /* DW5: V(Cr) plane -- Y offset must be 0 for video codecs, non-zero for JPEG. */
+    OUT_BCS_BATCH(batch, (cr_row << 0) | (0 << 16));
+    ADVANCE_BCS_BATCH(batch);
+}
+
+
+/*
+ * Emit the 61-DWord layout of MFX_PIPE_BUF_ADDR_STATE (the variant chosen
+ * when IS_STEPPING_BPLUS() holds).  Each address occupies more than one
+ * DWord here; only the first DWord of a group ever carries a relocation,
+ * the others are written as zero.
+ */
+static void
+gen75_mfd_pipe_buf_addr_state_bplus(VADriverContextP ctx,
+                                    struct decode_state *decode_state,
+                                    int standard_select,
+                                    struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx); /* used by SURFACE() */
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    int i, pad;
+
+    BEGIN_BCS_BATCH(batch, 61);
+    OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
+
+    /* DW 1..3: pre-deblocking output */
+    if (!gen7_mfd_context->pre_deblocking_output.valid) {
+        OUT_BCS_BATCH(batch, 0);
+    } else {
+        OUT_BCS_RELOC(batch, gen7_mfd_context->pre_deblocking_output.bo,
+                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                      0);
+    }
+    for (pad = 0; pad < 2; pad++) {
+        OUT_BCS_BATCH(batch, 0);
+    }
+
+    /* DW 4..6: post-deblocking output */
+    if (!gen7_mfd_context->post_deblocking_output.valid) {
+        OUT_BCS_BATCH(batch, 0);
+    } else {
+        OUT_BCS_RELOC(batch, gen7_mfd_context->post_deblocking_output.bo,
+                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                      0);
+    }
+    for (pad = 0; pad < 2; pad++) {
+        OUT_BCS_BATCH(batch, 0);
+    }
+
+    /* DW 7..12: uncompressed video & stream out, ignored for decoding */
+    for (pad = 0; pad < 6; pad++) {
+        OUT_BCS_BATCH(batch, 0);
+    }
+
+    /* DW 13..15: intra row-store scratch */
+    if (!gen7_mfd_context->intra_row_store_scratch_buffer.valid) {
+        OUT_BCS_BATCH(batch, 0);
+    } else {
+        OUT_BCS_RELOC(batch, gen7_mfd_context->intra_row_store_scratch_buffer.bo,
+                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                      0);
+    }
+    for (pad = 0; pad < 2; pad++) {
+        OUT_BCS_BATCH(batch, 0);
+    }
+
+    /* DW 16..18: deblocking-filter row-store scratch */
+    if (!gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid) {
+        OUT_BCS_BATCH(batch, 0);
+    } else {
+        OUT_BCS_RELOC(batch, gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
+                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                      0);
+    }
+    for (pad = 0; pad < 2; pad++) {
+        OUT_BCS_BATCH(batch, 0);
+    }
+
+    /* DW 19..50: two DWords per reference surface */
+    for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
+        if (gen7_mfd_context->reference_surface[i].surface_id == VA_INVALID_ID) {
+            OUT_BCS_BATCH(batch, 0);
+        } else {
+            struct object_surface *ref_surface =
+                SURFACE(gen7_mfd_context->reference_surface[i].surface_id);
+
+            assert(ref_surface && ref_surface->bo);
+
+            OUT_BCS_RELOC(batch, ref_surface->bo,
+                          I915_GEM_DOMAIN_INSTRUCTION, 0,
+                          0);
+        }
+        OUT_BCS_BATCH(batch, 0);
+    }
+
+    /*
+     * DW 51:     reference property
+     * DW 52..57: macroblock status & ILDB
+     * DW 58..60: the second macroblock status
+     * All ten are written as zero.
+     */
+    for (pad = 0; pad < 10; pad++) {
+        OUT_BCS_BATCH(batch, 0);
+    }
+    ADVANCE_BCS_BATCH(batch);
+}
+
+/*
+ * Emit MFX_PIPE_BUF_ADDR_STATE.  When IS_STEPPING_BPLUS() holds the work
+ * is delegated to the 61-DWord variant; otherwise the 25-DWord layout
+ * with one DWord per address is written here.
+ */
+static void
+gen75_mfd_pipe_buf_addr_state(VADriverContextP ctx,
+                              struct decode_state *decode_state,
+                              int standard_select,
+                              struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx); /* also used by SURFACE() */
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    int i, pad;
+
+    if (IS_STEPPING_BPLUS(i965)) {
+        gen75_mfd_pipe_buf_addr_state_bplus(ctx, decode_state,
+                                            standard_select, gen7_mfd_context);
+        return;
+    }
+
+    BEGIN_BCS_BATCH(batch, 25);
+    OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (25 - 2));
+
+    /* DW 1: pre-deblocking output */
+    if (!gen7_mfd_context->pre_deblocking_output.valid) {
+        OUT_BCS_BATCH(batch, 0);
+    } else {
+        OUT_BCS_RELOC(batch, gen7_mfd_context->pre_deblocking_output.bo,
+                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                      0);
+    }
+
+    /* DW 2: post-deblocking output */
+    if (!gen7_mfd_context->post_deblocking_output.valid) {
+        OUT_BCS_BATCH(batch, 0);
+    } else {
+        OUT_BCS_RELOC(batch, gen7_mfd_context->post_deblocking_output.bo,
+                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                      0);
+    }
+
+    /* DW 3..4: ignored for decoding */
+    for (pad = 0; pad < 2; pad++) {
+        OUT_BCS_BATCH(batch, 0);
+    }
+
+    /* DW 5: intra row-store scratch */
+    if (!gen7_mfd_context->intra_row_store_scratch_buffer.valid) {
+        OUT_BCS_BATCH(batch, 0);
+    } else {
+        OUT_BCS_RELOC(batch, gen7_mfd_context->intra_row_store_scratch_buffer.bo,
+                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                      0);
+    }
+
+    /* DW 6: deblocking-filter row-store scratch */
+    if (!gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid) {
+        OUT_BCS_BATCH(batch, 0);
+    } else {
+        OUT_BCS_RELOC(batch, gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
+                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                      0);
+    }
+
+    /* DW 7..22: one DWord per reference surface */
+    for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
+        if (gen7_mfd_context->reference_surface[i].surface_id == VA_INVALID_ID) {
+            OUT_BCS_BATCH(batch, 0);
+        } else {
+            struct object_surface *ref_surface =
+                SURFACE(gen7_mfd_context->reference_surface[i].surface_id);
+
+            assert(ref_surface && ref_surface->bo);
+
+            OUT_BCS_RELOC(batch, ref_surface->bo,
+                          I915_GEM_DOMAIN_INSTRUCTION, 0,
+                          0);
+        }
+    }
+
+    /* DW 23..24: ignored for decoding */
+    for (pad = 0; pad < 2; pad++) {
+        OUT_BCS_BATCH(batch, 0);
+    }
+    ADVANCE_BCS_BATCH(batch);
+}
+
+/*
+ * Emit the 26-DWord layout of MFX_IND_OBJ_BASE_ADDR_STATE (the variant
+ * chosen when IS_STEPPING_BPLUS() holds).  Only the indirect bitstream
+ * base (slice_data_bo) and its upper bound are programmed; every other
+ * DWord is zero.  ctx and standard_select are not used here.
+ */
+static void
+gen75_mfd_ind_obj_base_addr_state_bplus(VADriverContextP ctx,
+                                        dri_bo *slice_data_bo,
+                                        int standard_select,
+                                        struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    int pad;
+
+    BEGIN_BCS_BATCH(batch, 26);
+    OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
+
+    /* DW 1..3: MFX Indirect Bitstream Object Base Address */
+    OUT_BCS_RELOC(batch, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+    for (pad = 0; pad < 2; pad++) {
+        OUT_BCS_BATCH(batch, 0);
+    }
+
+    /* DW 4..5: upper bound -- must be set, up to 2G */
+    OUT_BCS_BATCH(batch, 0x80000000);
+    OUT_BCS_BATCH(batch, 0);
+
+    /*
+     * DW 6..10:  MFX indirect MV
+     * DW 11..15: MFX IT_COFF
+     * DW 16..20: MFX IT_DBLK
+     * DW 21..25: MFX PAK_BSE object for encoder
+     * All twenty are written as zero.
+     */
+    for (pad = 0; pad < 20; pad++) {
+        OUT_BCS_BATCH(batch, 0);
+    }
+
+    ADVANCE_BCS_BATCH(batch);
+}
+
+/*
+ * Emit MFX_IND_OBJ_BASE_ADDR_STATE.  When IS_STEPPING_BPLUS() holds the
+ * 26-DWord variant is used; otherwise the 11-DWord layout is written
+ * here with slice_data_bo as the indirect bitstream base.
+ */
+static void
+gen75_mfd_ind_obj_base_addr_state(VADriverContextP ctx,
+                                  dri_bo *slice_data_bo,
+                                  int standard_select,
+                                  struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    int pad;
+
+    if (IS_STEPPING_BPLUS(i965)) {
+        gen75_mfd_ind_obj_base_addr_state_bplus(ctx, slice_data_bo,
+                                                standard_select, gen7_mfd_context);
+        return;
+    }
+
+    BEGIN_BCS_BATCH(batch, 11);
+    OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
+
+    /* DW 1: MFX Indirect Bitstream Object Base Address */
+    OUT_BCS_RELOC(batch, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+    /* DW 2: upper bound -- must be set, up to 2G */
+    OUT_BCS_BATCH(batch, 0x80000000);
+
+    /* DW 3..10: remaining base/bound pairs, ignored for VLD mode */
+    for (pad = 0; pad < 8; pad++) {
+        OUT_BCS_BATCH(batch, 0);
+    }
+    ADVANCE_BCS_BATCH(batch);
+}
+
+
+/*
+ * Emit the 10-DWord layout of MFX_BSP_BUF_BASE_ADDR_STATE (the variant
+ * chosen when IS_STEPPING_BPLUS() holds): three DWords per buffer, of
+ * which only the first may carry a relocation.
+ */
+static void
+gen75_mfd_bsp_buf_base_addr_state_bplus(VADriverContextP ctx,
+                                        struct decode_state *decode_state,
+                                        int standard_select,
+                                        struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    int pad;
+
+    BEGIN_BCS_BATCH(batch, 10);
+    OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
+
+    /* DW 1..3: BSD/MPC row-store scratch buffer */
+    if (!gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid) {
+        OUT_BCS_BATCH(batch, 0);
+    } else {
+        OUT_BCS_RELOC(batch, gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
+                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                      0);
+    }
+    for (pad = 0; pad < 2; pad++) {
+        OUT_BCS_BATCH(batch, 0);
+    }
+
+    /* DW 4..6: MPR row-store scratch buffer */
+    if (!gen7_mfd_context->mpr_row_store_scratch_buffer.valid) {
+        OUT_BCS_BATCH(batch, 0);
+    } else {
+        OUT_BCS_RELOC(batch, gen7_mfd_context->mpr_row_store_scratch_buffer.bo,
+                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                      0);
+    }
+    for (pad = 0; pad < 2; pad++) {
+        OUT_BCS_BATCH(batch, 0);
+    }
+
+    /* DW 7..9: bitplane read buffer (read-only relocation) */
+    if (!gen7_mfd_context->bitplane_read_buffer.valid) {
+        OUT_BCS_BATCH(batch, 0);
+    } else {
+        OUT_BCS_RELOC(batch, gen7_mfd_context->bitplane_read_buffer.bo,
+                      I915_GEM_DOMAIN_INSTRUCTION, 0,
+                      0);
+    }
+    for (pad = 0; pad < 2; pad++) {
+        OUT_BCS_BATCH(batch, 0);
+    }
+
+    ADVANCE_BCS_BATCH(batch);
+}
+
+/*
+ * Emit MFX_BSP_BUF_BASE_ADDR_STATE.  When IS_STEPPING_BPLUS() holds the
+ * 10-DWord variant is used; otherwise the 4-DWord layout with one DWord
+ * per buffer is written here.
+ */
+static void
+gen75_mfd_bsp_buf_base_addr_state(VADriverContextP ctx,
+                                  struct decode_state *decode_state,
+                                  int standard_select,
+                                  struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+
+    if (IS_STEPPING_BPLUS(i965)) {
+        gen75_mfd_bsp_buf_base_addr_state_bplus(ctx, decode_state,
+                                                standard_select, gen7_mfd_context);
+        return;
+    }
+
+    BEGIN_BCS_BATCH(batch, 4);
+    OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));
+
+    /* DW 1: BSD/MPC row-store scratch buffer */
+    if (!gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid) {
+        OUT_BCS_BATCH(batch, 0);
+    } else {
+        OUT_BCS_RELOC(batch, gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
+                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                      0);
+    }
+
+    /* DW 2: MPR row-store scratch buffer */
+    if (!gen7_mfd_context->mpr_row_store_scratch_buffer.valid) {
+        OUT_BCS_BATCH(batch, 0);
+    } else {
+        OUT_BCS_RELOC(batch, gen7_mfd_context->mpr_row_store_scratch_buffer.bo,
+                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                      0);
+    }
+
+    /* DW 3: bitplane read buffer (read-only relocation) */
+    if (!gen7_mfd_context->bitplane_read_buffer.valid) {
+        OUT_BCS_BATCH(batch, 0);
+    } else {
+        OUT_BCS_RELOC(batch, gen7_mfd_context->bitplane_read_buffer.bo,
+                      I915_GEM_DOMAIN_INSTRUCTION, 0,
+                      0);
+    }
+
+    ADVANCE_BCS_BATCH(batch);
+}
+
+/*
+ * Emit one 18-DWord MFX_QM_STATE command.
+ *
+ * qm_type   - matrix selector written to DW1.
+ * qm        - source matrix bytes.
+ * qm_length - number of valid bytes at qm, at most 64.
+ *
+ * The command always carries 64 payload bytes.  The staging buffer is
+ * zero-filled first so that a shorter matrix is padded with zeros rather
+ * than with whatever happened to be on the stack.
+ */
+static void
+gen75_mfd_qm_state(VADriverContextP ctx,
+                   int qm_type,
+                   unsigned char *qm,
+                   int qm_length,
+                   struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    unsigned int qm_buffer[16];
+
+    assert(qm_length <= 16 * 4);
+
+    /* Keep the copy inside qm_buffer even when asserts are compiled out. */
+    if (qm_length > (int)sizeof(qm_buffer))
+        qm_length = (int)sizeof(qm_buffer);
+    else if (qm_length < 0)
+        qm_length = 0;
+
+    memset(qm_buffer, 0, sizeof(qm_buffer));
+    memcpy(qm_buffer, qm, qm_length);
+
+    BEGIN_BCS_BATCH(batch, 18);
+    OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
+    OUT_BCS_BATCH(batch, qm_type << 0);
+    intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
+    ADVANCE_BCS_BATCH(batch);
+}
+
+/*
+ * Emit the 16-DWord MFX_AVC_IMG_STATE command from the H.264 picture
+ * parameters in decode_state.  Picture-structure and chroma-format
+ * constraints are checked with asserts before anything is written.
+ */
+static void
+gen75_mfd_avc_img_state(VADriverContextP ctx,
+                        struct decode_state *decode_state,
+                        struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    VAPictureParameterBufferH264 *pic_param;
+    unsigned int width_in_mbs, height_in_mbs;
+    int img_struct, mbaff_frame_flag, pad;
+
+    assert(decode_state->pic_param && decode_state->pic_param->buffer);
+    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
+    assert(!(pic_param->CurrPic.flags & VA_PICTURE_H264_INVALID));
+
+    /* 1 = top field, 3 = bottom field, 0 = frame */
+    img_struct = (pic_param->CurrPic.flags & VA_PICTURE_H264_TOP_FIELD) ? 1 :
+                 (pic_param->CurrPic.flags & VA_PICTURE_H264_BOTTOM_FIELD) ? 3 : 0;
+
+    /* A field picture must be flagged as such, and a frame must not be. */
+    if ((img_struct & 0x1) != 0x1) {
+        assert(pic_param->pic_fields.bits.field_pic_flag == 0x0);
+    } else {
+        assert(pic_param->pic_fields.bits.field_pic_flag == 0x1);
+    }
+
+    if (!pic_param->seq_fields.bits.frame_mbs_only_flag) {
+        assert(pic_param->seq_fields.bits.direct_8x8_inference_flag == 1); /* see H.264 spec */
+    } else {
+        /* a frame containing only frame macroblocks */
+        assert(pic_param->seq_fields.bits.mb_adaptive_frame_field_flag == 0);
+        assert(pic_param->pic_fields.bits.field_pic_flag == 0);
+    }
+
+    mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
+                        !pic_param->pic_fields.bits.field_pic_flag);
+
+    width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
+    height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
+
+    /* MFX unit doesn't support 4:2:2 and 4:4:4 picture */
+    assert(pic_param->seq_fields.bits.chroma_format_idc == 0 || /* monochrome picture */
+           pic_param->seq_fields.bits.chroma_format_idc == 1);  /* 4:2:0 */
+    assert(pic_param->seq_fields.bits.residual_colour_transform_flag == 0); /* only available for 4:4:4 */
+
+    BEGIN_BCS_BATCH(batch, 16);
+    OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
+
+    /* DW1: macroblock count of the frame */
+    OUT_BCS_BATCH(batch,
+                  width_in_mbs * height_in_mbs);
+
+    /* DW2: frame size in macroblocks, each dimension minus one */
+    OUT_BCS_BATCH(batch,
+                  ((width_in_mbs - 1) << 0) |
+                  ((height_in_mbs - 1) << 16));
+
+    /* DW3: picture structure, weighted prediction and chroma QP offsets */
+    OUT_BCS_BATCH(batch,
+                  (img_struct << 8) |
+                  (pic_param->pic_fields.bits.weighted_bipred_idc << 10) |
+                  (pic_param->pic_fields.bits.weighted_pred_flag << 12) | /* differ from GEN6 */
+                  (0 << 13) | /* Max Macroblock size conformance Inter flag ??? FIXME */
+                  (0 << 14) | /* Max-bit conformance Intra flag ??? FIXME */
+                  ((pic_param->chroma_qp_index_offset & 0x1f) << 16) |
+                  ((pic_param->second_chroma_qp_index_offset & 0x1f) << 24));
+
+    /* DW4: sequence/picture level coding flags */
+    OUT_BCS_BATCH(batch,
+                  (pic_param->pic_fields.bits.field_pic_flag << 0) |
+                  (mbaff_frame_flag << 1) |
+                  (pic_param->seq_fields.bits.frame_mbs_only_flag << 2) |
+                  (pic_param->pic_fields.bits.transform_8x8_mode_flag << 3) |
+                  (pic_param->seq_fields.bits.direct_8x8_inference_flag << 4) |
+                  (pic_param->pic_fields.bits.constrained_intra_pred_flag << 5) |
+                  ((!pic_param->pic_fields.bits.reference_pic_flag) << 6) |
+                  (pic_param->pic_fields.bits.entropy_coding_mode_flag << 7) |
+                  (pic_param->seq_fields.bits.chroma_format_idc << 10));
+
+    /* DW5..15: written as zero */
+    for (pad = 0; pad < 11; pad++) {
+        OUT_BCS_BATCH(batch, 0);
+    }
+    ADVANCE_BCS_BATCH(batch);
+}
+
+/*
+ * Emit the AVC scaling matrices through gen75_mfd_qm_state().
+ *
+ * The matrices come from the application's IQ matrix buffer when one is
+ * attached to decode_state, otherwise from the copy kept in
+ * gen7_mfd_context->iq_matrix.h264.  The 4x4 intra/inter sets (3 * 16
+ * bytes each) are always sent; the 8x8 pair only when the picture enables
+ * transform_8x8_mode_flag.
+ */
+static void
+gen75_mfd_avc_qm_state(VADriverContextP ctx,
+                       struct decode_state *decode_state,
+                       struct gen7_mfd_context *gen7_mfd_context)
+{
+    const int have_app_matrix = (decode_state->iq_matrix && decode_state->iq_matrix->buffer);
+    VAIQMatrixBufferH264 *iq_matrix = have_app_matrix ?
+        (VAIQMatrixBufferH264 *)decode_state->iq_matrix->buffer :
+        &gen7_mfd_context->iq_matrix.h264;
+    VAPictureParameterBufferH264 *pic_param;
+
+    assert(decode_state->pic_param && decode_state->pic_param->buffer);
+    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
+
+    /* Lists 0..2 form the intra set, lists 3..5 the inter set. */
+    gen75_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, &iq_matrix->ScalingList4x4[0][0], 3 * 16, gen7_mfd_context);
+    gen75_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, &iq_matrix->ScalingList4x4[3][0], 3 * 16, gen7_mfd_context);
+
+    if (!pic_param->pic_fields.bits.transform_8x8_mode_flag)
+        return;
+
+    gen75_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, &iq_matrix->ScalingList8x8[0][0], 64, gen7_mfd_context);
+    gen75_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, &iq_matrix->ScalingList8x8[1][0], 64, gen7_mfd_context);
+}
+
+/*
+ * Emit the 10-DWord MFD_AVC_PICID_STATE command with Picture ID
+ * Remapping disabled (DW1 = 1) and every following DWord zero.
+ * ctx and decode_state are not used here.
+ */
+static void
+gen75_mfd_avc_picid_state(VADriverContextP ctx,
+                          struct decode_state *decode_state,
+                          struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    int pad;
+
+    BEGIN_BCS_BATCH(batch, 10);
+    OUT_BCS_BATCH(batch, MFD_AVC_PICID_STATE | (10 - 2));
+    OUT_BCS_BATCH(batch, 1); /* disable Picture ID Remapping */
+
+    /* DW2..9 */
+    for (pad = 0; pad < 8; pad++) {
+        OUT_BCS_BATCH(batch, 0);
+    }
+    ADVANCE_BCS_BATCH(batch);
+}
+
+
+/*
+ * Emit the 71-DWord layout of MFX_AVC_DIRECTMODE_STATE (the variant
+ * chosen when IS_STEPPING_BPLUS() holds):
+ *
+ *   - two DWords per reference surface (dmv_top relocation + zero),
+ *   - one zero DWord,
+ *   - the current picture's dmv_top relocation followed by two zeros,
+ *   - the POC list: top/bottom field order counts for every reference
+ *     surface, then for the current picture.
+ *
+ * slice_param is not used here.
+ */
+static void
+gen75_mfd_avc_directmode_state_bplus(VADriverContextP ctx,
+                                     VAPictureParameterBufferH264 *pic_param,
+                                     VASliceParameterBufferH264 *slice_param,
+                                     struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx); /* used by SURFACE() */
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    struct object_surface *obj_surface;
+    GenAvcSurface *gen7_avc_surface;
+    VAPictureH264 *va_pic;
+    int i, j;
+
+    BEGIN_BCS_BATCH(batch, 71);
+    OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
+
+    /* reference surfaces 0..15 */
+    for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
+        /* Stays NULL for an unused slot or a surface without private data. */
+        gen7_avc_surface = NULL;
+
+        if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
+            obj_surface = SURFACE(gen7_mfd_context->reference_surface[i].surface_id);
+            assert(obj_surface);
+            gen7_avc_surface = obj_surface->private_data;
+        }
+
+        if (gen7_avc_surface != NULL) {
+            OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
+                          I915_GEM_DOMAIN_INSTRUCTION, 0,
+                          0);
+        } else {
+            OUT_BCS_BATCH(batch, 0);
+        }
+        OUT_BCS_BATCH(batch, 0);
+    }
+    OUT_BCS_BATCH(batch, 0);
+
+    /* the current decoding frame/field */
+    va_pic = &pic_param->CurrPic;
+    assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
+    obj_surface = SURFACE(va_pic->picture_id);
+    assert(obj_surface && obj_surface->bo && obj_surface->private_data);
+    gen7_avc_surface = obj_surface->private_data;
+
+    OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
+                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                  0);
+
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+
+    /* POC List */
+    for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
+        int found = 0;
+
+        if (gen7_mfd_context->reference_surface[i].surface_id == VA_INVALID_ID) {
+            OUT_BCS_BATCH(batch, 0);
+            OUT_BCS_BATCH(batch, 0);
+            continue;
+        }
+
+        /* Locate the valid ReferenceFrames entry backing this slot. */
+        for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) {
+            va_pic = &pic_param->ReferenceFrames[j];
+
+            if (!(va_pic->flags & VA_PICTURE_H264_INVALID) &&
+                va_pic->picture_id == gen7_mfd_context->reference_surface[i].surface_id) {
+                found = 1;
+                break;
+            }
+        }
+
+        assert(found == 1);
+        assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
+
+        OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
+        OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
+    }
+
+    va_pic = &pic_param->CurrPic;
+    OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
+    OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
+
+    ADVANCE_BCS_BATCH(batch);
+}
+
+/*
+ * Emit MFX_AVC_DIRECTMODE_STATE.  When IS_STEPPING_BPLUS() holds the
+ * 71-DWord variant is used; otherwise the 69-DWord layout is written:
+ *
+ *   - a top/bottom direct-MV relocation pair per reference surface
+ *     (dmv_top is used twice when dmv_bottom_flag is not 1),
+ *   - the same pair, writable, for the current picture,
+ *   - the POC list: top/bottom field order counts for every reference
+ *     surface, then for the current picture.
+ *
+ * slice_param is only forwarded to the B+ variant.
+ */
+static void
+gen75_mfd_avc_directmode_state(VADriverContextP ctx,
+                               VAPictureParameterBufferH264 *pic_param,
+                               VASliceParameterBufferH264 *slice_param,
+                               struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx); /* also used by SURFACE() */
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    struct object_surface *obj_surface;
+    GenAvcSurface *gen7_avc_surface;
+    VAPictureH264 *va_pic;
+    int i, j;
+
+    if (IS_STEPPING_BPLUS(i965)) {
+        gen75_mfd_avc_directmode_state_bplus(ctx, pic_param, slice_param,
+                                             gen7_mfd_context);
+        return;
+    }
+
+    BEGIN_BCS_BATCH(batch, 69);
+    OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));
+
+    /* reference surfaces 0..15 */
+    for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
+        /* Stays NULL for an unused slot or a surface without private data. */
+        gen7_avc_surface = NULL;
+
+        if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
+            obj_surface = SURFACE(gen7_mfd_context->reference_surface[i].surface_id);
+            assert(obj_surface);
+            gen7_avc_surface = obj_surface->private_data;
+        }
+
+        if (gen7_avc_surface == NULL) {
+            OUT_BCS_BATCH(batch, 0);
+            OUT_BCS_BATCH(batch, 0);
+            continue;
+        }
+
+        OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
+                      I915_GEM_DOMAIN_INSTRUCTION, 0,
+                      0);
+
+        if (gen7_avc_surface->dmv_bottom_flag != 1) {
+            OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
+                          I915_GEM_DOMAIN_INSTRUCTION, 0,
+                          0);
+        } else {
+            OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_bottom,
+                          I915_GEM_DOMAIN_INSTRUCTION, 0,
+                          0);
+        }
+    }
+
+    /* the current decoding frame/field */
+    va_pic = &pic_param->CurrPic;
+    assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
+    obj_surface = SURFACE(va_pic->picture_id);
+    assert(obj_surface && obj_surface->bo && obj_surface->private_data);
+    gen7_avc_surface = obj_surface->private_data;
+
+    OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
+                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                  0);
+
+    if (gen7_avc_surface->dmv_bottom_flag != 1) {
+        OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
+                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                      0);
+    } else {
+        OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_bottom,
+                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                      0);
+    }
+
+    /* POC List */
+    for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
+        int found = 0;
+
+        if (gen7_mfd_context->reference_surface[i].surface_id == VA_INVALID_ID) {
+            OUT_BCS_BATCH(batch, 0);
+            OUT_BCS_BATCH(batch, 0);
+            continue;
+        }
+
+        /* Locate the valid ReferenceFrames entry backing this slot. */
+        for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) {
+            va_pic = &pic_param->ReferenceFrames[j];
+
+            if (!(va_pic->flags & VA_PICTURE_H264_INVALID) &&
+                va_pic->picture_id == gen7_mfd_context->reference_surface[i].surface_id) {
+                found = 1;
+                break;
+            }
+        }
+
+        assert(found == 1);
+        assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
+
+        OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
+        OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
+    }
+
+    va_pic = &pic_param->CurrPic;
+    OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
+    OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
+
+    ADVANCE_BCS_BATCH(batch);
+}
+
+/*
+ * Emit the MFX_AVC_SLICE_STATE command (11 dwords) for one H.264 slice.
+ *
+ * pic_param        - picture parameters of the picture being decoded
+ * slice_param      - parameters of the slice being programmed
+ * next_slice_param - parameters of the slice that follows, or NULL when
+ *                    slice_param is the last slice of the picture
+ * gen7_mfd_context - decoder context that owns the BCS batch buffer
+ *
+ * SI slices are programmed as I slices and SP slices as P slices.
+ */
+static void
+gen75_mfd_avc_slice_state(VADriverContextP ctx,
+                          VAPictureParameterBufferH264 *pic_param,
+                          VASliceParameterBufferH264 *slice_param,
+                          VASliceParameterBufferH264 *next_slice_param,
+                          struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    const int mbs_per_row = pic_param->picture_width_in_mbs_minus1 + 1;
+    const int mb_rows = pic_param->picture_height_in_mbs_minus1 + 1;
+    /* MBAFF: frame picture whose sequence enables adaptive frame/field MBs */
+    const int is_mbaff = (!pic_param->pic_fields.bits.field_pic_flag &&
+                          pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
+    const int is_last_slice = (next_slice_param == NULL);
+    int hw_slice_type;
+    int l0_count, l1_count;
+    int cur_mb, cur_x, cur_y;
+    int next_x, next_y;
+    int pad;
+
+    /* Collapse the five VA slice types to I/P/B and derive the number of
+     * active references per list for the collapsed type. */
+    switch (slice_param->slice_type) {
+    case SLICE_TYPE_I:
+    case SLICE_TYPE_SI:
+        assert(slice_param->num_ref_idx_l0_active_minus1 == 0);
+        assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
+        hw_slice_type = SLICE_TYPE_I;
+        l0_count = 0;
+        l1_count = 0;
+        break;
+
+    case SLICE_TYPE_P:
+    case SLICE_TYPE_SP:
+        assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
+        hw_slice_type = SLICE_TYPE_P;
+        l0_count = slice_param->num_ref_idx_l0_active_minus1 + 1;
+        l1_count = 0;
+        break;
+
+    default:
+        assert(slice_param->slice_type == SLICE_TYPE_B);
+        hw_slice_type = SLICE_TYPE_B;
+        l0_count = slice_param->num_ref_idx_l0_active_minus1 + 1;
+        l1_count = slice_param->num_ref_idx_l1_active_minus1 + 1;
+        break;
+    }
+
+    /* Position of the first macroblock of this slice; the MB address is
+     * doubled for MBAFF pictures. */
+    cur_mb = slice_param->first_mb_in_slice << is_mbaff;
+    cur_x = cur_mb % mbs_per_row;
+    cur_y = cur_mb / mbs_per_row;
+
+    /* Position where the next slice starts, or the end of the picture
+     * (half the rows for a field picture) when there is no next slice. */
+    if (is_last_slice) {
+        next_x = 0;
+        next_y = mb_rows / (1 + !!pic_param->pic_fields.bits.field_pic_flag);
+    } else {
+        int next_mb = next_slice_param->first_mb_in_slice << is_mbaff;
+
+        next_x = next_mb % mbs_per_row;
+        next_y = next_mb / mbs_per_row;
+    }
+
+    BEGIN_BCS_BATCH(batch, 11); /* FIXME: is it 10??? */
+    OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
+    OUT_BCS_BATCH(batch, hw_slice_type);
+    OUT_BCS_BATCH(batch,
+                  (l1_count << 24) |
+                  (l0_count << 16) |
+                  (slice_param->chroma_log2_weight_denom << 8) |
+                  (slice_param->luma_log2_weight_denom << 0));
+    OUT_BCS_BATCH(batch,
+                  (slice_param->direct_spatial_mv_pred_flag << 29) |
+                  (slice_param->disable_deblocking_filter_idc << 27) |
+                  (slice_param->cabac_init_idc << 24) |
+                  ((pic_param->pic_init_qp_minus26 + 26 + slice_param->slice_qp_delta) << 16) |
+                  ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
+                  ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
+    OUT_BCS_BATCH(batch,
+                  (cur_y << 24) |
+                  (cur_x << 16) |
+                  (cur_mb << 0));
+    OUT_BCS_BATCH(batch,
+                  (next_y << 16) |
+                  (next_x << 0));
+    OUT_BCS_BATCH(batch,
+                  is_last_slice << 19); /* last slice flag */
+
+    /* The remaining four dwords of the command are programmed as zero. */
+    for (pad = 0; pad < 4; pad++)
+        OUT_BCS_BATCH(batch, 0);
+
+    ADVANCE_BCS_BATCH(batch);
+}
+
+/*
+ * Program the reference index state for one H.264 slice by forwarding to
+ * gen6_send_avc_ref_idx_state() with this context's batch buffer and
+ * reference surface array.  ctx and pic_param are not used here.
+ */
+static inline void
+gen75_mfd_avc_ref_idx_state(VADriverContextP ctx,
+                            VAPictureParameterBufferH264 *pic_param,
+                            VASliceParameterBufferH264 *slice_param,
+                            struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+
+    gen6_send_avc_ref_idx_state(batch,
+                                slice_param,
+                                gen7_mfd_context->reference_surface);
+}
+
+/*
+ * Emit MFX_AVC_WEIGHTOFFSET_STATE for one H.264 slice.
+ *
+ * One table (list 0) is sent for P/SP slices when weighted_pred_flag is 1,
+ * two tables (list 0 then list 1) for B slices when weighted_bipred_idc
+ * is 1, and nothing otherwise.  Each table holds 32 entries of six shorts:
+ * luma weight, luma offset, then weight/offset for chroma component 0 and
+ * for chroma component 1.
+ */
+static void
+gen75_mfd_avc_weightoffset_state(VADriverContextP ctx,
+                                 VAPictureParameterBufferH264 *pic_param,
+                                 VASliceParameterBufferH264 *slice_param,
+                                 struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    const int is_p_slice = (slice_param->slice_type == SLICE_TYPE_P ||
+                            slice_param->slice_type == SLICE_TYPE_SP);
+    const int is_b_slice = (slice_param->slice_type == SLICE_TYPE_B);
+    short packed[32 * 6];
+    int table, ref, table_count = 0;
+
+    if (is_p_slice && pic_param->pic_fields.bits.weighted_pred_flag == 1)
+        table_count = 1;
+
+    if (is_b_slice && pic_param->pic_fields.bits.weighted_bipred_idc == 1)
+        table_count = 2;
+
+    for (table = 0; table < table_count; table++) {
+        short *entry = packed;
+
+        /* 2 header dwords + sizeof(packed) / 4 = 96 payload dwords */
+        BEGIN_BCS_BATCH(batch, 98);
+        OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
+        OUT_BCS_BATCH(batch, table);
+
+        for (ref = 0; ref < 32; ref++, entry += 6) {
+            if (table == 0) {
+                entry[0] = slice_param->luma_weight_l0[ref];
+                entry[1] = slice_param->luma_offset_l0[ref];
+                entry[2] = slice_param->chroma_weight_l0[ref][0];
+                entry[3] = slice_param->chroma_offset_l0[ref][0];
+                entry[4] = slice_param->chroma_weight_l0[ref][1];
+                entry[5] = slice_param->chroma_offset_l0[ref][1];
+            } else {
+                entry[0] = slice_param->luma_weight_l1[ref];
+                entry[1] = slice_param->luma_offset_l1[ref];
+                entry[2] = slice_param->chroma_weight_l1[ref][0];
+                entry[3] = slice_param->chroma_offset_l1[ref][0];
+                entry[4] = slice_param->chroma_weight_l1[ref][1];
+                entry[5] = slice_param->chroma_offset_l1[ref][1];
+            }
+        }
+
+        intel_batchbuffer_data(batch, packed, sizeof(packed));
+        ADVANCE_BCS_BATCH(batch);
+    }
+}
+
+/*
+ * Emit the MFD_AVC_BSD_OBJECT command (6 dwords) that starts decoding of
+ * one H.264 slice.
+ *
+ * slice_data_bo    - buffer object holding the slice data; passed to
+ *                    avc_get_first_mb_bit_offset() to locate the first
+ *                    macroblock
+ * next_slice_param - NULL when this is the last slice of the picture,
+ *                    which sets the LastSlice flag
+ */
+static void
+gen75_mfd_avc_bsd_object(VADriverContextP ctx,
+                         VAPictureParameterBufferH264 *pic_param,
+                         VASliceParameterBufferH264 *slice_param,
+                         dri_bo *slice_data_bo,
+                         VASliceParameterBufferH264 *next_slice_param,
+                         struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    const int is_last_slice = (next_slice_param == NULL);
+    unsigned int first_mb_bit_offset;
+    unsigned int first_mb_byte, first_mb_bit;
+
+    first_mb_bit_offset =
+        avc_get_first_mb_bit_offset(slice_data_bo,
+                                    slice_param,
+                                    pic_param->pic_fields.bits.entropy_coding_mode_flag);
+
+    /* The command takes the offset split into whole bytes and leftover bits. */
+    first_mb_byte = first_mb_bit_offset >> 3;
+    first_mb_bit = first_mb_bit_offset & 0x7;
+
+    /* the input bitstream format on GEN7 differs from GEN6 */
+    BEGIN_BCS_BATCH(batch, 6);
+    OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
+    OUT_BCS_BATCH(batch,
+                  (slice_param->slice_data_size - slice_param->slice_data_offset));
+    OUT_BCS_BATCH(batch, slice_param->slice_data_offset);
+    /* all fields of this dword (bits 31, 14, 12, 10 and 8) are left at 0 */
+    OUT_BCS_BATCH(batch,
+                  (0 << 31) |
+                  (0 << 14) |
+                  (0 << 12) |
+                  (0 << 10) |
+                  (0 << 8));
+    OUT_BCS_BATCH(batch,
+                  (first_mb_byte << 16) |
+                  (0 << 5) |
+                  (0 << 4) |
+                  (is_last_slice << 3) | /* LastSlice Flag */
+                  first_mb_bit);
+    OUT_BCS_BATCH(batch, 0);
+    ADVANCE_BCS_BATCH(batch);
+}
+
+/*
+ * One-time H.264 setup of a decoder context: fill the context's cached
+ * H.264 IQ matrix with the defaults produced by
+ * avc_gen_default_iq_matrix() (flat scaling lists).  ctx is unused.
+ */
+static inline void
+gen75_mfd_avc_context_init(VADriverContextP ctx,
+                           struct gen7_mfd_context *gen7_mfd_context)
+{
+    avc_gen_default_iq_matrix(&gen7_mfd_context->iq_matrix.h264);
+}
+
+/*
+ * Per-picture preparation for H.264 decoding.
+ *
+ *  - scans the slices to find out whether in-loop deblocking is needed
+ *    (any slice whose disable_deblocking_filter_idc is not 1);
+ *  - refreshes the frame store index and validates the picture size;
+ *  - prepares the render target surface and its AVC private data;
+ *  - points the pre-/post-deblocking outputs at the render target bo,
+ *    marking exactly one of them valid;
+ *  - (re)allocates the row store scratch buffers sized from the picture
+ *    width in macroblocks.
+ */
+static void
+gen75_mfd_avc_decode_init(VADriverContextP ctx,
+                          struct decode_state *decode_state,
+                          struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    VAPictureParameterBufferH264 *pic_param;
+    VASliceParameterBufferH264 *slice_param;
+    VAPictureH264 *cur_pic;
+    struct object_surface *obj_surface;
+    dri_bo *scratch_bo;
+    unsigned int width_in_mbs, height_in_mbs;
+    int group, elem;
+    int needs_deblocking = 0;
+
+    /* Stop scanning as soon as one slice asks for the deblocking filter. */
+    for (group = 0;
+         group < decode_state->num_slice_params && needs_deblocking == 0;
+         group++) {
+        assert(decode_state->slice_params && decode_state->slice_params[group]->buffer);
+        slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[group]->buffer;
+
+        for (elem = 0;
+             elem < decode_state->slice_params[group]->num_elements;
+             elem++, slice_param++) {
+            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
+            assert((slice_param->slice_type == SLICE_TYPE_I) ||
+                   (slice_param->slice_type == SLICE_TYPE_SI) ||
+                   (slice_param->slice_type == SLICE_TYPE_P) ||
+                   (slice_param->slice_type == SLICE_TYPE_SP) ||
+                   (slice_param->slice_type == SLICE_TYPE_B));
+
+            if (slice_param->disable_deblocking_filter_idc != 1) {
+                needs_deblocking = 1;
+                break;
+            }
+        }
+    }
+
+    assert(decode_state->pic_param && decode_state->pic_param->buffer);
+    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
+    gen75_mfd_avc_frame_store_index(ctx, pic_param, gen7_mfd_context);
+
+    width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
+    height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
+    assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
+    assert(height_in_mbs > 0 && height_in_mbs <= 256);
+
+    /* Current decoded picture */
+    cur_pic = &pic_param->CurrPic;
+    assert(!(cur_pic->flags & VA_PICTURE_H264_INVALID));
+    obj_surface = SURFACE(cur_pic->picture_id);
+    assert(obj_surface);
+
+    obj_surface->flags &= ~SURFACE_REF_DIS_MASK;
+    if (pic_param->pic_fields.bits.reference_pic_flag)
+        obj_surface->flags |= SURFACE_REFERENCED;
+
+    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
+    gen75_mfd_init_avc_surface(ctx, pic_param, obj_surface);
+
+    /* Both outputs reference the render target; only one is flagged valid. */
+    dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
+    gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
+    dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
+    gen7_mfd_context->post_deblocking_output.valid = needs_deblocking;
+
+    dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
+    gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
+    dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
+    gen7_mfd_context->pre_deblocking_output.valid = !needs_deblocking;
+
+    /* Row store scratch buffers: drop the old bo, allocate a new one. */
+    dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
+    scratch_bo = dri_bo_alloc(i965->intel.bufmgr,
+                              "intra row store",
+                              width_in_mbs * 64,
+                              0x1000);
+    assert(scratch_bo);
+    gen7_mfd_context->intra_row_store_scratch_buffer.bo = scratch_bo;
+    gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
+
+    dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
+    scratch_bo = dri_bo_alloc(i965->intel.bufmgr,
+                              "deblocking filter row store",
+                              width_in_mbs * 64 * 4,
+                              0x1000);
+    assert(scratch_bo);
+    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = scratch_bo;
+    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
+
+    dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
+    scratch_bo = dri_bo_alloc(i965->intel.bufmgr,
+                              "bsd mpc row store",
+                              width_in_mbs * 64 * 2,
+                              0x1000);
+    assert(scratch_bo);
+    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = scratch_bo;
+    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
+
+    dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
+    scratch_bo = dri_bo_alloc(i965->intel.bufmgr,
+                              "mpr row store",
+                              width_in_mbs * 64 * 2,
+                              0x1000);
+    assert(scratch_bo);
+    gen7_mfd_context->mpr_row_store_scratch_buffer.bo = scratch_bo;
+    gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;
+
+    /* The bitplane buffer is marked unused on the H.264 path. */
+    gen7_mfd_context->bitplane_read_buffer.valid = 0;
+}
+
+/*
+ * Decode one H.264 picture: run the per-picture init, emit the
+ * picture-level MFX state inside an atomic BCS batch section, then emit
+ * the per-slice commands for every slice of every slice parameter buffer,
+ * and finally flush the batch.
+ *
+ * Each slice is told about its successor (the next element of the same
+ * buffer, or the first element of the next buffer); the very last slice
+ * gets NULL.
+ */
+static void
+gen75_mfd_avc_decode_picture(VADriverContextP ctx,
+                             struct decode_state *decode_state,
+                             struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    VAPictureParameterBufferH264 *pic_param;
+    VASliceParameterBufferH264 *cur_slice, *following_slice, *next_group_first;
+    dri_bo *slice_data_bo;
+    int group, elem;
+
+    assert(decode_state->pic_param && decode_state->pic_param->buffer);
+    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
+    gen75_mfd_avc_decode_init(ctx, decode_state, gen7_mfd_context);
+
+    /* Picture-level state */
+    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
+    intel_batchbuffer_emit_mi_flush(batch);
+    gen75_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
+    gen75_mfd_surface_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
+    gen75_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
+    gen75_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
+    gen75_mfd_avc_qm_state(ctx, decode_state, gen7_mfd_context);
+    gen75_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context);
+    gen75_mfd_avc_picid_state(ctx, decode_state, gen7_mfd_context);
+
+    /* Slice-level state, one slice parameter buffer at a time */
+    for (group = 0; group < decode_state->num_slice_params; group++) {
+        assert(decode_state->slice_params && decode_state->slice_params[group]->buffer);
+        cur_slice = (VASliceParameterBufferH264 *)decode_state->slice_params[group]->buffer;
+        slice_data_bo = decode_state->slice_datas[group]->bo;
+        gen75_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_AVC, gen7_mfd_context);
+
+        next_group_first = (group == decode_state->num_slice_params - 1) ?
+            NULL :
+            (VASliceParameterBufferH264 *)decode_state->slice_params[group + 1]->buffer;
+
+        for (elem = 0;
+             elem < decode_state->slice_params[group]->num_elements;
+             elem++, cur_slice++) {
+            assert(cur_slice->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
+            assert((cur_slice->slice_type == SLICE_TYPE_I) ||
+                   (cur_slice->slice_type == SLICE_TYPE_SI) ||
+                   (cur_slice->slice_type == SLICE_TYPE_P) ||
+                   (cur_slice->slice_type == SLICE_TYPE_SP) ||
+                   (cur_slice->slice_type == SLICE_TYPE_B));
+
+            following_slice = (elem < decode_state->slice_params[group]->num_elements - 1) ?
+                cur_slice + 1 :
+                next_group_first;
+
+            gen75_mfd_avc_directmode_state(ctx, pic_param, cur_slice, gen7_mfd_context);
+            gen75_mfd_avc_ref_idx_state(ctx, pic_param, cur_slice, gen7_mfd_context);
+            gen75_mfd_avc_weightoffset_state(ctx, pic_param, cur_slice, gen7_mfd_context);
+            gen75_mfd_avc_slice_state(ctx, pic_param, cur_slice, following_slice, gen7_mfd_context);
+            gen75_mfd_avc_bsd_object(ctx, pic_param, cur_slice, slice_data_bo, following_slice, gen7_mfd_context);
+        }
+    }
+
+    intel_batchbuffer_end_atomic(batch);
+    intel_batchbuffer_flush(batch);
+}
+
+/*
+ * Per-picture preparation for MPEG-2 decoding: set up the reference
+ * surfaces, make the current render target the pre-deblocking output,
+ * allocate the BSD/MPC row store scratch buffer, and mark every buffer
+ * this path does not use as invalid.
+ */
+static void
+gen75_mfd_mpeg2_decode_init(VADriverContextP ctx,
+                            struct decode_state *decode_state,
+                            struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    VAPictureParameterBufferMPEG2 *pic_param;
+    struct object_surface *obj_surface;
+    dri_bo *row_store_bo;
+    unsigned int width_in_mbs;
+
+    assert(decode_state->pic_param && decode_state->pic_param->buffer);
+    pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
+    width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
+
+    mpeg2_set_reference_surfaces(ctx,
+                                 gen7_mfd_context->reference_surface,
+                                 decode_state,
+                                 pic_param);
+
+    /* Current decoded picture */
+    obj_surface = SURFACE(decode_state->current_render_target);
+    assert(obj_surface);
+    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
+
+    /* Swap the pre-deblocking output over to the render target's bo. */
+    dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
+    gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
+    dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
+    gen7_mfd_context->pre_deblocking_output.valid = 1;
+
+    dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
+    row_store_bo = dri_bo_alloc(i965->intel.bufmgr,
+                                "bsd mpc row store",
+                                width_in_mbs * 96,
+                                0x1000);
+    assert(row_store_bo);
+    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = row_store_bo;
+    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
+
+    /* Buffers not used by the MPEG-2 path */
+    gen7_mfd_context->post_deblocking_output.valid = 0;
+    gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
+    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
+    gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
+    gen7_mfd_context->bitplane_read_buffer.valid = 0;
+}
+
+/*
+ * Emit the MFX_MPEG2_PIC_STATE command (13 dwords) from the MPEG-2
+ * picture parameter buffer: f_codes and picture coding extension bits,
+ * picture coding type, and the picture size in macroblocks.  Slice
+ * concealment is always disabled for now.
+ */
+static void
+gen75_mfd_mpeg2_pic_state(VADriverContextP ctx,
+                          struct decode_state *decode_state,
+                          struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    VAPictureParameterBufferMPEG2 *pic_param;
+    /* XXX: disable concealment for now */
+    unsigned int slice_concealment_disable_bit = 1;
+    int pad;
+
+    assert(decode_state->pic_param && decode_state->pic_param->buffer);
+    pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
+
+    BEGIN_BCS_BATCH(batch, 13);
+    OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
+
+    /* f_code holds four 4-bit codes; the lowest nibble goes to the top bits */
+    OUT_BCS_BATCH(batch,
+                  (pic_param->f_code & 0xf) << 28 | /* f_code[1][1] */
+                  ((pic_param->f_code >> 4) & 0xf) << 24 | /* f_code[1][0] */
+                  ((pic_param->f_code >> 8) & 0xf) << 20 | /* f_code[0][1] */
+                  ((pic_param->f_code >> 12) & 0xf) << 16 | /* f_code[0][0] */
+                  pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
+                  pic_param->picture_coding_extension.bits.picture_structure << 12 |
+                  pic_param->picture_coding_extension.bits.top_field_first << 11 |
+                  pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
+                  pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
+                  pic_param->picture_coding_extension.bits.q_scale_type << 8 |
+                  pic_param->picture_coding_extension.bits.intra_vlc_format << 7 |
+                  pic_param->picture_coding_extension.bits.alternate_scan << 6);
+
+    OUT_BCS_BATCH(batch,
+                  pic_param->picture_coding_type << 9);
+
+    /* picture height and width in macroblocks, each minus one */
+    OUT_BCS_BATCH(batch,
+                  (slice_concealment_disable_bit << 31) |
+                  ((ALIGN(pic_param->vertical_size, 16) / 16) - 1) << 16 |
+                  ((ALIGN(pic_param->horizontal_size, 16) / 16) - 1));
+
+    /* the remaining nine dwords are programmed as zero */
+    for (pad = 0; pad < 9; pad++)
+        OUT_BCS_BATCH(batch, 0);
+
+    ADVANCE_BCS_BATCH(batch);
+}
+
+/*
+ * Maintain and commit the MPEG-2 quantiser matrices.
+ *
+ * The context keeps its own copy of the intra and non-intra matrices.
+ * When the application supplies an IQ matrix buffer, each cached matrix
+ * is refreshed if the buffer loads it, or if the cached load flag is
+ * still -1; coefficients are stored at the position given by
+ * zigzag_direct[].  Every cached matrix whose load flag is non-zero is
+ * then sent to the hardware through gen75_mfd_qm_state().
+ */
+static void
+gen75_mfd_mpeg2_qm_state(VADriverContextP ctx,
+                         struct decode_state *decode_state,
+                         struct gen7_mfd_context *gen7_mfd_context)
+{
+    VAIQMatrixBufferMPEG2 * const cached = &gen7_mfd_context->iq_matrix.mpeg2;
+    int k;
+
+    /* Update internal QM state */
+    if (decode_state->iq_matrix && decode_state->iq_matrix->buffer) {
+        VAIQMatrixBufferMPEG2 * const incoming =
+            (VAIQMatrixBufferMPEG2 *)decode_state->iq_matrix->buffer;
+
+        if (cached->load_intra_quantiser_matrix == -1 ||
+            incoming->load_intra_quantiser_matrix) {
+            cached->load_intra_quantiser_matrix =
+                incoming->load_intra_quantiser_matrix;
+
+            if (incoming->load_intra_quantiser_matrix) {
+                for (k = 0; k < 64; k++)
+                    cached->intra_quantiser_matrix[zigzag_direct[k]] =
+                        incoming->intra_quantiser_matrix[k];
+            }
+        }
+
+        if (cached->load_non_intra_quantiser_matrix == -1 ||
+            incoming->load_non_intra_quantiser_matrix) {
+            cached->load_non_intra_quantiser_matrix =
+                incoming->load_non_intra_quantiser_matrix;
+
+            if (incoming->load_non_intra_quantiser_matrix) {
+                for (k = 0; k < 64; k++)
+                    cached->non_intra_quantiser_matrix[zigzag_direct[k]] =
+                        incoming->non_intra_quantiser_matrix[k];
+            }
+        }
+    }
+
+    /* Commit QM state to HW: intra first, then non-intra */
+    if (cached->load_intra_quantiser_matrix)
+        gen75_mfd_qm_state(ctx,
+                           MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX,
+                           cached->intra_quantiser_matrix,
+                           64,
+                           gen7_mfd_context);
+
+    if (cached->load_non_intra_quantiser_matrix)
+        gen75_mfd_qm_state(ctx,
+                           MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX,
+                           cached->non_intra_quantiser_matrix,
+                           64,
+                           gen7_mfd_context);
+}
+
+/*
+ * Emit the MFD_MPEG2_BSD_OBJECT command (5 dwords) for one MPEG-2 slice.
+ *
+ * next_slice_param is the slice that follows, or NULL for the last slice
+ * of the picture; it determines where this slice ends and therefore its
+ * macroblock count.
+ *
+ * For field pictures, slice_vertical_position is halved when the
+ * context's wa_mpeg2_slice_vertical_position workaround flag is > 0.
+ */
+static void
+gen75_mfd_mpeg2_bsd_object(VADriverContextP ctx,
+                           VAPictureParameterBufferMPEG2 *pic_param,
+                           VASliceParameterBufferMPEG2 *slice_param,
+                           VASliceParameterBufferMPEG2 *next_slice_param,
+                           struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    unsigned int mbs_per_row = ALIGN(pic_param->horizontal_size, 16) / 16;
+    int start_x, start_y, end_x, end_y, mb_count;
+    int is_field_pic = 0, apply_vpos_wa;
+
+    if (pic_param->picture_coding_extension.bits.picture_structure == MPEG_TOP_FIELD ||
+        pic_param->picture_coding_extension.bits.picture_structure == MPEG_BOTTOM_FIELD)
+        is_field_pic = 1;
+
+    apply_vpos_wa = is_field_pic &&
+        gen7_mfd_context->wa_mpeg2_slice_vertical_position > 0;
+
+    /* Start of this slice */
+    start_y = slice_param->slice_vertical_position / (1 + apply_vpos_wa);
+    start_x = slice_param->slice_horizontal_position;
+
+    /* End of this slice: start of the next one, or the bottom of the
+     * picture (half the rows for a field picture) */
+    if (next_slice_param != NULL) {
+        end_y = next_slice_param->slice_vertical_position / (1 + apply_vpos_wa);
+        end_x = next_slice_param->slice_horizontal_position;
+    } else {
+        end_y = ALIGN(pic_param->vertical_size, 16) / 16 / (1 + is_field_pic);
+        end_x = 0;
+    }
+
+    mb_count = (end_y * mbs_per_row + end_x) - (start_y * mbs_per_row + start_x);
+
+    BEGIN_BCS_BATCH(batch, 5);
+    OUT_BCS_BATCH(batch, MFD_MPEG2_BSD_OBJECT | (5 - 2));
+    /* macroblock_offset is in bits: whole bytes adjust size/offset, the
+     * remaining 0-7 bits are passed in the next dword */
+    OUT_BCS_BATCH(batch,
+                  slice_param->slice_data_size - (slice_param->macroblock_offset >> 3));
+    OUT_BCS_BATCH(batch,
+                  slice_param->slice_data_offset + (slice_param->macroblock_offset >> 3));
+    OUT_BCS_BATCH(batch,
+                  start_x << 24 |
+                  start_y << 16 |
+                  mb_count << 8 |
+                  (next_slice_param == NULL) << 5 |
+                  (next_slice_param == NULL) << 3 |
+                  (slice_param->macroblock_offset & 0x7));
+    OUT_BCS_BATCH(batch,
+                  (slice_param->quantiser_scale_code << 24) |
+                  (end_y << 8 | end_x));
+    ADVANCE_BCS_BATCH(batch);
+}
+
+/*
+ * Decode one MPEG-2 picture: run the per-picture init, emit the
+ * picture-level MFX state inside an atomic BCS batch section, resolve the
+ * slice vertical position workaround if it is still undecided (< 0),
+ * emit one BSD object per slice, and flush the batch.
+ *
+ * Each slice is told about its successor (the next element of the same
+ * buffer, or the first element of the next buffer); the very last slice
+ * gets NULL.
+ */
+static void
+gen75_mfd_mpeg2_decode_picture(VADriverContextP ctx,
+                               struct decode_state *decode_state,
+                               struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    VAPictureParameterBufferMPEG2 *pic_param;
+    VASliceParameterBufferMPEG2 *cur_slice, *following_slice, *next_group_first;
+    dri_bo *slice_data_bo;
+    int group, elem;
+
+    assert(decode_state->pic_param && decode_state->pic_param->buffer);
+    pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
+
+    gen75_mfd_mpeg2_decode_init(ctx, decode_state, gen7_mfd_context);
+
+    /* Picture-level state */
+    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
+    intel_batchbuffer_emit_mi_flush(batch);
+    gen75_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
+    gen75_mfd_surface_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
+    gen75_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
+    gen75_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
+    gen75_mfd_mpeg2_pic_state(ctx, decode_state, gen7_mfd_context);
+    gen75_mfd_mpeg2_qm_state(ctx, decode_state, gen7_mfd_context);
+
+    /* A negative value means the workaround has not been evaluated yet. */
+    if (gen7_mfd_context->wa_mpeg2_slice_vertical_position < 0)
+        gen7_mfd_context->wa_mpeg2_slice_vertical_position =
+            mpeg2_wa_slice_vertical_position(decode_state, pic_param);
+
+    /* Slice-level commands, one slice parameter buffer at a time */
+    for (group = 0; group < decode_state->num_slice_params; group++) {
+        assert(decode_state->slice_params && decode_state->slice_params[group]->buffer);
+        cur_slice = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[group]->buffer;
+        slice_data_bo = decode_state->slice_datas[group]->bo;
+        gen75_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_MPEG2, gen7_mfd_context);
+
+        next_group_first = (group == decode_state->num_slice_params - 1) ?
+            NULL :
+            (VASliceParameterBufferMPEG2 *)decode_state->slice_params[group + 1]->buffer;
+
+        for (elem = 0;
+             elem < decode_state->slice_params[group]->num_elements;
+             elem++, cur_slice++) {
+            assert(cur_slice->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
+
+            following_slice = (elem < decode_state->slice_params[group]->num_elements - 1) ?
+                cur_slice + 1 :
+                next_group_first;
+
+            gen75_mfd_mpeg2_bsd_object(ctx, pic_param, cur_slice, following_slice, gen7_mfd_context);
+        }
+    }
+
+    intel_batchbuffer_end_atomic(batch);
+    intel_batchbuffer_flush(batch);
+}
+
+static const int va_to_gen7_vc1_pic_type[5] = { /* indexed by the VA picture_type field; entry 4 also maps to P */
+ GEN7_VC1_I_PICTURE,
+ GEN7_VC1_P_PICTURE,
+ GEN7_VC1_B_PICTURE,
+ GEN7_VC1_BI_PICTURE,
+ GEN7_VC1_P_PICTURE,
+};
+
+static const int va_to_gen7_vc1_mv[4] = { /* indexed by the VA mv_mode / mv_mode2 field (callers assert < 4) */
+ 1, /* 1-MV */
+ 2, /* 1-MV half-pel */
+ 3, /* 1-MV half-pel bilinear */
+ 0, /* Mixed MV */
+};
+
+static const int b_picture_scale_factor[21] = { /* indexed by b_picture_fraction; used only when it is < 21 */
+ 128, 85, 170, 64, 192,
+ 51, 102, 153, 204, 43,
+ 215, 37, 74, 111, 148,
+ 185, 222, 32, 96, 160,
+ 224,
+};
+
+static const int va_to_gen7_vc1_condover[3] = { /* NOTE(review): presumably indexed by conditional_overlap_flag (asserted < 3) - confirm */
+ 0,
+ 2,
+ 3
+};
+
+static const int va_to_gen7_vc1_profile[4] = { /* indexed by the VA sequence_fields.bits.profile value */
+ GEN7_VC1_SIMPLE_PROFILE,
+ GEN7_VC1_MAIN_PROFILE,
+ GEN7_VC1_RESERVED_PROFILE,
+ GEN7_VC1_ADVANCED_PROFILE
+};
+
+/*
+ * Destructor for the VC-1 private data attached to a surface
+ * (installed as obj_surface->free_private_data).
+ *
+ * data - address of the private data pointer; when it points at a
+ *        non-NULL gen7_vc1_surface, its dmv bo is unreferenced, the
+ *        structure is freed and *data is reset to NULL.
+ */
+static void
+gen75_mfd_free_vc1_surface(void **data)
+{
+    struct gen7_vc1_surface *vc1_priv = *data;
+
+    if (vc1_priv != NULL) {
+        dri_bo_unreference(vc1_priv->dmv);
+        free(vc1_priv);
+        *data = NULL;
+    }
+}
+
+/*
+ * Attach (or refresh) the VC-1 private data of a surface.
+ *
+ * Installs gen75_mfd_free_vc1_surface() as the private data destructor,
+ * allocates the zero-initialised gen7_vc1_surface on first use, records
+ * the picture type of the picture being decoded into it, and allocates
+ * the direct MV buffer (64 bytes per macroblock) if the surface does not
+ * have one yet.
+ *
+ * Allocation failures are caught with assert(), matching the way the
+ * rest of this file treats a failed bo allocation, instead of being
+ * dereferenced unchecked.
+ */
+static void
+gen75_mfd_init_vc1_surface(VADriverContextP ctx,
+                           VAPictureParameterBufferVC1 *pic_param,
+                           struct object_surface *obj_surface)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct gen7_vc1_surface *gen7_vc1_surface = obj_surface->private_data;
+    int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
+    int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
+
+    obj_surface->free_private_data = gen75_mfd_free_vc1_surface;
+
+    if (!gen7_vc1_surface) {
+        /* calloc(count, size): one zeroed structure */
+        gen7_vc1_surface = calloc(1, sizeof(*gen7_vc1_surface));
+        assert(gen7_vc1_surface);
+        assert((obj_surface->size & 0x3f) == 0);
+        obj_surface->private_data = gen7_vc1_surface;
+    }
+
+    gen7_vc1_surface->picture_type = pic_param->picture_fields.bits.picture_type;
+
+    if (gen7_vc1_surface->dmv == NULL) {
+        gen7_vc1_surface->dmv = dri_bo_alloc(i965->intel.bufmgr,
+                                             "direct mv w/r buffer",
+                                             width_in_mbs * height_in_mbs * 64,
+                                             0x1000);
+        assert(gen7_vc1_surface->dmv);
+    }
+}
+
+/*
+ * Per-picture preparation for VC-1 decoding.
+ *
+ *  - fills the reference surface array: slot 0 is the forward reference,
+ *    slot 1 the backward reference, and the remaining slots repeat slots
+ *    0/1 alternately;
+ *  - prepares the render target surface and its VC-1 private data;
+ *  - points the pre-/post-deblocking outputs at the render target bo and
+ *    marks one of them valid depending on the loopfilter bit;
+ *  - (re)allocates the row store scratch buffers sized from the coded
+ *    width in macroblocks;
+ *  - when bitplanes are present, repacks the application's bitplane
+ *    buffer into a freshly allocated bo.
+ *
+ * A reference whose surface is missing or has no bo is never programmed:
+ * slot 0 becomes VA_INVALID_ID and slot 1 falls back to whatever slot 0
+ * ended up holding.
+ */
+static void
+gen75_mfd_vc1_decode_init(VADriverContextP ctx,
+                          struct decode_state *decode_state,
+                          struct gen7_mfd_context *gen7_mfd_context)
+{
+    VAPictureParameterBufferVC1 *pic_param;
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct object_surface *obj_surface;
+    int i;
+    dri_bo *bo;
+    int width_in_mbs;
+
+    assert(decode_state->pic_param && decode_state->pic_param->buffer);
+    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
+    width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
+
+    /* reference picture */
+    obj_surface = SURFACE(pic_param->forward_reference_picture);
+
+    if (obj_surface && obj_surface->bo)
+        gen7_mfd_context->reference_surface[0].surface_id = pic_param->forward_reference_picture;
+    else
+        gen7_mfd_context->reference_surface[0].surface_id = VA_INVALID_ID;
+
+    obj_surface = SURFACE(pic_param->backward_reference_picture);
+
+    if (obj_surface && obj_surface->bo)
+        gen7_mfd_context->reference_surface[1].surface_id = pic_param->backward_reference_picture;
+    else
+        /* Fall back to the validated forward slot, not the raw id from
+         * pic_param, which may name a surface that was just rejected. */
+        gen7_mfd_context->reference_surface[1].surface_id =
+            gen7_mfd_context->reference_surface[0].surface_id;
+
+    /* must do so !!! */
+    for (i = 2; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++)
+        gen7_mfd_context->reference_surface[i].surface_id = gen7_mfd_context->reference_surface[i % 2].surface_id;
+
+    /* Current decoded picture */
+    obj_surface = SURFACE(decode_state->current_render_target);
+    assert(obj_surface);
+    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
+    gen75_mfd_init_vc1_surface(ctx, pic_param, obj_surface);
+
+    /* Both outputs reference the render target; the loopfilter bit
+     * selects which one is flagged valid. */
+    dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
+    gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
+    dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
+    gen7_mfd_context->post_deblocking_output.valid = pic_param->entrypoint_fields.bits.loopfilter;
+
+    dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
+    gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
+    dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
+    gen7_mfd_context->pre_deblocking_output.valid = !pic_param->entrypoint_fields.bits.loopfilter;
+
+    dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
+    bo = dri_bo_alloc(i965->intel.bufmgr,
+                      "intra row store",
+                      width_in_mbs * 64,
+                      0x1000);
+    assert(bo);
+    gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
+    gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
+
+    dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
+    bo = dri_bo_alloc(i965->intel.bufmgr,
+                      "deblocking filter row store",
+                      width_in_mbs * 6 * 64,
+                      0x1000);
+    assert(bo);
+    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
+    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
+
+    dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
+    bo = dri_bo_alloc(i965->intel.bufmgr,
+                      "bsd mpc row store",
+                      width_in_mbs * 96,
+                      0x1000);
+    assert(bo);
+    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
+    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
+
+    gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
+
+    gen7_mfd_context->bitplane_read_buffer.valid = !!pic_param->bitplane_present.value;
+    dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
+
+    if (gen7_mfd_context->bitplane_read_buffer.valid) {
+        int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
+        /* bytes per destination row: two 4-bit entries per byte */
+        int bitplane_width = ALIGN(width_in_mbs, 2) / 2;
+        int src_w, src_h;
+        uint8_t *src = NULL, *dst = NULL;
+
+        assert(decode_state->bit_plane && decode_state->bit_plane->buffer);
+        src = decode_state->bit_plane->buffer;
+
+        bo = dri_bo_alloc(i965->intel.bufmgr,
+                          "VC-1 Bitplane",
+                          bitplane_width * height_in_mbs,
+                          0x1000);
+        assert(bo);
+        gen7_mfd_context->bitplane_read_buffer.bo = bo;
+
+        dri_bo_map(bo, True);
+        assert(bo->virtual);
+        dst = bo->virtual;
+
+        /*
+         * The source packs one 4-bit entry per macroblock continuously
+         * across rows (even linear index in the high nibble).  The
+         * destination starts every row on a byte boundary and stores the
+         * first macroblock of each pair in the low nibble.
+         */
+        for (src_h = 0; src_h < height_in_mbs; src_h++) {
+            for (src_w = 0; src_w < width_in_mbs; src_w++) {
+                int src_index, dst_index;
+                int src_shift;
+                uint8_t src_value;
+
+                src_index = (src_h * width_in_mbs + src_w) / 2;
+                src_shift = !((src_h * width_in_mbs + src_w) & 1) * 4;
+                src_value = ((src[src_index] >> src_shift) & 0xf);
+
+                /* shift the previous entry down, insert the new one on top */
+                dst_index = src_w / 2;
+                dst[dst_index] = ((dst[dst_index] >> 4) | (src_value << 4));
+            }
+
+            /* odd width: the last byte holds one entry; move it to the low nibble */
+            if (src_w & 1)
+                dst[src_w / 2] >>= 4;
+
+            dst += bitplane_width;
+        }
+
+        dri_bo_unmap(bo);
+    } else
+        gen7_mfd_context->bitplane_read_buffer.bo = NULL;
+}
+
+static void
+gen75_mfd_vc1_pic_state(VADriverContextP ctx,
+ struct decode_state *decode_state,
+ struct gen7_mfd_context *gen7_mfd_context)
+{
+ struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+ VAPictureParameterBufferVC1 *pic_param;
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ struct object_surface *obj_surface;
+ int alt_pquant_config = 0, alt_pquant_edge_mask = 0, alt_pq;
+ int dquant, dquantfrm, dqprofile, dqdbedge, dqsbedge, dqbilevel;
+ int unified_mv_mode;
+ int ref_field_pic_polarity = 0;
+ int scale_factor = 0;
+ int trans_ac_y = 0;
+ int dmv_surface_valid = 0;
+ int brfd = 0;
+ int fcm = 0;
+ int picture_type;
+ int profile;
+ int overlap;
+ int interpolation_mode = 0;
+
+ assert(decode_state->pic_param && decode_state->pic_param->buffer);
+ pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
+
+ profile = va_to_gen7_vc1_profile[pic_param->sequence_fields.bits.profile];
+ dquant = pic_param->pic_quantizer_fields.bits.dquant;
+ dquantfrm = pic_param->pic_quantizer_fields.bits.dq_frame;
+ dqprofile = pic_param->pic_quantizer_fields.bits.dq_profile;
+ dqdbedge = pic_param->pic_quantizer_fields.bits.dq_db_edge;
+ dqsbedge = pic_param->pic_quantizer_fields.bits.dq_sb_edge;
+ dqbilevel = pic_param->pic_quantizer_fields.bits.dq_binary_level;
+ alt_pq = pic_param->pic_quantizer_fields.bits.alt_pic_quantizer;
+
+ if (dquant == 0) {
+ alt_pquant_config = 0;
+ alt_pquant_edge_mask = 0;
+ } else if (dquant == 2) {
+ alt_pquant_config = 1;
+ alt_pquant_edge_mask = 0xf;
+ } else {
+ assert(dquant == 1);
+ if (dquantfrm == 0) {
+ alt_pquant_config = 0;
+ alt_pquant_edge_mask = 0;
+ alt_pq = 0;
+ } else {
+ assert(dquantfrm == 1);
+ alt_pquant_config = 1;
+
+ switch (dqprofile) {
+ case 3:
+ if (dqbilevel == 0) {
+ alt_pquant_config = 2;
+ alt_pquant_edge_mask = 0;
+ } else {
+ assert(dqbilevel == 1);
+ alt_pquant_config = 3;
+ alt_pquant_edge_mask = 0;
+ }
+ break;
+
+ case 0:
+ alt_pquant_edge_mask = 0xf;
+ break;
+
+ case 1:
+ if (dqdbedge == 3)
+ alt_pquant_edge_mask = 0x9;
+ else
+ alt_pquant_edge_mask = (0x3 << dqdbedge);
+
+ break;
+
+ case 2:
+ alt_pquant_edge_mask = (0x1 << dqsbedge);
+ break;
+
+ default:
+ assert(0);
+ }
+ }
+ }
+
+ if (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation) {
+ assert(pic_param->mv_fields.bits.mv_mode2 < 4);
+ unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode2];
+ } else {
+ assert(pic_param->mv_fields.bits.mv_mode < 4);
+ unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode];
+ }
+
+ if (pic_param->sequence_fields.bits.interlace == 1 &&
+ pic_param->picture_fields.bits.frame_coding_mode != 0) { /* frame-interlace or field-interlace */
+ /* FIXME: calculate reference field picture polarity */
+ assert(0);
+ ref_field_pic_polarity = 0;
+ }
+
+ if (pic_param->b_picture_fraction < 21)
+ scale_factor = b_picture_scale_factor[pic_param->b_picture_fraction];
+
+ picture_type = va_to_gen7_vc1_pic_type[pic_param->picture_fields.bits.picture_type];
+
+ if (profile == GEN7_VC1_ADVANCED_PROFILE &&
+ picture_type == GEN7_VC1_I_PICTURE)
+ picture_type = GEN7_VC1_BI_PICTURE;
+
+ if (picture_type == GEN7_VC1_I_PICTURE || picture_type == GEN7_VC1_BI_PICTURE) /* I picture */
+ trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx2;
+ else
+ trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx1;
+
+
+ if (picture_type == GEN7_VC1_B_PICTURE) {
+ struct gen7_vc1_surface *gen7_vc1_surface = NULL;
+
+ obj_surface = SURFACE(pic_param->backward_reference_picture);
+ assert(obj_surface);
+ gen7_vc1_surface = obj_surface->private_data;
+
+ if (!gen7_vc1_surface ||
+ (va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_I_PICTURE ||
+ va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_BI_PICTURE))
+ dmv_surface_valid = 0;
+ else
+ dmv_surface_valid = 1;
+ }
+
+ assert(pic_param->picture_fields.bits.frame_coding_mode < 3);
+
+ if (pic_param->picture_fields.bits.frame_coding_mode < 2)
+ fcm = pic_param->picture_fields.bits.frame_coding_mode;
+ else {
+ if (pic_param->picture_fields.bits.top_field_first)
+ fcm = 2;
+ else
+ fcm = 3;
+ }
+
+ if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_B_PICTURE) { /* B picture */
+ brfd = pic_param->reference_fields.bits.reference_distance;
+ brfd = (scale_factor * brfd) >> 8;
+ brfd = pic_param->reference_fields.bits.reference_distance - brfd - 1;
+
+ if (brfd < 0)
+ brfd = 0;
+ }
+
+ overlap = pic_param->sequence_fields.bits.overlap;
+ if (profile != GEN7_VC1_ADVANCED_PROFILE && pic_param->pic_quantizer_fields.bits.pic_quantizer_scale < 9)
+ overlap = 0;
+
+ assert(pic_param->conditional_overlap_flag < 3);
+ assert(pic_param->mv_fields.bits.mv_table < 4); /* FIXME: interlace mode */
+
+ if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPelBilinear ||
+ (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
+ pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPelBilinear))
+ interpolation_mode = 9; /* Half-pel bilinear */
+ else if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPel ||
+ (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
+ pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPel))
+ interpolation_mode = 1; /* Half-pel bicubic */
+ else
+ interpolation_mode = 0; /* Quarter-pel bicubic */
+
+ BEGIN_BCS_BATCH(batch, 6);
+ OUT_BCS_BATCH(batch, MFD_VC1_LONG_PIC_STATE | (6 - 2));
+ OUT_BCS_BATCH(batch,
+ (((ALIGN(pic_param->coded_height, 16) / 16) - 1) << 16) |
+ ((ALIGN(pic_param->coded_width, 16) / 16) - 1));
+ OUT_BCS_BATCH(batch,
+ ((ALIGN(pic_param->coded_width, 16) / 16 + 1) / 2 - 1) << 24 |
+ dmv_surface_valid << 15 |
+ (pic_param->pic_quantizer_fields.bits.quantizer == 0) << 14 | /* implicit quantizer */
+ pic_param->rounding_control << 13 |
+ pic_param->sequence_fields.bits.syncmarker << 12 |
+ interpolation_mode << 8 |
+ 0 << 7 | /* FIXME: scale up or down ??? */
+ pic_param->range_reduction_frame << 6 |
+ pic_param->entrypoint_fields.bits.loopfilter << 5 |
+ overlap << 4 |
+ !pic_param->picture_fields.bits.is_first_field << 3 |
+ (pic_param->sequence_fields.bits.profile == 3) << 0);
+ OUT_BCS_BATCH(batch,
+ va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] << 29 |
+ picture_type << 26 |
+ fcm << 24 |
+ alt_pq << 16 |
+ pic_param->pic_quantizer_fields.bits.pic_quantizer_scale << 8 |
+ scale_factor << 0);
+ OUT_BCS_BATCH(batch,
+ unified_mv_mode << 28 |
+ pic_param->mv_fields.bits.four_mv_switch << 27 |
+ pic_param->fast_uvmc_flag << 26 |
+ ref_field_pic_polarity << 25 |
+ pic_param->reference_fields.bits.num_reference_pictures << 24 |
+ pic_param->reference_fields.bits.reference_distance << 20 |
+ pic_param->reference_fields.bits.reference_distance << 16 | /* FIXME: ??? */
+ pic_param->mv_fields.bits.extended_dmv_range << 10 |
+ pic_param->mv_fields.bits.extended_mv_range << 8 |
+ alt_pquant_edge_mask << 4 |
+ alt_pquant_config << 2 |
+ pic_param->pic_quantizer_fields.bits.half_qp << 1 |
+ pic_param->pic_quantizer_fields.bits.pic_quantizer_type << 0);
+ OUT_BCS_BATCH(batch,
+ !!pic_param->bitplane_present.value << 31 |
+ !pic_param->bitplane_present.flags.bp_forward_mb << 30 |
+ !pic_param->bitplane_present.flags.bp_mv_type_mb << 29 |
+ !pic_param->bitplane_present.flags.bp_skip_mb << 28 |
+ !pic_param->bitplane_present.flags.bp_direct_mb << 27 |
+ !pic_param->bitplane_present.flags.bp_overflags << 26 |
+ !pic_param->bitplane_present.flags.bp_ac_pred << 25 |
+ !pic_param->bitplane_present.flags.bp_field_tx << 24 |
+ pic_param->mv_fields.bits.mv_table << 20 |
+ pic_param->mv_fields.bits.four_mv_block_pattern_table << 18 |
+ pic_param->mv_fields.bits.two_mv_block_pattern_table << 16 |
+ pic_param->transform_fields.bits.frame_level_transform_type << 12 |
+ pic_param->transform_fields.bits.mb_level_transform_type_flag << 11 |
+ pic_param->mb_mode_table << 8 |
+ trans_ac_y << 6 |
+ pic_param->transform_fields.bits.transform_ac_codingset_idx1 << 4 |
+ pic_param->transform_fields.bits.intra_transform_dc_table << 3 |
+ pic_param->cbp_table << 0);
+ ADVANCE_BCS_BATCH(batch);
+}
+
+/*
+ * Emit the 6-DWord MFX_VC1_PRED_PIPE_STATE command for the current picture.
+ *
+ * DW1 sets both intensity-compensation fields (bits 10 and 8) when the
+ * picture's mv_mode is VAMvModeIntensityCompensation. DW2 packs luma_shift
+ * (from bit 16) and luma_scale (from bit 0). DW3..DW5 are written as zero.
+ * The "double" and interlace fields are still FIXMEs and stay at zero.
+ */
+static void
+gen75_mfd_vc1_pred_pipe_state(VADriverContextP ctx,
+                              struct decode_state *decode_state,
+                              struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    VAPictureParameterBufferVC1 *pic_param;
+    int intensitycomp_single;
+
+    /* Validate and fetch the picture parameters (done once; the original
+     * repeated this pair of statements verbatim). */
+    assert(decode_state->pic_param && decode_state->pic_param->buffer);
+    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
+    intensitycomp_single = (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation);
+
+    BEGIN_BCS_BATCH(batch, 6);
+    OUT_BCS_BATCH(batch, MFX_VC1_PRED_PIPE_STATE | (6 - 2));
+    OUT_BCS_BATCH(batch,
+                  0 << 14 | /* FIXME: double ??? */
+                  0 << 12 |
+                  intensitycomp_single << 10 |
+                  intensitycomp_single << 8 |
+                  0 << 4 | /* FIXME: interlace mode */
+                  0);
+    OUT_BCS_BATCH(batch,
+                  pic_param->luma_shift << 16 |
+                  pic_param->luma_scale << 0); /* FIXME: Luma Scaling */
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+    ADVANCE_BCS_BATCH(batch);
+}
+
+/*
+ * Emit the 7-DWord form of MFX_VC1_DIRECTMODE_STATE (used by
+ * gen75_mfd_vc1_directmode_state() when IS_STEPPING_BPLUS() is true).
+ *
+ * The direct-MV write buffer is taken from the private VC-1 data of the
+ * current render target, the read buffer from the private data of the
+ * backward reference picture. A missing surface or missing private data
+ * results in a zero address DWord instead of a relocation.
+ */
+static void
+gen75_mfd_vc1_directmode_state_bplus(VADriverContextP ctx,
+                                     struct decode_state *decode_state,
+                                     struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    VAPictureParameterBufferVC1 *pic_param;
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct object_surface *target_surface, *backward_surface;
+    dri_bo *dmv_write_bo = NULL;
+    dri_bo *dmv_read_bo = NULL;
+
+    assert(decode_state->pic_param && decode_state->pic_param->buffer);
+    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
+
+    /* Destination for the direct-mode motion vectors of this picture. */
+    target_surface = SURFACE(decode_state->current_render_target);
+    if (target_surface && target_surface->private_data) {
+        struct gen7_vc1_surface *vc1_priv = target_surface->private_data;
+
+        dmv_write_bo = vc1_priv->dmv;
+    }
+
+    /* Source: motion vectors stored with the backward reference. */
+    backward_surface = SURFACE(pic_param->backward_reference_picture);
+    if (backward_surface && backward_surface->private_data) {
+        struct gen7_vc1_surface *vc1_priv = backward_surface->private_data;
+
+        dmv_read_bo = vc1_priv->dmv;
+    }
+
+    BEGIN_BCS_BATCH(batch, 7);
+    OUT_BCS_BATCH(batch, MFX_VC1_DIRECTMODE_STATE | (7 - 2));
+
+    /* DW1..DW3: write buffer address followed by two zero DWords */
+    if (dmv_write_bo) {
+        OUT_BCS_RELOC(batch, dmv_write_bo,
+                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                      0);
+    } else {
+        OUT_BCS_BATCH(batch, 0);
+    }
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+
+    /* DW4..DW6: read buffer address followed by two zero DWords */
+    if (dmv_read_bo) {
+        OUT_BCS_RELOC(batch, dmv_read_bo,
+                      I915_GEM_DOMAIN_INSTRUCTION, 0,
+                      0);
+    } else {
+        OUT_BCS_BATCH(batch, 0);
+    }
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+
+    ADVANCE_BCS_BATCH(batch);
+}
+
+/*
+ * Emit MFX_VC1_DIRECTMODE_STATE.
+ *
+ * When IS_STEPPING_BPLUS() reports true the work is delegated to
+ * gen75_mfd_vc1_directmode_state_bplus(); otherwise the 3-DWord form is
+ * written here: command header, direct-MV write buffer (from the current
+ * render target) and direct-MV read buffer (from the backward reference).
+ * A buffer that cannot be found is emitted as a zero DWord.
+ */
+static void
+gen75_mfd_vc1_directmode_state(VADriverContextP ctx,
+                               struct decode_state *decode_state,
+                               struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    VAPictureParameterBufferVC1 *pic_param;
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct object_surface *target_surface, *backward_surface;
+    dri_bo *dmv_write_bo = NULL;
+    dri_bo *dmv_read_bo = NULL;
+
+    if (IS_STEPPING_BPLUS(i965)) {
+        gen75_mfd_vc1_directmode_state_bplus(ctx, decode_state, gen7_mfd_context);
+        return;
+    }
+
+    assert(decode_state->pic_param && decode_state->pic_param->buffer);
+    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
+
+    target_surface = SURFACE(decode_state->current_render_target);
+    if (target_surface && target_surface->private_data) {
+        struct gen7_vc1_surface *vc1_priv = target_surface->private_data;
+
+        dmv_write_bo = vc1_priv->dmv;
+    }
+
+    backward_surface = SURFACE(pic_param->backward_reference_picture);
+    if (backward_surface && backward_surface->private_data) {
+        struct gen7_vc1_surface *vc1_priv = backward_surface->private_data;
+
+        dmv_read_bo = vc1_priv->dmv;
+    }
+
+    BEGIN_BCS_BATCH(batch, 3);
+    OUT_BCS_BATCH(batch, MFX_VC1_DIRECTMODE_STATE | (3 - 2));
+
+    /* DW1: direct-MV write buffer */
+    if (dmv_write_bo) {
+        OUT_BCS_RELOC(batch, dmv_write_bo,
+                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                      0);
+    } else {
+        OUT_BCS_BATCH(batch, 0);
+    }
+
+    /* DW2: direct-MV read buffer */
+    if (dmv_read_bo) {
+        OUT_BCS_RELOC(batch, dmv_read_bo,
+                      I915_GEM_DOMAIN_INSTRUCTION, 0,
+                      0);
+    } else {
+        OUT_BCS_BATCH(batch, 0);
+    }
+
+    ADVANCE_BCS_BATCH(batch);
+}
+
+/*
+ * Translate a macroblock bit offset into an offset within the raw slice
+ * buffer.
+ *
+ * For any profile other than 3 the input offset is returned unchanged.
+ * For profile 3 the header bytes (in_slice_data_bit_offset / 8 of them) are
+ * walked through buf; every time the byte pattern 00 00 03 followed by a
+ * byte smaller than 4 is found, two header bytes are consumed while three
+ * raw bytes are skipped, so the 03 byte does not count towards the header.
+ * The result is the raw byte position reached, in bits, plus the sub-byte
+ * part of the input offset.
+ *
+ * NOTE: the scan looks up to three bytes ahead of the current position, so
+ * buf must be readable that far.
+ */
+static int
+gen75_mfd_vc1_get_macroblock_bit_offset(uint8_t *buf, int in_slice_data_bit_offset, int profile)
+{
+    const int header_bytes = in_slice_data_bit_offset / 8;
+    int consumed = 0;   /* header bytes accounted for so far */
+    int raw_pos = 0;    /* matching position in the raw buffer */
+
+    if (profile != 3)
+        return in_slice_data_bit_offset;
+
+    while (consumed < header_bytes) {
+        if (buf[raw_pos] == 0 &&
+            buf[raw_pos + 1] == 0 &&
+            buf[raw_pos + 2] == 3 &&
+            buf[raw_pos + 3] < 4) {
+            /* 00 00 03 xx: two payload bytes plus one extra raw byte */
+            consumed += 1;
+            raw_pos += 2;
+        }
+
+        consumed += 1;
+        raw_pos += 1;
+    }
+
+    return 8 * raw_pos + in_slice_data_bit_offset % 8;
+}
+
+/*
+ * Emit one 5-DWord MFD_VC1_BSD_OBJECT command for a single VC-1 slice.
+ *
+ * The slice data buffer is mapped briefly so that
+ * gen75_mfd_vc1_get_macroblock_bit_offset() can convert the slice's
+ * macroblock_offset into an offset inside the raw data. The whole-byte
+ * part of that offset is removed from the data size and added to the data
+ * start; the remaining 0..7 bits are passed in the last DWord.
+ *
+ * next_slice_param may be NULL for the last slice; the next slice start
+ * row then becomes the picture height in 16-pixel rows.
+ */
+static void
+gen75_mfd_vc1_bsd_object(VADriverContextP ctx,
+                         VAPictureParameterBufferVC1 *pic_param,
+                         VASliceParameterBufferVC1 *slice_param,
+                         VASliceParameterBufferVC1 *next_slice_param,
+                         dri_bo *slice_data_bo,
+                         struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    uint8_t *slice_bytes = NULL;
+    int mb_bit_offset;
+    int next_slice_row;
+
+    /* The offset calculation has to inspect the slice header bytes. */
+    dri_bo_map(slice_data_bo, 0);
+    slice_bytes = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
+    mb_bit_offset = gen75_mfd_vc1_get_macroblock_bit_offset(slice_bytes,
+                                                            slice_param->macroblock_offset,
+                                                            pic_param->sequence_fields.bits.profile);
+    dri_bo_unmap(slice_data_bo);
+
+    next_slice_row = next_slice_param ?
+                     next_slice_param->slice_vertical_position :
+                     ALIGN(pic_param->coded_height, 16) / 16;
+
+    BEGIN_BCS_BATCH(batch, 5);
+    OUT_BCS_BATCH(batch, MFD_VC1_BSD_OBJECT | (5 - 2));
+    /* DW1: data size without the bytes in front of the first macroblock */
+    OUT_BCS_BATCH(batch,
+                  slice_param->slice_data_size - (mb_bit_offset >> 3));
+    /* DW2: data start advanced by the same number of bytes */
+    OUT_BCS_BATCH(batch,
+                  slice_param->slice_data_offset + (mb_bit_offset >> 3));
+    /* DW3: first row of this slice and first row of the next one */
+    OUT_BCS_BATCH(batch,
+                  slice_param->slice_vertical_position << 16 |
+                  next_slice_row << 0);
+    /* DW4: bit position of the first macroblock inside its byte */
+    OUT_BCS_BATCH(batch,
+                  (mb_bit_offset & 0x7));
+    ADVANCE_BCS_BATCH(batch);
+}
+
+/*
+ * Build and submit the BCS batch that decodes one VC-1 picture.
+ *
+ * After gen75_mfd_vc1_decode_init() the batch receives, in this order:
+ * MI flush, pipe mode select, surface state, pipe buffer addresses, BSP
+ * buffer base addresses, VC-1 picture state, prediction pipe state and
+ * direct-mode state. Then, for every slice parameter buffer, the indirect
+ * object base address is programmed and one BSD object is emitted per
+ * slice element. The batch is finally closed and flushed.
+ */
+static void
+gen75_mfd_vc1_decode_picture(VADriverContextP ctx,
+                             struct decode_state *decode_state,
+                             struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    VAPictureParameterBufferVC1 *pic_param;
+    VASliceParameterBufferVC1 *slice_param, *next_slice_param, *next_slice_group_param;
+    dri_bo *slice_data_bo;
+    int group, elem;
+
+    assert(decode_state->pic_param && decode_state->pic_param->buffer);
+    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
+
+    gen75_mfd_vc1_decode_init(ctx, decode_state, gen7_mfd_context);
+
+    /* Picture-level state */
+    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
+    intel_batchbuffer_emit_mi_flush(batch);
+    gen75_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
+    gen75_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
+    gen75_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
+    gen75_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
+    gen75_mfd_vc1_pic_state(ctx, decode_state, gen7_mfd_context);
+    gen75_mfd_vc1_pred_pipe_state(ctx, decode_state, gen7_mfd_context);
+    gen75_mfd_vc1_directmode_state(ctx, decode_state, gen7_mfd_context);
+
+    /* Slice-level commands, one parameter buffer ("group") at a time */
+    for (group = 0; group < decode_state->num_slice_params; group++) {
+        assert(decode_state->slice_params && decode_state->slice_params[group]->buffer);
+        slice_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[group]->buffer;
+        slice_data_bo = decode_state->slice_datas[group]->bo;
+        gen75_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VC1, gen7_mfd_context);
+
+        /* First slice of the following group, if there is one. */
+        next_slice_group_param = NULL;
+        if (group != decode_state->num_slice_params - 1)
+            next_slice_group_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[group + 1]->buffer;
+
+        for (elem = 0; elem < decode_state->slice_params[group]->num_elements; elem++, slice_param++) {
+            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
+
+            /* The last element of a group is followed by the next group. */
+            next_slice_param = (elem < decode_state->slice_params[group]->num_elements - 1) ?
+                               slice_param + 1 :
+                               next_slice_group_param;
+
+            gen75_mfd_vc1_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
+        }
+    }
+
+    intel_batchbuffer_end_atomic(batch);
+    intel_batchbuffer_flush(batch);
+}
+
+/*
+ * Prepare the decoder context for a baseline JPEG picture.
+ *
+ * The chroma subsampling is derived from the number of components and
+ * their horizontal (h1..h3) / vertical (v1..v3) sampling factors; an
+ * unsupported combination trips assert(0). The current render target is
+ * then (re)allocated as an IMC1 surface with that subsampling and installed
+ * as the pre-deblocking output. All other per-picture buffers of the
+ * context are marked unused.
+ */
+static void
+gen75_mfd_jpeg_decode_init(VADriverContextP ctx,
+                           struct decode_state *decode_state,
+                           struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct object_surface *obj_surface;
+    VAPictureParameterBufferJPEGBaseline *pic_param;
+    int subsampling = SUBSAMPLE_YUV420;
+
+    /* Same sanity check the other picture-level functions perform. */
+    assert(decode_state->pic_param && decode_state->pic_param->buffer);
+    pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
+
+    if (pic_param->num_components == 1)
+        subsampling = SUBSAMPLE_YUV400;
+    else if (pic_param->num_components == 3) {
+        int h1 = pic_param->components[0].h_sampling_factor;
+        int h2 = pic_param->components[1].h_sampling_factor;
+        int h3 = pic_param->components[2].h_sampling_factor;
+        int v1 = pic_param->components[0].v_sampling_factor;
+        int v2 = pic_param->components[1].v_sampling_factor;
+        int v3 = pic_param->components[2].v_sampling_factor;
+
+        if (h1 == 2 && h2 == 1 && h3 == 1 &&
+            v1 == 2 && v2 == 1 && v3 == 1)
+            subsampling = SUBSAMPLE_YUV420;
+        else if (h1 == 2 && h2 == 1 && h3 == 1 &&
+                 v1 == 1 && v2 == 1 && v3 == 1)
+            subsampling = SUBSAMPLE_YUV422H;
+        else if (h1 == 1 && h2 == 1 && h3 == 1 &&
+                 v1 == 1 && v2 == 1 && v3 == 1)
+            subsampling = SUBSAMPLE_YUV444;
+        else if (h1 == 4 && h2 == 1 && h3 == 1 &&
+                 v1 == 1 && v2 == 1 && v3 == 1)
+            subsampling = SUBSAMPLE_YUV411;
+        else if (h1 == 1 && h2 == 1 && h3 == 1 &&
+                 v1 == 2 && v2 == 1 && v3 == 1)
+            subsampling = SUBSAMPLE_YUV422V;
+        else if (h1 == 2 && h2 == 1 && h3 == 1 &&
+                 v1 == 2 && v2 == 2 && v3 == 2)
+            subsampling = SUBSAMPLE_YUV422H;
+        /* Fixed: the original tested h2 twice and never looked at h1. */
+        else if (h1 == 2 && h2 == 2 && h3 == 2 &&
+                 v1 == 2 && v2 == 1 && v3 == 1)
+            subsampling = SUBSAMPLE_YUV422V;
+        else
+            assert(0);
+    } else {
+        assert(0);
+    }
+
+    /* Current decoded picture */
+    obj_surface = SURFACE(decode_state->current_render_target);
+    assert(obj_surface);
+    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('I','M','C','1'), subsampling);
+
+    /* The decoded picture is written through the pre-deblocking output. */
+    dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
+    gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
+    dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
+    gen7_mfd_context->pre_deblocking_output.valid = 1;
+
+    /* None of the remaining buffers is set up here for JPEG. */
+    gen7_mfd_context->post_deblocking_output.bo = NULL;
+    gen7_mfd_context->post_deblocking_output.valid = 0;
+
+    gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
+    gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
+
+    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
+    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
+
+    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
+    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 0;
+
+    gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
+    gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
+
+    gen7_mfd_context->bitplane_read_buffer.bo = NULL;
+    gen7_mfd_context->bitplane_read_buffer.valid = 0;
+}
+
+static const int va_to_gen7_jpeg_rotation[4] = { /* rotation codes placed at bit 4 of DW1 of MFX_JPEG_PIC_STATE; gen75_mfd_jpeg_pic_state() only ever uses entry 0 */
+ GEN7_JPEG_ROTATION_0,
+ GEN7_JPEG_ROTATION_90,
+ GEN7_JPEG_ROTATION_180,
+ GEN7_JPEG_ROTATION_270 /* NOTE(review): presumably indexed by a VA rotation value - confirm before using entries 1..3 */
+};
+
+/*
+ * Emit the 3-DWord MFX_JPEG_PIC_STATE command.
+ *
+ * The chroma type is derived from the component count and the sampling
+ * factors of the three components (it stays GEN7_YUV420 when the count is
+ * neither 1 nor 3). The frame size is programmed in 8x8 blocks, rounded up
+ * to the granularity selected for the chroma type (8, 16 or 32 pixels).
+ * Rotation is always programmed as va_to_gen7_jpeg_rotation[0].
+ */
+static void
+gen75_mfd_jpeg_pic_state(VADriverContextP ctx,
+                         struct decode_state *decode_state,
+                         struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    VAPictureParameterBufferJPEGBaseline *pic_param;
+    int chroma_type = GEN7_YUV420;
+    int frame_width_in_blks;
+    int frame_height_in_blks;
+
+    assert(decode_state->pic_param && decode_state->pic_param->buffer);
+    pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
+
+    if (pic_param->num_components == 1)
+        chroma_type = GEN7_YUV400;
+    else if (pic_param->num_components == 3) {
+        int h1 = pic_param->components[0].h_sampling_factor;
+        int h2 = pic_param->components[1].h_sampling_factor;
+        int h3 = pic_param->components[2].h_sampling_factor;
+        int v1 = pic_param->components[0].v_sampling_factor;
+        int v2 = pic_param->components[1].v_sampling_factor;
+        int v3 = pic_param->components[2].v_sampling_factor;
+
+        if (h1 == 2 && h2 == 1 && h3 == 1 &&
+            v1 == 2 && v2 == 1 && v3 == 1)
+            chroma_type = GEN7_YUV420;
+        else if (h1 == 2 && h2 == 1 && h3 == 1 &&
+                 v1 == 1 && v2 == 1 && v3 == 1)
+            chroma_type = GEN7_YUV422H_2Y;
+        else if (h1 == 1 && h2 == 1 && h3 == 1 &&
+                 v1 == 1 && v2 == 1 && v3 == 1)
+            chroma_type = GEN7_YUV444;
+        else if (h1 == 4 && h2 == 1 && h3 == 1 &&
+                 v1 == 1 && v2 == 1 && v3 == 1)
+            chroma_type = GEN7_YUV411;
+        else if (h1 == 1 && h2 == 1 && h3 == 1 &&
+                 v1 == 2 && v2 == 1 && v3 == 1)
+            chroma_type = GEN7_YUV422V_2Y;
+        else if (h1 == 2 && h2 == 1 && h3 == 1 &&
+                 v1 == 2 && v2 == 2 && v3 == 2)
+            chroma_type = GEN7_YUV422H_4Y;
+        /* Fixed: the original tested h2 twice and never looked at h1. */
+        else if (h1 == 2 && h2 == 2 && h3 == 2 &&
+                 v1 == 2 && v2 == 1 && v3 == 1)
+            chroma_type = GEN7_YUV422V_4Y;
+        else
+            assert(0);
+    }
+
+    /* Frame size in 8x8 blocks; the rounding unit depends on chroma type. */
+    if (chroma_type == GEN7_YUV400 ||
+        chroma_type == GEN7_YUV444 ||
+        chroma_type == GEN7_YUV422V_2Y) {
+        frame_width_in_blks = ((pic_param->picture_width + 7) / 8);
+        frame_height_in_blks = ((pic_param->picture_height + 7) / 8);
+    } else if (chroma_type == GEN7_YUV411) {
+        frame_width_in_blks = ((pic_param->picture_width + 31) / 32) * 4;
+        frame_height_in_blks = ((pic_param->picture_height + 31) / 32) * 4;
+    } else {
+        frame_width_in_blks = ((pic_param->picture_width + 15) / 16) * 2;
+        frame_height_in_blks = ((pic_param->picture_height + 15) / 16) * 2;
+    }
+
+    BEGIN_BCS_BATCH(batch, 3);
+    OUT_BCS_BATCH(batch, MFX_JPEG_PIC_STATE | (3 - 2));
+    OUT_BCS_BATCH(batch,
+                  (va_to_gen7_jpeg_rotation[0] << 4) | /* without rotation */
+                  (chroma_type << 0));
+    OUT_BCS_BATCH(batch,
+                  ((frame_height_in_blks - 1) << 16) | /* FrameHeightInBlks */
+                  ((frame_width_in_blks - 1) << 0)); /* FrameWidthInBlks */
+    ADVANCE_BCS_BATCH(batch);
+}
+
+static const int va_to_gen7_jpeg_hufftable[2] = { /* table id written as DW1 of MFX_JPEG_HUFF_TABLE_STATE, indexed by the Huffman table index (0 or 1) */
+ MFX_HUFFTABLE_ID_Y,
+ MFX_HUFFTABLE_ID_UV
+};
+
+/*
+ * Emit one 53-DWord MFX_JPEG_HUFF_TABLE_STATE command for each of the first
+ * num_tables Huffman tables of the attached VA Huffman table buffer.
+ *
+ * Nothing is emitted when no Huffman table buffer is present. Every
+ * command consists of the header, the hardware table id and 204 bytes of
+ * payload (12 + 12 + 16 + 164) copied directly from the VA structure.
+ * num_tables also indexes va_to_gen7_jpeg_hufftable[], which has only two
+ * entries.
+ */
+static void
+gen75_mfd_jpeg_huff_table_state(VADriverContextP ctx,
+                                struct decode_state *decode_state,
+                                struct gen7_mfd_context *gen7_mfd_context,
+                                int num_tables)
+{
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    VAHuffmanTableBufferJPEGBaseline *tables;
+    int table;
+
+    if (!decode_state->huffman_table)
+        return;
+
+    if (!decode_state->huffman_table->buffer)
+        return;
+
+    tables = (VAHuffmanTableBufferJPEGBaseline *)decode_state->huffman_table->buffer;
+
+    table = 0;
+    while (table < num_tables) {
+        int hw_table_id = va_to_gen7_jpeg_hufftable[table];
+
+        BEGIN_BCS_BATCH(batch, 53);
+        OUT_BCS_BATCH(batch, MFX_JPEG_HUFF_TABLE_STATE | (53 - 2));
+        OUT_BCS_BATCH(batch, hw_table_id);
+        /* DC code counts and values, then AC code counts and values */
+        intel_batchbuffer_data(batch, tables->huffman_table[table].num_dc_codes, 12);
+        intel_batchbuffer_data(batch, tables->huffman_table[table].dc_values, 12);
+        intel_batchbuffer_data(batch, tables->huffman_table[table].num_ac_codes, 16);
+        intel_batchbuffer_data(batch, tables->huffman_table[table].ac_values, 164);
+        ADVANCE_BCS_BATCH(batch);
+
+        table++;
+    }
+}
+
+static const int va_to_gen7_jpeg_qm[5] = { /* quantizer matrix type, indexed by (component_id - components[0].component_id + 1) in gen75_mfd_jpeg_qm_state() */
+ -1, /* index 0: placeholder; the first component always maps to index 1 */
+ MFX_QM_JPEG_LUMA_Y_QUANTIZER_MATRIX,
+ MFX_QM_JPEG_CHROMA_CB_QUANTIZER_MATRIX,
+ MFX_QM_JPEG_CHROMA_CR_QUANTIZER_MATRIX,
+ MFX_QM_JPEG_ALPHA_QUANTIZER_MATRIX
+};
+
+/*
+ * Program the JPEG quantizer matrices for every picture component.
+ *
+ * Returns without doing anything when no IQ matrix buffer is attached.
+ * For each component the matrix type is looked up from the component id
+ * relative to the first component, and the table chosen by the component's
+ * quantiser_table_selector is used. Tables whose load_quantiser_table flag
+ * is clear are skipped. The 64 coefficients are reordered through
+ * zigzag_direct[] before being handed to gen75_mfd_qm_state().
+ */
+static void
+gen75_mfd_jpeg_qm_state(VADriverContextP ctx,
+                        struct decode_state *decode_state,
+                        struct gen7_mfd_context *gen7_mfd_context)
+{
+    VAPictureParameterBufferJPEGBaseline *pic_param;
+    VAIQMatrixBufferJPEGBaseline *iq_matrix;
+    int comp;
+
+    if (!decode_state->iq_matrix || !decode_state->iq_matrix->buffer)
+        return;
+
+    iq_matrix = (VAIQMatrixBufferJPEGBaseline *)decode_state->iq_matrix->buffer;
+    pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
+
+    assert(pic_param->num_components <= 3);
+
+    for (comp = 0; comp < pic_param->num_components; comp++) {
+        int id_delta = pic_param->components[comp].component_id - pic_param->components[0].component_id;
+        int selector = pic_param->components[comp].quantiser_table_selector;
+        int qm_type = va_to_gen7_jpeg_qm[id_delta + 1];
+        unsigned char *zigzag_qm = iq_matrix->quantiser_table[selector];
+        unsigned char raster_qm[64];
+        int k;
+
+        if (iq_matrix->load_quantiser_table[selector]) {
+            /* zigzag_direct[k] gives the destination slot of coefficient k */
+            for (k = 0; k < 64; k++)
+                raster_qm[zigzag_direct[k]] = zigzag_qm[k];
+
+            gen75_mfd_qm_state(ctx, qm_type, raster_qm, 64, gen7_mfd_context);
+        }
+    }
+}
+
+/*
+ * Emit the 6-DWord MFD_JPEG_BSD_OBJECT command for one JPEG scan.
+ *
+ * A 3-bit component mask is built from the scan's component selectors,
+ * taken relative to the id of the first picture component: relative
+ * position 0, 1 or 2 sets bit 0, 1 or 2. Any other selector trips
+ * assert(0). The scan is flagged as interleaved when it holds more than
+ * one component.
+ *
+ * next_slice_param and slice_data_bo are not used here.
+ */
+static void
+gen75_mfd_jpeg_bsd_object(VADriverContextP ctx,
+                          VAPictureParameterBufferJPEGBaseline *pic_param,
+                          VASliceParameterBufferJPEGBaseline *slice_param,
+                          VASliceParameterBufferJPEGBaseline *next_slice_param,
+                          dri_bo *slice_data_bo,
+                          struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    int scan_component_mask = 0;
+    int comp;
+
+    assert(slice_param->num_components > 0);
+    assert(slice_param->num_components < 4);
+    assert(slice_param->num_components <= pic_param->num_components);
+
+    for (comp = 0; comp < slice_param->num_components; comp++) {
+        /* 1-based position of the selected component within the picture */
+        int position = slice_param->components[comp].component_selector -
+                       pic_param->components[0].component_id + 1;
+
+        if (position >= 1 && position <= 3)
+            scan_component_mask |= (1 << (position - 1));
+        else
+            assert(0);
+    }
+
+    BEGIN_BCS_BATCH(batch, 6);
+    OUT_BCS_BATCH(batch, MFD_JPEG_BSD_OBJECT | (6 - 2));
+    OUT_BCS_BATCH(batch,
+                  slice_param->slice_data_size);
+    OUT_BCS_BATCH(batch,
+                  slice_param->slice_data_offset);
+    OUT_BCS_BATCH(batch,
+                  slice_param->slice_horizontal_position << 16 |
+                  slice_param->slice_vertical_position << 0);
+    OUT_BCS_BATCH(batch,
+                  ((slice_param->num_components != 1) << 30) | /* interleaved */
+                  (scan_component_mask << 27) | /* scan components */
+                  (0 << 26) | /* disable interrupt allowed */
+                  (slice_param->num_mcus << 0)); /* MCU count */
+    OUT_BCS_BATCH(batch,
+                  (slice_param->restart_interval << 0)); /* RestartInterval */
+    ADVANCE_BCS_BATCH(batch);
+}
+
+/* Workaround for JPEG decoding on Ivybridge */
+
+VAStatus
+i965_DestroySurfaces(VADriverContextP ctx, /* forward declaration: gen75_jpeg_wa_init() calls this to drop a previously created workaround surface */
+ VASurfaceID *surface_list,
+ int num_surfaces);
+VAStatus
+i965_CreateSurfaces(VADriverContextP ctx, /* forward declaration: gen75_jpeg_wa_init() calls this to create the workaround surface */
+ int width,
+ int height,
+ int format,
+ int num_surfaces,
+ VASurfaceID *surfaces);
+
+static struct { /* built-in clip used by the JPEG workaround (gen75_jpeg_wa_* functions) */
+ int width; /* surface width passed to i965_CreateSurfaces() */
+ int height; /* surface height passed to i965_CreateSurfaces() */
+ unsigned char data[32]; /* bitstream bytes uploaded into jpeg_wa_slice_data_bo */
+ int data_size; /* number of bytes of data[] that are uploaded */
+ int data_bit_offset; /* NOTE(review): presumably the bit offset of the first macroblock in data[] - not used in the visible code, confirm */
+ int qp; /* NOTE(review): presumably the slice QP of the clip - not used in the visible code, confirm */
+} gen7_jpeg_wa_clip = {
+ 16,
+ 16,
+ {
+ 0x65, 0xb8, 0x40, 0x32, 0x13, 0xfd, 0x06, 0x6c,
+ 0xfc, 0x0a, 0x50, 0x71, 0x5c, 0x00
+ },
+ 14, /* matches the 14 initialised bytes above */
+ 40,
+ 28,
+};
+
+/*
+ * Set up the resources needed by the JPEG workaround.
+ *
+ * Any existing workaround surface is destroyed and a new YUV420 surface of
+ * gen7_jpeg_wa_clip.width x gen7_jpeg_wa_clip.height is created and backed
+ * by an NV12 buffer object. The 4 KiB slice data buffer is allocated and
+ * filled with the clip's bitstream only the first time; later calls reuse
+ * it.
+ */
+static void
+gen75_jpeg_wa_init(VADriverContextP ctx,
+                   struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    VASurfaceID *wa_surface_id = &gen7_mfd_context->jpeg_wa_surface_id;
+    struct object_surface *wa_surface;
+    VAStatus status;
+
+    /* Always start from a fresh surface. */
+    if (*wa_surface_id != VA_INVALID_SURFACE)
+        i965_DestroySurfaces(ctx, wa_surface_id, 1);
+
+    status = i965_CreateSurfaces(ctx,
+                                 gen7_jpeg_wa_clip.width,
+                                 gen7_jpeg_wa_clip.height,
+                                 VA_RT_FORMAT_YUV420,
+                                 1,
+                                 wa_surface_id);
+    assert(status == VA_STATUS_SUCCESS);
+
+    wa_surface = SURFACE(*wa_surface_id);
+    assert(wa_surface);
+    i965_check_alloc_surface_bo(ctx, wa_surface, 1, VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
+
+    /* The clip bitstream only has to be uploaded once. */
+    if (gen7_mfd_context->jpeg_wa_slice_data_bo)
+        return;
+
+    gen7_mfd_context->jpeg_wa_slice_data_bo = dri_bo_alloc(i965->intel.bufmgr,
+                                                           "JPEG WA data",
+                                                           0x1000,
+                                                           0x1000);
+    dri_bo_subdata(gen7_mfd_context->jpeg_wa_slice_data_bo,
+                   0,
+                   gen7_jpeg_wa_clip.data_size,
+                   gen7_jpeg_wa_clip.data);
+}
+
+/*
+ * Emit the 5-DWord MFX_PIPE_MODE_SELECT command for the JPEG workaround.
+ *
+ * The pipeline is set to long-format VLD decoding of MFX_FORMAT_AVC with
+ * only the pre-deblocking output enabled. All error-termination bits are
+ * left clear.
+ */
+static void
+gen75_jpeg_wa_pipe_mode_select(VADriverContextP ctx,
+                               struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    const unsigned int mode_dw =
+        (MFX_LONG_MODE << 17) | /* Currently only support long format */
+        (MFD_MODE_VLD << 15) | /* VLD mode */
+        (0 << 10) | /* disable Stream-Out */
+        (0 << 9) | /* Post Deblocking Output */
+        (1 << 8) | /* Pre Deblocking Output */
+        (0 << 5) | /* not in stitch mode */
+        (MFX_CODEC_DECODE << 4) | /* decoding mode */
+        (MFX_FORMAT_AVC << 0);
+    const unsigned int error_dw =
+        (0 << 4) | /* terminate if AVC motion and POC table error occurs */
+        (0 << 3) | /* terminate if AVC mbdata error occurs */
+        (0 << 2) | /* terminate if AVC CABAC/CAVLC decode error occurs */
+        (0 << 1) |
+        (0 << 0);
+
+    BEGIN_BCS_BATCH(batch, 5);
+    OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
+    OUT_BCS_BATCH(batch, mode_dw);
+    OUT_BCS_BATCH(batch, error_dw);
+    OUT_BCS_BATCH(batch, 0); /* pic status/error report id */
+    OUT_BCS_BATCH(batch, 0); /* reserved */
+    ADVANCE_BCS_BATCH(batch);
+}
+
+/*
+ * Emit the 6-DWord MFX_SURFACE_STATE command describing the workaround
+ * surface: original width/height, pitch taken from the surface width,
+ * planar 4:2:0 format with interleaved chroma, Y-major tiling, and the
+ * row offset of the Cb plane.
+ */
+static void
+gen75_jpeg_wa_surface_state(VADriverContextP ctx,
+                            struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    struct object_surface *wa_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id);
+
+    BEGIN_BCS_BATCH(batch, 6);
+    OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
+    OUT_BCS_BATCH(batch, 0);
+    /* DW2: surface dimensions, each minus one */
+    OUT_BCS_BATCH(batch,
+                  ((wa_surface->orig_width - 1) << 18) |
+                  ((wa_surface->orig_height - 1) << 4));
+    /* DW3: format, pitch and tiling */
+    OUT_BCS_BATCH(batch,
+                  (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
+                  (1 << 27) | /* interleave chroma, set to 0 for JPEG */
+                  (0 << 22) | /* surface object control state, ignored */
+                  ((wa_surface->width - 1) << 3) | /* pitch */
+                  (0 << 2) | /* must be 0 */
+                  (1 << 1) | /* must be tiled */
+                  (I965_TILEWALK_YMAJOR << 0)); /* tile walk, must be 1 */
+    /* DW4: Cb plane offsets */
+    OUT_BCS_BATCH(batch,
+                  (0 << 16) | /* X offset for U(Cb), must be 0 */
+                  (wa_surface->y_cb_offset << 0)); /* Y offset for U(Cb) */
+    /* DW5: Cr plane offsets */
+    OUT_BCS_BATCH(batch,
+                  (0 << 16) | /* X offset for V(Cr), must be 0 */
+                  (0 << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zero for JPEG */
+    ADVANCE_BCS_BATCH(batch);
+}
+
+
+/*
+ * Emit the 61-DWord form of MFX_PIPE_BUF_ADDR_STATE for the JPEG
+ * workaround (used when IS_STEPPING_BPLUS() is true).
+ *
+ * Only two addresses are programmed: the workaround surface as the first
+ * address (DW1) and a temporary "intra row store" buffer at DW13. Every
+ * other DWord is zero. The temporary buffer is allocated on entry and
+ * unreferenced once the command has been written.
+ */
+static void
+gen75_jpeg_wa_pipe_buf_addr_state_bplus(VADriverContextP ctx,
+                                        struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct object_surface *obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id);
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    dri_bo *intra_bo;
+    int i;
+
+    intra_bo = dri_bo_alloc(i965->intel.bufmgr,
+                            "intra row store",
+                            128 * 64,
+                            0x1000);
+
+    BEGIN_BCS_BATCH(batch, 61);
+    OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
+
+    /* DW 1: workaround surface */
+    OUT_BCS_RELOC(batch,
+                  obj_surface->bo,
+                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                  0);
+
+    /*
+     * DW 2..12: remainder of the first address slot (2), post deblocking
+     * (3), uncompressed-video & stream out (6) -- all zero
+     */
+    for (i = 0; i < 2 + 3 + 6; i++)
+        OUT_BCS_BATCH(batch, 0);
+
+    /* DW 13: intra row store scratch */
+    OUT_BCS_RELOC(batch,
+                  intra_bo,
+                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                  0);
+
+    /* DW 14..18: rest of the intra row store slot (2), deblocking filter (3) */
+    for (i = 0; i < 2 + 3; i++)
+        OUT_BCS_BATCH(batch, 0);
+
+    /* reference frame slots: two DWords each, all unused */
+    for (i = 0; i < 2 * MAX_GEN_REFERENCE_FRAMES; i++)
+        OUT_BCS_BATCH(batch, 0);
+
+    /* one DWord after the references, mb status address (3), ILDB & second ILDB (6) */
+    for (i = 0; i < 1 + 3 + 6; i++)
+        OUT_BCS_BATCH(batch, 0);
+
+    ADVANCE_BCS_BATCH(batch);
+
+    dri_bo_unreference(intra_bo);
+}
+
+/*
+ * Emit MFX_PIPE_BUF_ADDR_STATE for the JPEG workaround.
+ *
+ * When IS_STEPPING_BPLUS() is true the work is delegated to
+ * gen75_jpeg_wa_pipe_buf_addr_state_bplus(). Otherwise the 25-DWord form
+ * is written: the workaround surface at DW1, a temporary "intra row store"
+ * buffer at DW5 and zero everywhere else. The temporary buffer is
+ * unreferenced after the command has been written.
+ */
+static void
+gen75_jpeg_wa_pipe_buf_addr_state(VADriverContextP ctx,
+                                  struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct object_surface *obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id);
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    dri_bo *intra_bo;
+    int i;
+
+    if (IS_STEPPING_BPLUS(i965)) {
+        gen75_jpeg_wa_pipe_buf_addr_state_bplus(ctx, gen7_mfd_context);
+        return;
+    }
+
+    intra_bo = dri_bo_alloc(i965->intel.bufmgr,
+                            "intra row store",
+                            128 * 64,
+                            0x1000);
+
+    BEGIN_BCS_BATCH(batch, 25);
+    OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (25 - 2));
+
+    /* DW 1: workaround surface */
+    OUT_BCS_RELOC(batch,
+                  obj_surface->bo,
+                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                  0);
+
+    /* DW 2..4: post deblocking (1) and two DWords ignored for decoding */
+    for (i = 0; i < 1 + 2; i++)
+        OUT_BCS_BATCH(batch, 0);
+
+    /* DW 5: intra row store scratch */
+    OUT_BCS_RELOC(batch,
+                  intra_bo,
+                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                  0);
+
+    /* DW 6 */
+    OUT_BCS_BATCH(batch, 0);
+
+    /* DW 7..22: reference frames, all unused */
+    for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++)
+        OUT_BCS_BATCH(batch, 0);
+
+    /* DW 23..24: ignored for decoding */
+    for (i = 0; i < 2; i++)
+        OUT_BCS_BATCH(batch, 0);
+
+    ADVANCE_BCS_BATCH(batch);
+
+    dri_bo_unreference(intra_bo);
+}
+
+/*
+ * Emit the 10-DWord form of MFX_BSP_BUF_BASE_ADDR_STATE for the JPEG
+ * workaround (used when IS_STEPPING_BPLUS() is true).
+ *
+ * Two temporary row-store buffers are allocated, referenced from DW1
+ * ("bsd mpc row store") and DW4 ("mpr row store"), and unreferenced again
+ * once the command has been written. The remaining DWords are zero.
+ */
+static void
+gen75_jpeg_wa_bsp_buf_base_addr_state_bplus(VADriverContextP ctx,
+                                            struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    dri_bo *bsd_mpc_bo;
+    dri_bo *mpr_bo;
+    int i;
+
+    bsd_mpc_bo = dri_bo_alloc(i965->intel.bufmgr,
+                              "bsd mpc row store",
+                              11520, /* 1.5 * 120 * 64 */
+                              0x1000);
+
+    mpr_bo = dri_bo_alloc(i965->intel.bufmgr,
+                          "mpr row store",
+                          7680, /* 1.0 * 120 * 64 */
+                          0x1000);
+
+    BEGIN_BCS_BATCH(batch, 10);
+    OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
+
+    /* DW 1..3: BSD/MPC row store address plus two zero DWords */
+    OUT_BCS_RELOC(batch,
+                  bsd_mpc_bo,
+                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                  0);
+    for (i = 0; i < 2; i++)
+        OUT_BCS_BATCH(batch, 0);
+
+    /* DW 4..6: MPR row store address plus two zero DWords */
+    OUT_BCS_RELOC(batch,
+                  mpr_bo,
+                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                  0);
+    for (i = 0; i < 2; i++)
+        OUT_BCS_BATCH(batch, 0);
+
+    /* DW 7..9: not used */
+    for (i = 0; i < 3; i++)
+        OUT_BCS_BATCH(batch, 0);
+
+    ADVANCE_BCS_BATCH(batch);
+
+    dri_bo_unreference(bsd_mpc_bo);
+    dri_bo_unreference(mpr_bo);
+}
+
+/*
+ * Emit MFX_BSP_BUF_BASE_ADDR_STATE for the JPEG workaround.
+ *
+ * When IS_STEPPING_BPLUS() is true the work is delegated to
+ * gen75_jpeg_wa_bsp_buf_base_addr_state_bplus(). Otherwise the 4-DWord
+ * form is written with two temporary row-store buffers at DW1 and DW2 and
+ * a zero DW3. Both buffers are unreferenced after the command has been
+ * written.
+ */
+static void
+gen75_jpeg_wa_bsp_buf_base_addr_state(VADriverContextP ctx,
+                                      struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    dri_bo *bsd_mpc_bo;
+    dri_bo *mpr_bo;
+
+    if (IS_STEPPING_BPLUS(i965)) {
+        gen75_jpeg_wa_bsp_buf_base_addr_state_bplus(ctx, gen7_mfd_context);
+        return;
+    }
+
+    bsd_mpc_bo = dri_bo_alloc(i965->intel.bufmgr,
+                              "bsd mpc row store",
+                              11520, /* 1.5 * 120 * 64 */
+                              0x1000);
+
+    mpr_bo = dri_bo_alloc(i965->intel.bufmgr,
+                          "mpr row store",
+                          7680, /* 1.0 * 120 * 64 */
+                          0x1000);
+
+    BEGIN_BCS_BATCH(batch, 4);
+    OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));
+
+    /* DW 1: BSD/MPC row store */
+    OUT_BCS_RELOC(batch,
+                  bsd_mpc_bo,
+                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                  0);
+
+    /* DW 2: MPR row store */
+    OUT_BCS_RELOC(batch,
+                  mpr_bo,
+                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                  0);
+
+    /* DW 3: not used */
+    OUT_BCS_BATCH(batch, 0);
+
+    ADVANCE_BCS_BATCH(batch);
+
+    dri_bo_unreference(bsd_mpc_bo);
+    dri_bo_unreference(mpr_bo);
+}
+/*
+ * JPEG workaround step for the AVC QM state.  The body is empty: nothing
+ * is written to the batch buffer and neither argument is used.
+ */
+static void
+gen75_jpeg_wa_avc_qm_state(VADriverContextP ctx,
+ struct gen7_mfd_context *gen7_mfd_context)
+{
+
+}
+
+/*
+ * JPEG workaround: emit the 16-dword MFX_AVC_IMG_STATE command describing
+ * a 1x1 macroblock picture (4:2:0, CABAC).  Every dword after DW4 is zero.
+ */
+static void
+gen75_jpeg_wa_avc_img_state(VADriverContextP ctx,
+                            struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    unsigned int width_in_mbs = 1, height_in_mbs = 1;
+    int mbaff_frame_flag = 0;
+    int img_struct = 0;
+    int dw;
+
+    BEGIN_BCS_BATCH(batch, 16);
+
+    /* DW0: command header */
+    OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
+
+    /* DW1: total number of macroblocks */
+    OUT_BCS_BATCH(batch, width_in_mbs * height_in_mbs);
+
+    /* DW2: picture size in macroblocks, minus one */
+    OUT_BCS_BATCH(batch,
+                  ((height_in_mbs - 1) << 16) |
+                  ((width_in_mbs - 1) << 0));
+
+    /* DW3 */
+    OUT_BCS_BATCH(batch,
+                  (0 << 24) |
+                  (0 << 16) |
+                  (0 << 14) |
+                  (0 << 13) |
+                  (0 << 12) | /* differ from GEN6 */
+                  (0 << 10) |
+                  (img_struct << 8));
+
+    /* DW4 */
+    OUT_BCS_BATCH(batch,
+                  (1 << 10) | /* 4:2:0 */
+                  (1 << 7) |  /* CABAC */
+                  (0 << 6) |
+                  (0 << 5) |
+                  (0 << 4) |
+                  (0 << 3) |
+                  (1 << 2) |
+                  (mbaff_frame_flag << 1) |
+                  (0 << 0));
+
+    /* DW5..15: zero */
+    for (dw = 5; dw < 16; dw++) {
+        OUT_BCS_BATCH(batch, 0);
+    }
+
+    ADVANCE_BCS_BATCH(batch);
+}
+
+/*
+ * JPEG workaround, B+ stepping variant: emit the 71-dword
+ * MFX_AVC_DIRECTMODE_STATE command with an all-zero payload.
+ */
+static void
+gen75_jpeg_wa_avc_directmode_state_bplus(VADriverContextP ctx,
+                                         struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    int n;
+
+    BEGIN_BCS_BATCH(batch, 71);
+    OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
+
+    /* reference surfaces 0..15: a top and a bottom dword for each */
+    for (n = 0; n < (MAX_GEN_REFERENCE_FRAMES) * 2; n++) {
+        OUT_BCS_BATCH(batch, 0);
+    }
+
+    /* one zero dword, then three for the current decoding frame/field */
+    for (n = 0; n < 1 + 3; n++) {
+        OUT_BCS_BATCH(batch, 0);
+    }
+
+    /* POC list: two dwords per reference frame */
+    for (n = 0; n < (MAX_GEN_REFERENCE_FRAMES) * 2; n++) {
+        OUT_BCS_BATCH(batch, 0);
+    }
+
+    /* two trailing zero dwords */
+    for (n = 0; n < 2; n++) {
+        OUT_BCS_BATCH(batch, 0);
+    }
+
+    ADVANCE_BCS_BATCH(batch);
+}
+
+/*
+ * JPEG workaround: emit MFX_AVC_DIRECTMODE_STATE with an all-zero payload.
+ *
+ * B+ steppings use the 71-dword layout produced by the _bplus variant;
+ * all other steppings get the 69-dword layout written here.
+ */
+static void
+gen75_jpeg_wa_avc_directmode_state(VADriverContextP ctx,
+                                   struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    int n;
+
+    if (IS_STEPPING_BPLUS(i965)) {
+        gen75_jpeg_wa_avc_directmode_state_bplus(ctx, gen7_mfd_context);
+        return;
+    }
+
+    BEGIN_BCS_BATCH(batch, 69);
+    OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));
+
+    /* reference surfaces 0..15: a top and a bottom dword for each */
+    for (n = 0; n < (MAX_GEN_REFERENCE_FRAMES) * 2; n++) {
+        OUT_BCS_BATCH(batch, 0);
+    }
+
+    /* the current decoding frame/field: top, bottom */
+    for (n = 0; n < 2; n++) {
+        OUT_BCS_BATCH(batch, 0);
+    }
+
+    /* POC list: two dwords per reference frame */
+    for (n = 0; n < (MAX_GEN_REFERENCE_FRAMES) * 2; n++) {
+        OUT_BCS_BATCH(batch, 0);
+    }
+
+    /* two trailing zero dwords */
+    for (n = 0; n < 2; n++) {
+        OUT_BCS_BATCH(batch, 0);
+    }
+
+    ADVANCE_BCS_BATCH(batch);
+}
+
+/*
+ * JPEG workaround, B+ stepping variant: emit the 26-dword
+ * MFX_IND_OBJ_BASE_ADDR_STATE command.  Only the indirect bitstream
+ * object (the workaround slice data buffer) is programmed; the remaining
+ * object groups are zero.
+ */
+static void
+gen75_jpeg_wa_ind_obj_base_addr_state_bplus(VADriverContextP ctx,
+                                            struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    int dw;
+
+    BEGIN_BCS_BATCH(batch, 26);
+    OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
+
+    /* DW1..3: slice data buffer address plus two zero dwords */
+    OUT_BCS_RELOC(batch,
+                  gen7_mfd_context->jpeg_wa_slice_data_bo,
+                  I915_GEM_DOMAIN_INSTRUCTION, 0,
+                  0);
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+
+    /* DW4..5 */
+    OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
+    OUT_BCS_BATCH(batch, 0);
+
+    /*
+     * DW6..25, all zero:
+     *   6-10  MFX indirect MV
+     *  11-15  MFX IT_COFF
+     *  16-20  MFX IT_DBLK
+     *  21-25  MFX PAK_BSE object for encoder
+     */
+    for (dw = 6; dw <= 25; dw++) {
+        OUT_BCS_BATCH(batch, 0);
+    }
+
+    ADVANCE_BCS_BATCH(batch);
+}
+
+/*
+ * JPEG workaround: emit MFX_IND_OBJ_BASE_ADDR_STATE pointing at the
+ * workaround slice data buffer.
+ *
+ * B+ steppings use the 26-dword layout produced by the _bplus variant;
+ * all other steppings get the 11-dword layout written here.
+ */
+static void
+gen75_jpeg_wa_ind_obj_base_addr_state(VADriverContextP ctx,
+                                      struct gen7_mfd_context *gen7_mfd_context)
+{
+    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    int dw;
+
+    if (IS_STEPPING_BPLUS(i965)) {
+        gen75_jpeg_wa_ind_obj_base_addr_state_bplus(ctx, gen7_mfd_context);
+        return;
+    }
+
+    BEGIN_BCS_BATCH(batch, 11);
+    OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
+
+    /* DW1: slice data buffer address */
+    OUT_BCS_RELOC(batch,
+                  gen7_mfd_context->jpeg_wa_slice_data_bo,
+                  I915_GEM_DOMAIN_INSTRUCTION, 0,
+                  0);
+
+    /* DW2 */
+    OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
+
+    /* DW3..10: zero, ignored for VLD mode */
+    for (dw = 3; dw <= 10; dw++) {
+        OUT_BCS_BATCH(batch, 0);
+    }
+
+    ADVANCE_BCS_BATCH(batch);
+}
+/*
+ * JPEG workaround: emit the 6-dword MFD_AVC_BSD_OBJECT command.
+ *
+ * The data size and the bit offset of the slice data are read from
+ * gen7_jpeg_wa_clip; the object is flagged as the last slice.
+ */
+static void
+gen75_jpeg_wa_avc_bsd_object(VADriverContextP ctx,
+ struct gen7_mfd_context *gen7_mfd_context)
+{
+ struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+
+ /* the input bitstream format on GEN7 differs from GEN6 */
+ BEGIN_BCS_BATCH(batch, 6);
+ OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2)); /* DW0: command header */
+ OUT_BCS_BATCH(batch, gen7_jpeg_wa_clip.data_size); /* DW1: data size taken from the clip descriptor */
+ OUT_BCS_BATCH(batch, 0); /* DW2 */
+ OUT_BCS_BATCH(batch,
+ (0 << 31) |
+ (0 << 14) |
+ (0 << 12) |
+ (0 << 10) |
+ (0 << 8)); /* DW3: evaluates to zero */
+ OUT_BCS_BATCH(batch,
+ ((gen7_jpeg_wa_clip.data_bit_offset >> 3) << 16) | /* whole bytes of the bit offset, placed at bit 16 */
+ (0 << 5) |
+ (0 << 4) |
+ (1 << 3) | /* LastSlice Flag */
+ (gen7_jpeg_wa_clip.data_bit_offset & 0x7)); /* remaining bits (0..7) of the bit offset */
+ OUT_BCS_BATCH(batch, 0); /* DW5 */
+ ADVANCE_BCS_BATCH(batch);
+}
+/*
+ * JPEG workaround: emit the 11-dword MFX_AVC_SLICE_STATE command.
+ *
+ * All inputs are local constants: an I slice with no reference indices,
+ * starting at macroblock 0 / position (0, 0), with the next slice
+ * position set to (0, 1).  The QP comes from gen7_jpeg_wa_clip.qp.
+ */
+static void
+gen75_jpeg_wa_avc_slice_state(VADriverContextP ctx,
+ struct gen7_mfd_context *gen7_mfd_context)
+{
+ struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+ int slice_hor_pos = 0, slice_ver_pos = 0, next_slice_hor_pos = 0, next_slice_ver_pos = 1;
+ int num_ref_idx_l0 = 0, num_ref_idx_l1 = 0;
+ int first_mb_in_slice = 0;
+ int slice_type = SLICE_TYPE_I;
+
+ BEGIN_BCS_BATCH(batch, 11);
+ OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2)); /* DW0: command header */
+ OUT_BCS_BATCH(batch, slice_type); /* DW1 */
+ OUT_BCS_BATCH(batch,
+ (num_ref_idx_l1 << 24) |
+ (num_ref_idx_l0 << 16) |
+ (0 << 8) |
+ (0 << 0)); /* DW2: reference index counts, both zero here */
+ OUT_BCS_BATCH(batch,
+ (0 << 29) |
+ (1 << 27) | /* disable Deblocking */
+ (0 << 24) |
+ (gen7_jpeg_wa_clip.qp << 16) |
+ (0 << 8) |
+ (0 << 0)); /* DW3 */
+ OUT_BCS_BATCH(batch,
+ (slice_ver_pos << 24) |
+ (slice_hor_pos << 16) |
+ (first_mb_in_slice << 0)); /* DW4: start position of this slice */
+ OUT_BCS_BATCH(batch,
+ (next_slice_ver_pos << 16) |
+ (next_slice_hor_pos << 0)); /* DW5: start position of the next slice */
+ OUT_BCS_BATCH(batch, (1 << 19)); /* last slice flag */
+ OUT_BCS_BATCH(batch, 0); /* DW7..10: zero */
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+ ADVANCE_BCS_BATCH(batch);
+}
+/*
+ * Run the JPEG workaround sequence on the context's batch buffer:
+ * initialise the workaround resources, flush, then emit the workaround
+ * state commands and the final AVC BSD object in the order below.
+ */
+static void
+gen75_mfd_jpeg_wa(VADriverContextP ctx,
+ struct gen7_mfd_context *gen7_mfd_context)
+{
+ struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+ gen75_jpeg_wa_init(ctx, gen7_mfd_context);
+ intel_batchbuffer_emit_mi_flush(batch);
+ gen75_jpeg_wa_pipe_mode_select(ctx, gen7_mfd_context);
+ gen75_jpeg_wa_surface_state(ctx, gen7_mfd_context);
+ gen75_jpeg_wa_pipe_buf_addr_state(ctx, gen7_mfd_context);
+ gen75_jpeg_wa_bsp_buf_base_addr_state(ctx, gen7_mfd_context);
+ gen75_jpeg_wa_avc_qm_state(ctx, gen7_mfd_context); /* currently emits nothing */
+ gen75_jpeg_wa_avc_img_state(ctx, gen7_mfd_context);
+ gen75_jpeg_wa_ind_obj_base_addr_state(ctx, gen7_mfd_context);
+
+ gen75_jpeg_wa_avc_directmode_state(ctx, gen7_mfd_context);
+ gen75_jpeg_wa_avc_slice_state(ctx, gen7_mfd_context);
+ gen75_jpeg_wa_avc_bsd_object(ctx, gen7_mfd_context); /* the object itself goes last, after all state */
+}
+/*
+ * Decode one baseline JPEG picture.
+ *
+ * Inside a single atomic BCS batch section this runs the JPEG workaround,
+ * emits the picture-level state, walks every slice parameter once to find
+ * the largest Huffman table selector in use, loads that many Huffman
+ * tables, and then walks the slice parameters a second time emitting one
+ * BSD object per slice.  The batch is flushed at the end.
+ *
+ * ctx:              driver context
+ * decode_state:     picture/slice parameter and slice data buffers
+ * gen7_mfd_context: decoder context owning the batch buffer
+ */
+void
+gen75_mfd_jpeg_decode_picture(VADriverContextP ctx,
+ struct decode_state *decode_state,
+ struct gen7_mfd_context *gen7_mfd_context)
+{
+ struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
+ VAPictureParameterBufferJPEGBaseline *pic_param;
+ VASliceParameterBufferJPEGBaseline *slice_param, *next_slice_param, *next_slice_group_param;
+ dri_bo *slice_data_bo;
+ int i, j, max_selector = 0;
+
+ assert(decode_state->pic_param && decode_state->pic_param->buffer);
+ pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
+
+ /* Currently only support Baseline DCT */
+ gen75_mfd_jpeg_decode_init(ctx, decode_state, gen7_mfd_context);
+ intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
+ gen75_mfd_jpeg_wa(ctx, gen7_mfd_context);
+ intel_batchbuffer_emit_mi_flush(batch);
+ gen75_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
+ gen75_mfd_surface_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
+ gen75_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
+ gen75_mfd_jpeg_pic_state(ctx, decode_state, gen7_mfd_context);
+ gen75_mfd_jpeg_qm_state(ctx, decode_state, gen7_mfd_context);
+
+ /* First pass: find the largest DC/AC Huffman table selector used by any component. */
+ /* NOTE(review): this pass also emits the indirect object base address state and computes
+ * next_slice_param, neither of which it uses afterwards -- confirm whether that is needed. */
+ for (j = 0; j < decode_state->num_slice_params; j++) {
+ assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
+ slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
+ slice_data_bo = decode_state->slice_datas[j]->bo;
+ gen75_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
+
+ if (j == decode_state->num_slice_params - 1)
+ next_slice_group_param = NULL;
+ else
+ next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
+
+ for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
+ int component;
+
+ assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
+
+ if (i < decode_state->slice_params[j]->num_elements - 1)
+ next_slice_param = slice_param + 1;
+ else
+ next_slice_param = next_slice_group_param;
+
+ for (component = 0; component < slice_param->num_components; component++) {
+ if (max_selector < slice_param->components[component].dc_table_selector)
+ max_selector = slice_param->components[component].dc_table_selector;
+
+ if (max_selector < slice_param->components[component].ac_table_selector)
+ max_selector = slice_param->components[component].ac_table_selector;
+ }
+
+ slice_param++;
+ }
+ }
+
+ assert(max_selector < 2); /* only selectors 0 and 1 are accepted */
+ gen75_mfd_jpeg_huff_table_state(ctx, decode_state, gen7_mfd_context, max_selector + 1); /* number of tables = highest selector + 1 */
+
+ /* Second pass: emit one BSD object per slice. */
+ for (j = 0; j < decode_state->num_slice_params; j++) {
+ assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
+ slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
+ slice_data_bo = decode_state->slice_datas[j]->bo;
+ gen75_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
+
+ if (j == decode_state->num_slice_params - 1)
+ next_slice_group_param = NULL; /* no group follows the last one */
+ else
+ next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
+
+ for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
+ assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
+
+ if (i < decode_state->slice_params[j]->num_elements - 1)
+ next_slice_param = slice_param + 1; /* next element of the same buffer */
+ else
+ next_slice_param = next_slice_group_param; /* first element of the next buffer, or NULL */
+
+ gen75_mfd_jpeg_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
+ slice_param++;
+ }
+ }
+
+ intel_batchbuffer_end_atomic(batch);
+ intel_batchbuffer_flush(batch);
+}
+
+/*
+ * Decoder entry point installed as the hw_context "run" hook: resets the
+ * MPEG-2 slice vertical position workaround value and dispatches to the
+ * codec-specific picture decoder selected by `profile`.  Unsupported
+ * profiles trip an assertion.
+ */
+static void
+gen75_mfd_decode_picture(VADriverContextP ctx,
+                         VAProfile profile,
+                         union codec_state *codec_state,
+                         struct hw_context *hw_context)
+{
+    struct gen7_mfd_context *mfd = (struct gen7_mfd_context *)hw_context;
+    struct decode_state *state = &codec_state->decode;
+
+    assert(mfd);
+
+    mfd->wa_mpeg2_slice_vertical_position = -1;
+
+    switch (profile) {
+    case VAProfileMPEG2Simple:
+    case VAProfileMPEG2Main:
+        gen75_mfd_mpeg2_decode_picture(ctx, state, mfd);
+        break;
+
+    case VAProfileH264Baseline:
+    case VAProfileH264Main:
+    case VAProfileH264High:
+        gen75_mfd_avc_decode_picture(ctx, state, mfd);
+        break;
+
+    case VAProfileVC1Simple:
+    case VAProfileVC1Main:
+    case VAProfileVC1Advanced:
+        gen75_mfd_vc1_decode_picture(ctx, state, mfd);
+        break;
+
+    case VAProfileJPEGBaseline:
+        gen75_mfd_jpeg_decode_picture(ctx, state, mfd);
+        break;
+
+    default:
+        assert(0);
+        break;
+    }
+}
+
+/*
+ * hw_context "destroy" hook: drop the references held on every scratch and
+ * output buffer object of the decoder context, free its batch buffer and
+ * finally free the context itself.
+ */
+static void
+gen75_mfd_context_destroy(void *hw_context)
+{
+    struct gen7_mfd_context *mfd = (struct gen7_mfd_context *)hw_context;
+
+    /* deblocking outputs */
+    dri_bo_unreference(mfd->post_deblocking_output.bo);
+    mfd->post_deblocking_output.bo = NULL;
+    dri_bo_unreference(mfd->pre_deblocking_output.bo);
+    mfd->pre_deblocking_output.bo = NULL;
+
+    /* row store scratch buffers */
+    dri_bo_unreference(mfd->intra_row_store_scratch_buffer.bo);
+    mfd->intra_row_store_scratch_buffer.bo = NULL;
+    dri_bo_unreference(mfd->deblocking_filter_row_store_scratch_buffer.bo);
+    mfd->deblocking_filter_row_store_scratch_buffer.bo = NULL;
+    dri_bo_unreference(mfd->bsd_mpc_row_store_scratch_buffer.bo);
+    mfd->bsd_mpc_row_store_scratch_buffer.bo = NULL;
+    dri_bo_unreference(mfd->mpr_row_store_scratch_buffer.bo);
+    mfd->mpr_row_store_scratch_buffer.bo = NULL;
+
+    /* bitplane buffer */
+    dri_bo_unreference(mfd->bitplane_read_buffer.bo);
+    mfd->bitplane_read_buffer.bo = NULL;
+
+    /* JPEG workaround slice data */
+    dri_bo_unreference(mfd->jpeg_wa_slice_data_bo);
+
+    intel_batchbuffer_free(mfd->base.batch);
+    free(mfd);
+}
+/*
+ * MPEG-2 specific context initialisation: set all four
+ * load_*_quantiser_matrix fields of the cached MPEG-2 IQ matrix to -1.
+ * NOTE(review): -1 presumably marks "no matrix loaded yet" -- confirm
+ * against the code that consumes these fields.
+ */
+static void gen75_mfd_mpeg2_context_init(VADriverContextP ctx,
+ struct gen7_mfd_context *gen7_mfd_context)
+{
+ gen7_mfd_context->iq_matrix.mpeg2.load_intra_quantiser_matrix = -1;
+ gen7_mfd_context->iq_matrix.mpeg2.load_non_intra_quantiser_matrix = -1;
+ gen7_mfd_context->iq_matrix.mpeg2.load_chroma_intra_quantiser_matrix = -1;
+ gen7_mfd_context->iq_matrix.mpeg2.load_chroma_non_intra_quantiser_matrix = -1;
+}
+
+/*
+ * Allocate and initialise a decoder hardware context.
+ *
+ * The context is zero-initialised, its destroy/run hooks and batch buffer
+ * are set up, every reference surface slot is marked unused, and the
+ * codec-specific initialisation for `profile` is run (MPEG-2 and H.264
+ * only; other profiles need none here).
+ *
+ * Returns the new context cast to struct hw_context *, or NULL when the
+ * context itself cannot be allocated.
+ */
+struct hw_context *
+gen75_dec_hw_context_init(VADriverContextP ctx, VAProfile profile)
+{
+    struct intel_driver_data *intel = intel_driver_data(ctx);
+    struct gen7_mfd_context *gen7_mfd_context = calloc(1, sizeof(struct gen7_mfd_context));
+    int i;
+
+    /* calloc() may fail; do not dereference a NULL context below */
+    if (!gen7_mfd_context)
+        return NULL;
+
+    gen7_mfd_context->base.destroy = gen75_mfd_context_destroy;
+    gen7_mfd_context->base.run = gen75_mfd_decode_picture;
+    gen7_mfd_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
+
+    /* no reference surface is bound to any frame store yet */
+    for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
+        gen7_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
+        gen7_mfd_context->reference_surface[i].frame_store_id = -1;
+    }
+
+    gen7_mfd_context->jpeg_wa_surface_id = VA_INVALID_SURFACE;
+
+    switch (profile) {
+    case VAProfileMPEG2Simple:
+    case VAProfileMPEG2Main:
+        gen75_mfd_mpeg2_context_init(ctx, gen7_mfd_context);
+        break;
+
+    case VAProfileH264Baseline:
+    case VAProfileH264Main:
+    case VAProfileH264High:
+        gen75_mfd_avc_context_init(ctx, gen7_mfd_context);
+        break;
+    default:
+        break;
+    }
+    return (struct hw_context *)gen7_mfd_context;
+}
diff --git a/src/gen75_vme.c b/src/gen75_vme.c
new file mode 100644
index 0000000..4c39a23
--- /dev/null
+++ b/src/gen75_vme.c
@@ -0,0 +1,811 @@
+/*
+ * Copyright © 2010-2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Zhao Yakui <yakui.zhao at intel.com>
+ * Xiang HaiHao <haihao.xiang at intel.com>
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+
+#include "intel_batchbuffer.h"
+#include "intel_driver.h"
+
+#include "i965_defines.h"
+#include "i965_drv_video.h"
+#include "gen6_vme.h"
+#include "i965_encoder.h"
+
+/*
+ * Every surface state slot is padded to the largest 32-byte aligned
+ * surface state structure of either generation, so one slot size fits
+ * all of them.
+ */
+#define SURFACE_STATE_PADDED_SIZE_0_GEN7 ALIGN(sizeof(struct gen7_surface_state), 32)
+#define SURFACE_STATE_PADDED_SIZE_1_GEN7 ALIGN(sizeof(struct gen7_surface_state2), 32)
+#define SURFACE_STATE_PADDED_SIZE_GEN7 MAX(SURFACE_STATE_PADDED_SIZE_0_GEN7, SURFACE_STATE_PADDED_SIZE_1_GEN7)
+
+#define SURFACE_STATE_PADDED_SIZE_0_GEN6 ALIGN(sizeof(struct i965_surface_state), 32)
+#define SURFACE_STATE_PADDED_SIZE_1_GEN6 ALIGN(sizeof(struct i965_surface_state2), 32)
+/* the GEN6 maximum compares the two GEN6 sizes (was mistakenly ..._1_GEN7) */
+#define SURFACE_STATE_PADDED_SIZE_GEN6 MAX(SURFACE_STATE_PADDED_SIZE_0_GEN6, SURFACE_STATE_PADDED_SIZE_1_GEN6)
+
+#define SURFACE_STATE_PADDED_SIZE MAX(SURFACE_STATE_PADDED_SIZE_GEN6, SURFACE_STATE_PADDED_SIZE_GEN7)
+/* byte offset of surface state slot `index`; the argument is parenthesised so expressions are safe */
+#define SURFACE_STATE_OFFSET(index) (SURFACE_STATE_PADDED_SIZE * (index))
+/* the binding table starts right after the last surface state slot */
+#define BINDING_TABLE_OFFSET SURFACE_STATE_OFFSET(MAX_MEDIA_SURFACES_GEN6)
+
+#define VME_INTRA_SHADER 0
+#define VME_INTER_SHADER 1
+
+#define CURBE_ALLOCATION_SIZE 37 /* in 256-bit */
+#define CURBE_TOTAL_DATA_LENGTH (4 * 32) /* in byte, it should be less than or equal to CURBE_ALLOCATION_SIZE * 32 */
+#define CURBE_URB_ENTRY_LENGTH 4 /* in 256-bit, it should be less than or equal to CURBE_TOTAL_DATA_LENGTH / 32 */
+
+#define VME_MSG_LENGTH 32
+/*
+ * VME shader binaries, pulled in from the generated .g75b files as rows
+ * of four 32-bit words.
+ */
+static const uint32_t gen75_vme_intra_frame[][4] = {
+#include "shaders/vme/intra_frame_haswell.g75b"
+};
+
+static const uint32_t gen75_vme_inter_frame[][4] = {
+#include "shaders/vme/inter_frame_haswell.g75b"
+};
+/*
+ * Kernel table; entries are listed in VME_INTRA_SHADER, VME_INTER_SHADER
+ * order.  Each entry gives a name, an index, the binary and its size in
+ * bytes.  NOTE(review): the trailing NULL is presumably the kernel's
+ * buffer object, filled in when the kernels are loaded -- confirm.
+ */
+static struct i965_kernel gen75_vme_kernels[] = {
+ {
+ "VME Intra Frame",
+ VME_INTRA_SHADER, /*index*/
+ gen75_vme_intra_frame,
+ sizeof(gen75_vme_intra_frame),
+ NULL
+ },
+ {
+ "VME inter Frame",
+ VME_INTER_SHADER,
+ gen75_vme_inter_frame,
+ sizeof(gen75_vme_inter_frame),
+ NULL
+ }
+};
+
+/*
+ * Surface state for IvyBridge
+ */
+/*
+ * Translate an I915_TILING_* mode into the tiled_surface / tile_walk bits
+ * of ss0 in a gen7_surface_state.  Any other tiling value leaves the
+ * surface state untouched.
+ */
+static
+void gen75_vme_set_common_surface_tiling(struct gen7_surface_state *ss, unsigned int tiling)
+{
+    if (tiling == I915_TILING_NONE) {
+        ss->ss0.tiled_surface = 0;
+        ss->ss0.tile_walk = 0;
+    } else if (tiling == I915_TILING_X) {
+        ss->ss0.tiled_surface = 1;
+        ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
+    } else if (tiling == I915_TILING_Y) {
+        ss->ss0.tiled_surface = 1;
+        ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
+    }
+}
+
+/*
+ * Translate an I915_TILING_* mode into the tiled_surface / tile_walk bits
+ * of ss2 in a gen7_surface_state2.  Any other tiling value leaves the
+ * surface state untouched.
+ */
+static void
+gen75_vme_set_source_surface_tiling(struct gen7_surface_state2 *ss, unsigned int tiling)
+{
+    if (tiling == I915_TILING_NONE) {
+        ss->ss2.tiled_surface = 0;
+        ss->ss2.tile_walk = 0;
+    } else if (tiling == I915_TILING_X) {
+        ss->ss2.tiled_surface = 1;
+        ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
+    } else if (tiling == I915_TILING_Y) {
+        ss->ss2.tiled_surface = 1;
+        ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
+    }
+}
+
+
+/*
+ * Only used for VME source surface state.
+ *
+ * Fill surface state slot `index` of the surface-state/binding-table
+ * buffer with a gen7_surface_state2 describing obj_surface (planar 4:2:0,
+ * 8-bit, interleaved chroma), emit a relocation for its base address and
+ * point binding table entry `index` at the slot.
+ */
+static void gen75_vme_source_surface_state(VADriverContextP ctx,
+ int index,
+ struct object_surface *obj_surface,
+ struct gen6_encoder_context *gen6_encoder_context)
+{
+ struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
+ struct gen7_surface_state2 *ss;
+ dri_bo *bo;
+ int w, h, w_pitch, h_pitch;
+ unsigned int tiling, swizzle;
+
+ assert(obj_surface->bo);
+
+ w = obj_surface->orig_width;
+ h = obj_surface->orig_height;
+ w_pitch = obj_surface->width;
+ h_pitch = obj_surface->height;
+
+ bo = vme_context->surface_state_binding_table.bo;
+ dri_bo_map(bo, 1);
+ assert(bo->virtual);
+
+ ss = (struct gen7_surface_state2 *)((char *)bo->virtual + SURFACE_STATE_OFFSET(index));
+ memset(ss, 0, sizeof(*ss));
+
+ ss->ss0.surface_base_address = obj_surface->bo->offset; /* covered by the relocation emitted below */
+
+ ss->ss1.cbcr_pixel_offset_v_direction = 2;
+ ss->ss1.width = w - 1; /* size fields are programmed as value - 1 */
+ ss->ss1.height = h - 1;
+
+ ss->ss2.surface_format = MFX_SURFACE_PLANAR_420_8;
+ ss->ss2.interleave_chroma = 1;
+ ss->ss2.pitch = w_pitch - 1;
+ ss->ss2.half_pitch_for_chroma = 0;
+
+ dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
+ gen75_vme_set_source_surface_tiling(ss, tiling);
+
+ /* UV offset for interleave mode */
+ ss->ss3.x_offset_for_cb = 0;
+ ss->ss3.y_offset_for_cb = h_pitch; /* the chroma rows start at row obj_surface->height */
+
+ dri_bo_emit_reloc(bo,
+ I915_GEM_DOMAIN_RENDER, 0,
+ 0,
+ SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state2, ss0),
+ obj_surface->bo);
+
+ ((unsigned int *)((char *)bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index); /* binding table entry -> slot offset */
+ dri_bo_unmap(bo);
+}
+/*
+ * Fill surface state slot `index` with a gen7_surface_state describing
+ * the Y plane of obj_surface as a 2D R8_UNORM surface, emit a relocation
+ * for its base address and point binding table entry `index` at the slot.
+ */
+static void
+gen75_vme_media_source_surface_state(VADriverContextP ctx,
+ int index,
+ struct object_surface *obj_surface,
+ struct gen6_encoder_context *gen6_encoder_context)
+{
+ struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
+ struct gen7_surface_state *ss;
+ dri_bo *bo;
+ int w, h, w_pitch;
+ unsigned int tiling, swizzle;
+
+ /* Y plane */
+ w = obj_surface->orig_width;
+ h = obj_surface->orig_height;
+ w_pitch = obj_surface->width;
+
+ bo = vme_context->surface_state_binding_table.bo;
+ dri_bo_map(bo, True);
+ assert(bo->virtual);
+
+ ss = (struct gen7_surface_state *)((char *)bo->virtual + SURFACE_STATE_OFFSET(index));
+ memset(ss, 0, sizeof(*ss));
+
+ ss->ss0.surface_type = I965_SURFACE_2D;
+ ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
+
+ ss->ss1.base_addr = obj_surface->bo->offset; /* covered by the relocation emitted below */
+
+ ss->ss2.width = w / 4 - 1; /* NOTE(review): width is programmed as a quarter of the pixel width -- confirm the reason with the kernel code */
+ ss->ss2.height = h - 1;
+
+ ss->ss3.pitch = w_pitch - 1;
+
+ dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
+ gen75_vme_set_common_surface_tiling(ss, tiling);
+
+ dri_bo_emit_reloc(bo,
+ I915_GEM_DOMAIN_RENDER, 0,
+ 0,
+ SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state, ss1),
+ obj_surface->bo);
+
+ ((unsigned int *)((char *)bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index); /* binding table entry -> slot offset */
+ dri_bo_unmap(bo);
+}
+/*
+ * Allocate the VME output buffer (one block per macroblock, with a block
+ * size that depends on whether the first slice is intra) and describe it
+ * as a buffer surface in surface state slot `index`.
+ *
+ * Always returns VA_STATUS_SUCCESS; a failed allocation trips an assert.
+ */
+static VAStatus
+gen75_vme_output_buffer_setup(VADriverContextP ctx,
+ struct encode_state *encode_state,
+ int index,
+ struct gen6_encoder_context *gen6_encoder_context)
+
+{
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
+ struct gen7_surface_state *ss;
+ dri_bo *bo;
+ VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param->buffer;
+ VAEncSliceParameterBuffer *pSliceParameter = (VAEncSliceParameterBuffer *)encode_state->slice_params[0]->buffer;
+ int is_intra = pSliceParameter->slice_flags.bits.is_intra;
+ int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
+ int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
+ int num_entries;
+
+ if ( is_intra ) {
+ vme_context->vme_output.size_block = INTRA_VME_OUTPUT_IN_BYTES * 2;
+ } else {
+ vme_context->vme_output.size_block = INTRA_VME_OUTPUT_IN_BYTES * 24;
+ /*
+ * Inter MV . 32-byte Intra search + 16 IME info + 128 IME MV + 32 IME Ref
+ * + 16 FBR Info + 128 FBR MV + 32 FBR Ref.
+ * 16 * (2 + 2 * (1 + 8 + 2))= 16 * 24.
+ */
+ }
+ vme_context->vme_output.num_blocks = width_in_mbs * height_in_mbs; /* one output block per macroblock */
+ vme_context->vme_output.pitch = 16;
+ bo = dri_bo_alloc(i965->intel.bufmgr,
+ "VME output buffer",
+ vme_context->vme_output.num_blocks * vme_context->vme_output.size_block,
+ 0x1000);
+ assert(bo);
+ vme_context->vme_output.bo = bo; /* NOTE(review): any previous vme_output.bo is overwritten here -- confirm it is released elsewhere */
+
+ bo = vme_context->surface_state_binding_table.bo;
+ dri_bo_map(bo, 1);
+ assert(bo->virtual);
+
+ ss = (struct gen7_surface_state *)((char *)bo->virtual + SURFACE_STATE_OFFSET(index));
+ memset(ss, 0, sizeof(*ss));
+
+ /* always use 16 bytes as pitch on Sandy Bridge */
+ num_entries = vme_context->vme_output.num_blocks * vme_context->vme_output.size_block / 16;
+
+ ss->ss0.surface_type = I965_SURFACE_BUFFER;
+
+ ss->ss1.base_addr = vme_context->vme_output.bo->offset;
+
+ ss->ss2.width = ((num_entries - 1) & 0x7f); /* (num_entries - 1) bits 0..6 */
+ ss->ss2.height = (((num_entries - 1) >> 7) & 0x3fff); /* bits 7..20 */
+ ss->ss3.depth = (((num_entries - 1) >> 21) & 0x3f); /* bits 21..26 */
+
+ ss->ss3.pitch = vme_context->vme_output.pitch - 1;
+
+ dri_bo_emit_reloc(bo,
+ I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+ 0,
+ SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state, ss1),
+ vme_context->vme_output.bo);
+
+ ((unsigned int *)((char *)bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index); /* binding table entry -> slot offset */
+ dri_bo_unmap(bo);
+
+ return VA_STATUS_SUCCESS;
+}
+/*
+ * Set up all surfaces used by the VME kernels.  Binding table slots:
+ *   0 - current picture (VME source surface state)
+ *   4 - current picture (media surface state)
+ *   1 - reference picture 0, inter frames only
+ *   3 - VME output buffer
+ * Always returns VA_STATUS_SUCCESS.
+ */
+static VAStatus gen75_vme_surface_setup(VADriverContextP ctx,
+ struct encode_state *encode_state,
+ int is_intra,
+ struct gen6_encoder_context *gen6_encoder_context)
+{
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ struct object_surface *obj_surface;
+ VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param->buffer;
+
+ /*Setup surfaces state*/
+ /* current picture for encoding */
+ obj_surface = SURFACE(encode_state->current_render_target);
+ assert(obj_surface);
+ gen75_vme_source_surface_state(ctx, 0, obj_surface, gen6_encoder_context);
+ gen75_vme_media_source_surface_state(ctx, 4, obj_surface, gen6_encoder_context);
+
+ if ( ! is_intra ) {
+ /* reference 0 */
+ obj_surface = SURFACE(pPicParameter->reference_picture);
+ assert(obj_surface);
+ gen75_vme_source_surface_state(ctx, 1, obj_surface, gen6_encoder_context);
+ /* reference 1, FIXME: */
+ // obj_surface = SURFACE(pPicParameter->reference_picture);
+ // assert(obj_surface);
+ //gen7_vme_source_surface_state(ctx, 2, obj_surface);
+ }
+
+ /* VME output */
+ gen75_vme_output_buffer_setup(ctx, encode_state, 3, gen6_encoder_context); /* return value ignored; the callee always reports success */
+
+ return VA_STATUS_SUCCESS;
+}
+
+/*
+ * Fill the interface descriptor table (one 32-byte descriptor per VME
+ * kernel) in the idrt buffer and emit a relocation for each kernel start
+ * pointer.  Always returns VA_STATUS_SUCCESS.
+ */
+static VAStatus gen75_vme_interface_setup(VADriverContextP ctx,
+                                          struct encode_state *encode_state,
+                                          struct gen6_encoder_context *gen6_encoder_context)
+{
+    struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
+    dri_bo *idrt_bo = vme_context->idrt.bo;
+    struct gen6_interface_descriptor_data *table;
+    int k;
+
+    dri_bo_map(idrt_bo, 1);
+    assert(idrt_bo->virtual);
+    table = idrt_bo->virtual;
+
+    for (k = 0; k < GEN6_VME_KERNEL_NUMBER; k++) {
+        struct gen6_interface_descriptor_data *entry = &table[k];
+        struct i965_kernel *kernel = &vme_context->vme_kernels[k];
+
+        assert(sizeof(*entry) == 32);
+
+        /* Set up the descriptor table entry */
+        memset(entry, 0, sizeof(*entry));
+        entry->desc0.kernel_start_pointer = (kernel->bo->offset >> 6);
+        entry->desc2.sampler_count = 0; /* FIXME: */
+        entry->desc2.sampler_state_pointer = 0;
+        entry->desc3.binding_table_entry_count = 1; /* FIXME: */
+        entry->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET >> 5);
+        entry->desc4.constant_urb_entry_read_offset = 0;
+        entry->desc4.constant_urb_entry_read_length = CURBE_URB_ENTRY_LENGTH;
+
+        /* kernel start: relocate desc0 of this entry against the kernel bo */
+        dri_bo_emit_reloc(idrt_bo,
+                          I915_GEM_DOMAIN_INSTRUCTION, 0,
+                          0,
+                          k * sizeof(*entry) + offsetof(struct gen6_interface_descriptor_data, desc0),
+                          kernel->bo);
+    }
+
+    dri_bo_unmap(idrt_bo);
+
+    return VA_STATUS_SUCCESS;
+}
+
+/*
+ * Copy the first 32 bytes of the VME state message into the CURBE
+ * (constant) buffer.  Always returns VA_STATUS_SUCCESS.
+ */
+static VAStatus gen75_vme_constant_setup(VADriverContextP ctx,
+                                         struct encode_state *encode_state,
+                                         struct gen6_encoder_context *gen6_encoder_context)
+{
+    struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
+    unsigned char *curbe_data;
+
+    dri_bo_map(vme_context->curbe.bo, 1);
+    curbe_data = vme_context->curbe.bo->virtual;
+    assert(curbe_data);
+
+    /*
+     * The VME MV/MB cost table is passed through the constant buffer.
+     * The search path is currently fixed, so it is constructed directly
+     * in the GPU shader rather than being passed here.
+     */
+    memcpy(curbe_data, (char *)vme_context->vme_state_message, 32);
+
+    dri_bo_unmap(vme_context->curbe.bo);
+
+    return VA_STATUS_SUCCESS;
+}
+
+/*
+ * Build the VME state message: the first five dwords carry the fixed
+ * MV/MB cost values passed to the VME on Haswell, dwords 5..7 are zero.
+ * `is_intra` is not consulted.  Always returns VA_STATUS_SUCCESS.
+ */
+static VAStatus gen75_vme_vme_state_setup(VADriverContextP ctx,
+                                          struct encode_state *encode_state,
+                                          int is_intra,
+                                          struct gen6_encoder_context *gen6_encoder_context)
+{
+    static const unsigned int cost_dwords[5] = {
+        0x4a4a4a4a,
+        0x4a4a4a4a,
+        0x4a4a4a4a,
+        0x22120200,
+        0x62524232,
+    };
+    struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
+    unsigned int *msg;
+    int dw;
+
+    assert(vme_context->vme_state_message);
+    msg = (unsigned int *)vme_context->vme_state_message;
+
+    for (dw = 0; dw < 8; dw++) {
+        msg[dw] = (dw < 5) ? cost_dwords[dw] : 0;
+    }
+
+    return VA_STATUS_SUCCESS;
+}
+
+/*
+ * Emit PIPELINE_SELECT choosing the media pipeline.  When `batch` is NULL
+ * the encoder context's own batch buffer is used.
+ */
+static void gen75_vme_pipeline_select(VADriverContextP ctx,
+                                      struct gen6_encoder_context *gen6_encoder_context,
+                                      struct intel_batchbuffer *batch)
+{
+    if (!batch) {
+        batch = gen6_encoder_context->base.batch;
+    }
+
+    BEGIN_BATCH(batch, 1);
+    OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
+    ADVANCE_BATCH(batch);
+}
+
+/*
+ * Emit the 10-dword STATE_BASE_ADDRESS command.  Only the surface state
+ * base points at a buffer (the surface-state/binding-table bo); every
+ * other base is zero and every upper bound is 0xFFFFF000.  When `batch`
+ * is NULL the encoder context's own batch buffer is used.
+ */
+static void gen75_vme_state_base_address(VADriverContextP ctx,
+                                         struct gen6_encoder_context *gen6_encoder_context,
+                                         struct intel_batchbuffer *batch)
+{
+    struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
+    int n;
+
+    if (!batch) {
+        batch = gen6_encoder_context->base.batch;
+    }
+
+    BEGIN_BATCH(batch, 10);
+    OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 8);
+
+    /* DW1: General State Base Address */
+    OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
+
+    /* DW2: Surface State Base Address */
+    OUT_RELOC(batch, vme_context->surface_state_binding_table.bo,
+              I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
+
+    /* DW3..5: Dynamic State, Indirect Object, Instruction base addresses */
+    for (n = 0; n < 3; n++) {
+        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
+    }
+
+    /* DW6..9: General State, Dynamic State, Indirect Object, Instruction access upper bounds */
+    for (n = 0; n < 4; n++) {
+        OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY);
+    }
+
+    /*
+      OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); //LLC Coherent Base Address
+      OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY ); //LLC Coherent Upper Bound
+    */
+
+    ADVANCE_BATCH(batch);
+}
+
+/*
+ * Emit the 8-dword MEDIA_VFE_STATE command from the values stored in
+ * vme_context->vfe_state, with the scoreboard dwords left at zero.  When
+ * `batch` is NULL the encoder context's own batch buffer is used.
+ */
+static void gen75_vme_vfe_state(VADriverContextP ctx,
+                                struct gen6_encoder_context *gen6_encoder_context,
+                                struct intel_batchbuffer *batch)
+{
+    struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
+    int n;
+
+    if (!batch) {
+        batch = gen6_encoder_context->base.batch;
+    }
+
+    BEGIN_BATCH(batch, 8);
+
+    /* DW0: Gen6 CMD_MEDIA_STATE_POINTERS = CMD_MEDIA_STATE */
+    OUT_BATCH(batch, CMD_MEDIA_VFE_STATE | 6);
+
+    /* DW1: Scratch Space Base Pointer and Space */
+    OUT_BATCH(batch, 0);
+
+    /* DW2: Maximum Number of Threads, Number of URB Entries, MEDIA Mode */
+    OUT_BATCH(batch, (vme_context->vfe_state.max_num_threads << 16)
+              | (vme_context->vfe_state.num_urb_entries << 8)
+              | (vme_context->vfe_state.gpgpu_mode << 2) );
+
+    /* DW3: Debug: Object ID */
+    OUT_BATCH(batch, 0);
+
+    /* DW4: URB Entry Allocation Size, CURBE Allocation Size */
+    OUT_BATCH(batch, (vme_context->vfe_state.urb_entry_size << 16)
+              | vme_context->vfe_state.curbe_allocation_size);
+
+    /* DW5..7: Disable Scoreboard */
+    for (n = 0; n < 3; n++) {
+        OUT_BATCH(batch, 0);
+    }
+
+    ADVANCE_BATCH(batch);
+}
+
+ /*
+  * Emit the 4-dword MEDIA_CURBE_LOAD command: CURBE_TOTAL_DATA_LENGTH bytes
+  * of constant data, read from the start of vme_context->curbe.bo.
+  *
+  * The command is written to `batch`, or to the encoder context's base batch
+  * buffer when `batch` is NULL.
+  */
+ static void gen75_vme_curbe_load(VADriverContextP ctx,
+                                  struct gen6_encoder_context *gen6_encoder_context,
+                                  struct intel_batchbuffer *batch)
+ {
+     struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
+     struct intel_batchbuffer *cmds = batch ? batch : gen6_encoder_context->base.batch;
+
+     BEGIN_BATCH(cmds, 4);
+     OUT_BATCH(cmds, CMD_MEDIA_CURBE_LOAD | 2);
+     OUT_BATCH(cmds, 0);
+     OUT_BATCH(cmds, CURBE_TOTAL_DATA_LENGTH);                   /* data length */
+     OUT_RELOC(cmds, vme_context->curbe.bo,
+               I915_GEM_DOMAIN_INSTRUCTION, 0, 0);               /* data start  */
+     ADVANCE_BATCH(cmds);
+ }
+
+ /*
+  * Emit the 4-dword MEDIA_INTERFACE_LOAD command that points the hardware at
+  * the interface descriptor table in vme_context->idrt.bo.  The length field
+  * covers GEN6_VME_KERNEL_NUMBER descriptors.
+  *
+  * The command is written to `batch`, or to the encoder context's base batch
+  * buffer when `batch` is NULL.
+  */
+ static void gen75_vme_idrt(VADriverContextP ctx,
+                            struct gen6_encoder_context *gen6_encoder_context,
+                            struct intel_batchbuffer *batch)
+ {
+     struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
+     struct intel_batchbuffer *cmds = batch ? batch : gen6_encoder_context->base.batch;
+
+     BEGIN_BATCH(cmds, 4);
+     OUT_BATCH(cmds, CMD_MEDIA_INTERFACE_LOAD | 2);
+     OUT_BATCH(cmds, 0);
+     OUT_BATCH(cmds, GEN6_VME_KERNEL_NUMBER * sizeof(struct gen6_interface_descriptor_data));
+     OUT_RELOC(cmds, vme_context->idrt.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+     ADVANCE_BATCH(cmds);
+ }
+
+ /*
+  * Emit one 8-dword MEDIA_OBJECT command for the macroblock at (mb_x, mb_y).
+  *
+  * kernel       interface descriptor offset selecting the kernel to run
+  * mb_intra_ub  neighbour-availability flags, placed at bit 8 and above of
+  *              the second inline dword
+  * batch        destination batch buffer; NULL selects the encoder context's
+  *              base batch buffer
+  *
+  * The first inline dword packs the picture width in macroblocks (derived
+  * from the current render target), mb_y and mb_x.
+  *
+  * Returns the size of the emitted command in bytes.
+  */
+ static int gen75_vme_media_object(VADriverContextP ctx,
+                                   struct encode_state *encode_state,
+                                   int mb_x, int mb_y,
+                                   int kernel, unsigned int mb_intra_ub,
+                                   struct gen6_encoder_context *gen6_encoder_context,
+                                   struct intel_batchbuffer *batch)
+ {
+     /* `i965` is kept under this name because SURFACE() is given no other
+      * handle on the driver data (presumably it refers to it implicitly). */
+     struct i965_driver_data *i965 = i965_driver_data(ctx);
+     struct object_surface *obj_surface = SURFACE(encode_state->current_render_target);
+     const int mb_width = ALIGN(obj_surface->orig_width, 16) / 16;
+     const int len_in_dwords = 8;
+     struct intel_batchbuffer *cmds = batch ? batch : gen6_encoder_context->base.batch;
+     int i;
+
+     BEGIN_BATCH(cmds, len_in_dwords);
+
+     OUT_BATCH(cmds, CMD_MEDIA_OBJECT | (len_in_dwords - 2));
+     OUT_BATCH(cmds, kernel);            /* Interface Descriptor Offset */
+     for (i = 0; i < 4; i++)
+         OUT_BATCH(cmds, 0);
+
+     /* inline data */
+     OUT_BATCH(cmds, mb_width << 16 | mb_y << 8 | mb_x);
+     OUT_BATCH(cmds, ((mb_intra_ub << 8) | 0));
+
+     ADVANCE_BATCH(cmds);
+
+     return len_in_dwords * 4;
+ }
+
+ /*
+  * Reset the per-frame VME resources of the encoder context.
+  *
+  * The CURBE, surface-state/binding-table and interface-descriptor buffers
+  * are dropped and freshly allocated; the VME output and VME state buffers
+  * are dropped and left NULL.  Finally the VFE state parameters used by
+  * gen75_vme_vfe_state() are filled in.
+  */
+ static void gen75_vme_media_init(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
+ {
+     struct i965_driver_data *i965 = i965_driver_data(ctx);
+     struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
+
+     /* constant buffer */
+     dri_bo_unreference(vme_context->curbe.bo);
+     vme_context->curbe.bo = dri_bo_alloc(i965->intel.bufmgr,
+                                          "Buffer",
+                                          CURBE_TOTAL_DATA_LENGTH, 64);
+     assert(vme_context->curbe.bo);
+
+     /* surface states followed by the binding table */
+     dri_bo_unreference(vme_context->surface_state_binding_table.bo);
+     vme_context->surface_state_binding_table.bo =
+         dri_bo_alloc(i965->intel.bufmgr,
+                      "surface state & binding table",
+                      (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6,
+                      4096);
+     assert(vme_context->surface_state_binding_table.bo);
+
+     /* interface descriptor remapping table */
+     dri_bo_unreference(vme_context->idrt.bo);
+     vme_context->idrt.bo =
+         dri_bo_alloc(i965->intel.bufmgr,
+                      "Buffer",
+                      MAX_INTERFACE_DESC_GEN6 * sizeof(struct gen6_interface_descriptor_data), 16);
+     assert(vme_context->idrt.bo);
+
+     /* VME output buffer: released here, not reallocated in this function */
+     dri_bo_unreference(vme_context->vme_output.bo);
+     vme_context->vme_output.bo = NULL;
+
+     /* VME state buffer: released here, not reallocated in this function */
+     dri_bo_unreference(vme_context->vme_state.bo);
+     vme_context->vme_state.bo = NULL;
+
+     /* VFE parameters; the "- 1" terms suggest the fields are programmed
+      * as count-minus-one values. */
+     vme_context->vfe_state.max_num_threads = 60 - 1;
+     vme_context->vfe_state.num_urb_entries = 16;
+     vme_context->vfe_state.gpgpu_mode = 0;
+     vme_context->vfe_state.urb_entry_size = 59 - 1;
+     vme_context->vfe_state.curbe_allocation_size = CURBE_ALLOCATION_SIZE - 1;
+ }
+
+ /*
+ * Neighbour-availability flags collected in mb_intra_ub by
+ * gen75_vme_pipeline_programing() below:
+ * AE - set when the macroblock is not in the first column (x != 0)
+ * B - set when the macroblock is not in the first row (y != 0)
+ * C - set when y != 0 and the macroblock is not in the last column
+ * D - set when y != 0 and x != 0
+ * BCD_MASK equals B | C | D; it is not used in this function.
+ */
+ #define INTRA_PRED_AVAIL_FLAG_AE 0x60
+ #define INTRA_PRED_AVAIL_FLAG_B 0x10
+ #define INTRA_PRED_AVAIL_FLAG_C 0x8
+ #define INTRA_PRED_AVAIL_FLAG_D 0x4
+ #define INTRA_PRED_AVAIL_FLAG_BCD_MASK 0x1C
+
+ /*
+ * Build the VME command stream for one picture.
+ *
+ * A temporary batch buffer is created and filled with the pipeline state
+ * commands (once) followed by one MEDIA_OBJECT per macroblock, walking the
+ * picture row by row. The kernel is VME_INTRA_SHADER or VME_INTER_SHADER
+ * depending on the is_intra flag of the first slice parameter buffer.
+ * The temporary buffer is terminated with MI_BATCH_BUFFER_END and then
+ * chained from the encoder's main batch buffer with MI_BATCH_BUFFER_START.
+ */
+ static void gen75_vme_pipeline_programing(VADriverContextP ctx,
+ struct encode_state *encode_state,
+ struct gen6_encoder_context *gen6_encoder_context)
+ {
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ struct intel_batchbuffer *main_batch = gen6_encoder_context->base.batch;
+ VAEncSliceParameterBuffer *pSliceParameter = (VAEncSliceParameterBuffer *)encode_state->slice_params[0]->buffer;
+ VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param->buffer;
+ int is_intra = pSliceParameter->slice_flags.bits.is_intra;
+ int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
+ int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
+ int emit_new_state = 1, object_len_in_bytes;
+ int x, y;
+ unsigned int mb_intra_ub;
+ /* 8 dwords * 4 bytes per macroblock matches the size of one MEDIA_OBJECT
+ * emitted by gen75_vme_media_object(); 0x200 is extra room for the state
+ * commands and the terminator. */
+ struct intel_batchbuffer *batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_RENDER, width_in_mbs * height_in_mbs * 8 * 4 + 0x200);
+
+ intel_batchbuffer_start_atomic(batch, width_in_mbs * height_in_mbs * 8 * 4 + 0x100);
+
+ for(y = 0; y < height_in_mbs; y++){
+ for(x = 0; x < width_in_mbs; x++){
+ /* Work out which neighbouring macroblocks exist for (x, y). */
+ mb_intra_ub = 0;
+ if (x != 0) {
+ mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
+ }
+ if (y != 0) {
+ mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
+ if (x != 0)
+ mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
+ if (x != (width_in_mbs -1))
+ mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
+ }
+
+ /* State commands are emitted before the first MEDIA_OBJECT, and again
+ * only if the recovery path below re-arms emit_new_state. */
+ if (emit_new_state) {
+ /*Step1: MI_FLUSH/PIPE_CONTROL*/
+ intel_batchbuffer_emit_mi_flush(batch);
+
+ /*Step2: State command PIPELINE_SELECT*/
+ gen75_vme_pipeline_select(ctx, gen6_encoder_context, batch);
+
+ /*Step3: State commands configuring pipeline states*/
+ gen75_vme_state_base_address(ctx, gen6_encoder_context, batch);
+ gen75_vme_vfe_state(ctx, gen6_encoder_context, batch);
+ gen75_vme_curbe_load(ctx, gen6_encoder_context, batch);
+ gen75_vme_idrt(ctx, gen6_encoder_context, batch);
+
+ emit_new_state = 0;
+ }
+
+ /*Step4: Primitive commands*/
+ object_len_in_bytes = gen75_vme_media_object(ctx, encode_state, x, y, is_intra ? VME_INTRA_SHADER : VME_INTER_SHADER, mb_intra_ub, gen6_encoder_context, batch);
+
+ /* NOTE(review): the assert(0) makes the flush-and-restart path below
+ * unreachable in assert-enabled builds; the buffer is sized above so
+ * that this condition is presumably never expected - confirm. */
+ if (intel_batchbuffer_check_free_space(batch, object_len_in_bytes) == 0) {
+ assert(0);
+ intel_batchbuffer_end_atomic(batch);
+ intel_batchbuffer_flush(batch);
+ emit_new_state = 1;
+ intel_batchbuffer_start_atomic(batch, 0x1000);
+ }
+ }
+ }
+
+ /* Terminate the temporary batch: align, then a zero dword followed by
+ * MI_BATCH_BUFFER_END. */
+ intel_batchbuffer_align(batch, 8);
+
+ BEGIN_BATCH(batch, 2);
+ OUT_BATCH(batch, 0);
+ OUT_BATCH(batch, MI_BATCH_BUFFER_END);
+ ADVANCE_BATCH(batch);
+
+ intel_batchbuffer_end_atomic(batch);
+
+ /* chain to the main batch buffer */
+ intel_batchbuffer_start_atomic(main_batch, 0x100);
+ intel_batchbuffer_emit_mi_flush(main_batch);
+ BEGIN_BATCH(main_batch, 2);
+ OUT_BATCH(main_batch, MI_BATCH_BUFFER_START | (2 << 6));
+ OUT_RELOC(main_batch,
+ batch->buffer,
+ I915_GEM_DOMAIN_COMMAND, 0,
+ 0);
+ ADVANCE_BATCH(main_batch);
+ intel_batchbuffer_end_atomic(main_batch);
+
+ // end programing
+ /* NOTE(review): the temporary batch object is freed before the main batch
+ * is flushed; this presumably relies on the relocation above keeping
+ * batch->buffer alive - confirm against intel_batchbuffer_free(). */
+ intel_batchbuffer_free(batch);
+ }
+
+ /*
+  * Prepare everything the VME stage needs for the current picture: surfaces,
+  * interface descriptors, VME state, constants and finally the command
+  * stream itself.  Intra/inter mode is taken from the first slice parameter
+  * buffer.
+  *
+  * Always returns VA_STATUS_SUCCESS.
+  */
+ static VAStatus gen75_vme_prepare(VADriverContextP ctx,
+                                   struct encode_state *encode_state,
+                                   struct gen6_encoder_context *gen6_encoder_context)
+ {
+     VAEncSliceParameterBuffer *slice_param =
+         (VAEncSliceParameterBuffer *)encode_state->slice_params[0]->buffer;
+     const int is_intra = slice_param->slice_flags.bits.is_intra;
+
+     /* State set-up */
+     gen75_vme_surface_setup(ctx, encode_state, is_intra, gen6_encoder_context);
+     gen75_vme_interface_setup(ctx, encode_state, gen6_encoder_context);
+     gen75_vme_vme_state_setup(ctx, encode_state, is_intra, gen6_encoder_context);
+     gen75_vme_constant_setup(ctx, encode_state, gen6_encoder_context);
+
+     /* Program the media pipeline */
+     gen75_vme_pipeline_programing(ctx, encode_state, gen6_encoder_context);
+
+     return VA_STATUS_SUCCESS;
+ }
+
+ /*
+  * Submit the VME work by flushing the encoder context's base batch buffer.
+  * `ctx` and `encode_state` are unused.  Always returns VA_STATUS_SUCCESS.
+  */
+ static VAStatus gen75_vme_run(VADriverContextP ctx,
+                               struct encode_state *encode_state,
+                               struct gen6_encoder_context *gen6_encoder_context)
+ {
+     intel_batchbuffer_flush(gen6_encoder_context->base.batch);
+
+     return VA_STATUS_SUCCESS;
+ }
+
+ /*
+  * Final step of the VME pipeline.  Nothing needs to be done here; the
+  * function exists so that gen75_vme_pipeline() has a uniform
+  * init/prepare/run/stop sequence.  Always returns VA_STATUS_SUCCESS.
+  */
+ static VAStatus gen75_vme_stop(VADriverContextP ctx,
+                                struct encode_state *encode_state,
+                                struct gen6_encoder_context *gen6_encoder_context)
+ {
+     /* All parameters are intentionally unused. */
+     (void)ctx;
+     (void)encode_state;
+     (void)gen6_encoder_context;
+
+     return VA_STATUS_SUCCESS;
+ }
+
+ /*
+  * Entry point of the VME stage: runs media init, prepare, run and stop in
+  * that order for the given encode state.
+  *
+  * `profile` is not used.  The status values returned by the individual
+  * steps are not inspected; the function always returns VA_STATUS_SUCCESS.
+  */
+ VAStatus gen75_vme_pipeline(VADriverContextP ctx,
+                             VAProfile profile,
+                             struct encode_state *encode_state,
+                             struct gen6_encoder_context *gen6_encoder_context)
+ {
+     (void)profile;
+
+     gen75_vme_media_init(ctx, gen6_encoder_context);
+     (void)gen75_vme_prepare(ctx, encode_state, gen6_encoder_context);
+     (void)gen75_vme_run(ctx, encode_state, gen6_encoder_context);
+     (void)gen75_vme_stop(ctx, encode_state, gen6_encoder_context);
+
+     return VA_STATUS_SUCCESS;
+ }
+
+ /*
+  * One-time initialisation of a VME context.
+  *
+  * Copies the gen75 kernel table into the context, allocates one buffer
+  * object per kernel and uploads the kernel binary into it, then allocates
+  * the host-side VME state message array (VME_MSG_LENGTH ints).
+  *
+  * Returns True on success.  If the state message array cannot be allocated
+  * the kernel buffers created here are released again and False is returned,
+  * so the context never ends up half-initialised with a NULL
+  * vme_state_message that later code would dereference.
+  */
+ Bool gen75_vme_context_init(VADriverContextP ctx, struct gen6_vme_context *vme_context)
+ {
+     struct i965_driver_data *i965 = i965_driver_data(ctx);
+     int i;
+
+     memcpy(vme_context->vme_kernels, gen75_vme_kernels, sizeof(vme_context->vme_kernels));
+
+     for (i = 0; i < GEN6_VME_KERNEL_NUMBER; i++) {
+         /* Load kernel into GPU memory */
+         struct i965_kernel *kernel = &vme_context->vme_kernels[i];
+
+         kernel->bo = dri_bo_alloc(i965->intel.bufmgr,
+                                   kernel->name,
+                                   kernel->size,
+                                   0x1000);
+         assert(kernel->bo);
+         dri_bo_subdata(kernel->bo, 0, kernel->size, kernel->bin);
+     }
+
+     vme_context->vme_state_message = malloc(VME_MSG_LENGTH * sizeof(int));
+     if (vme_context->vme_state_message == NULL) {
+         /* Undo the kernel uploads so nothing leaks on the failure path. */
+         for (i = 0; i < GEN6_VME_KERNEL_NUMBER; i++) {
+             dri_bo_unreference(vme_context->vme_kernels[i].bo);
+             vme_context->vme_kernels[i].bo = NULL;
+         }
+         return False;
+     }
+
+     return True;
+ }
+
+ /*
+  * Release everything owned by a VME context: the interface descriptor,
+  * surface-state/binding-table, CURBE, VME output and VME state buffers, the
+  * per-kernel buffers, and the host-side VME state message array.  Every
+  * pointer is reset to NULL after its object has been released.
+  *
+  * Always returns True.
+  */
+ Bool gen75_vme_context_destroy(struct gen6_vme_context *vme_context)
+ {
+     dri_bo **owned_bos[] = {
+         &vme_context->idrt.bo,
+         &vme_context->surface_state_binding_table.bo,
+         &vme_context->curbe.bo,
+         &vme_context->vme_output.bo,
+         &vme_context->vme_state.bo,
+     };
+     unsigned int n;
+     int k;
+
+     for (n = 0; n < sizeof(owned_bos) / sizeof(owned_bos[0]); n++) {
+         dri_bo_unreference(*owned_bos[n]);
+         *owned_bos[n] = NULL;
+     }
+
+     /* Drop the buffers holding the uploaded kernels. */
+     for (k = 0; k < GEN6_VME_KERNEL_NUMBER; k++) {
+         dri_bo_unreference(vme_context->vme_kernels[k].bo);
+         vme_context->vme_kernels[k].bo = NULL;
+     }
+
+     /* free(NULL) is a no-op, so no guard is required. */
+     free(vme_context->vme_state_message);
+     vme_context->vme_state_message = NULL;
+
+     return True;
+ }
diff --git a/src/gen75_vpp_vebox.c b/src/gen75_vpp_vebox.c
new file mode 100644
index 0000000..b1bef7b
--- /dev/null
+++ b/src/gen75_vpp_vebox.c
@@ -0,0 +1,861 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Li Xiaowei <xiaowei.a.li at intel.com>
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <math.h>
+
+#include "intel_batchbuffer.h"
+#include "intel_driver.h"
+#include "i965_defines.h"
+#include "i965_structs.h"
+#include "gen75_vpp_vebox.h"
+
+ /* Approximation of pi; used in this file to convert a hue angle from
+ * degrees to radians (hue / 180 * PI). */
+ #define PI 3.1415926
+
+ /* Surface-creation entry point defined elsewhere in the driver; declared
+ * here because no included header visible in this file provides it.
+ * NOTE(review): no caller is visible in the reviewed part of this file. */
+ extern VAStatus
+ i965_CreateSurfaces(VADriverContextP ctx,
+ int width,
+ int height,
+ int format,
+ int num_surfaces,
+ VASurfaceID *surfaces);
+
+ /*
+  * Convert a floating-point value to a fixed-point bit pattern.
+  *
+  * src            value to convert
+  * out_int_bits   number of integer bits in the result
+  * out_frac_bits  number of fractional bits in the result
+  * out_sign_flag  1 if the format has a sign bit directly above the
+  *                integer bits, 0 for an unsigned format
+  *
+  * The magnitude is encoded as (integer part << out_frac_bits) | fraction,
+  * with the fraction truncated to out_frac_bits bits.  A negative input is
+  * stored as the two's complement of its magnitude within
+  * out_int_bits + out_frac_bits bits; when out_sign_flag is 1 the sign bit
+  * above those bits is set as well.
+  *
+  * The integer part used to be hard-coded to 0, which silently dropped it:
+  * 1.0 in s2.10 came out as 0 instead of 0x400, and whole-number offsets
+  * such as 64 in a 10.0 format came out as 0.  It is now taken from the
+  * input.
+  */
+ int format_convert(float src, int out_int_bits, int out_frac_bits,int out_sign_flag)
+ {
+     unsigned char negative_flag = (src < 0.0) ? 1 : 0;
+     float src_1 = (!negative_flag)? src: -src ;
+     unsigned int factor = 1 << out_frac_bits;
+     int output_value = 0;
+
+     /* src_1 is non-negative, so truncation is the same as floor(). */
+     unsigned int integer_part = (unsigned int)src_1;
+     unsigned int fraction_part = ((int)((src_1 - integer_part) * factor)) & (factor - 1) ;
+
+     output_value = (integer_part << out_frac_bits) | fraction_part;
+
+     /* two's complement within the magnitude bits */
+     if(negative_flag)
+         output_value = (~output_value + 1) & ((1 <<(out_int_bits + out_frac_bits)) -1);
+
+     /* explicit sign bit just above the magnitude bits */
+     if(out_sign_flag == 1 && negative_flag)
+     {
+         output_value |= negative_flag <<(out_int_bits + out_frac_bits);
+     }
+     return output_value;
+ }
+
+ /*
+  * Fill the denoise/deinterlace (DN/DI) state table with fixed values.
+  *
+  * Ten dwords are written to proc_ctx->dndi_state_table.ptr, which the
+  * caller must have mapped beforehand.  The DN/DI filter parameter buffers
+  * are not consulted; every field is a hard-coded constant.  `ctx` is
+  * unused.
+  */
+ void hsw_veb_dndi_table(VADriverContextP ctx, struct intel_vebox_context *proc_ctx)
+ {
+     unsigned int *dw = (unsigned int *)proc_ctx->dndi_state_table.ptr;
+
+     /* W0 */
+     dw[0] = 0;                  /* reserved */
+
+     /* W1 */
+     dw[1] = ( 0   << 24 |       /* denoise STAD threshold */
+               128 << 16 |       /* dnmh_history_max */
+               0   << 12 |       /* reserved */
+               8   << 8  |       /* dnmh_delta[3:0] */
+               0 );              /* denoise ASD threshold */
+
+     /* W2 */
+     dw[2] = ( 0  << 30 |        /* reserved */
+               16 << 24 |        /* temporal diff th */
+               0  << 22 |        /* reserved */
+               8  << 16 |        /* low temporal diff th */
+               0  << 13 |        /* STMM C2 */
+               0  << 8  |        /* denoise moving pixel th */
+               64 );             /* denoise th for sum of complexity measure */
+
+     /* W3 */
+     dw[3] = ( 0 << 30 |         /* reserved */
+               4 << 24 |         /* good neighbor th[5:0] */
+               9 << 20 |         /* CAT slope minus 1 */
+               5 << 16 |         /* SAD Tight in */
+               0 << 14 |         /* smooth mv th */
+               0 << 12 |         /* reserved */
+               1 << 8  |         /* bne_edge_th[3:0] */
+               15 );             /* block noise estimate noise th */
+
+     /* W4 */
+     dw[4] = ( 0  << 31 |        /* STMM blending constant select */
+               64 << 24 |        /* STMM trc1 */
+               0  << 16 |        /* STMM trc2 */
+               0  << 14 |        /* reserved */
+               2  << 8  |        /* VECM_mul */
+               128 );            /* maximum STMM */
+
+     /* W5 */
+     dw[5] = ( 0   << 24 |       /* minimum STMM */
+               0   << 22 |       /* STMM shift down */
+               0   << 20 |       /* STMM shift up */
+               7   << 16 |       /* STMM output shift */
+               128 << 8  |       /* SDI threshold */
+               8 );              /* SDI delta */
+
+     /* W6 */
+     dw[6] = ( 0 << 24 |         /* SDI fallback mode 1 T1 constant */
+               0 << 16 |         /* SDI fallback mode 1 T2 constant */
+               0 << 8  |         /* SDI fallback mode 2 constant (angle2x1) */
+               0 );              /* FMD temporal difference threshold */
+
+     /* W7 */
+     dw[7] = ( 32 << 24 |        /* FMD #1 vertical difference th */
+               32 << 16 |        /* FMD #2 vertical difference th */
+               1  << 14 |        /* CAT th1 */
+               32 << 8  |        /* FMD tear threshold */
+               0  << 7  |        /* MCDI enable (motion compensated deinterlace) */
+               0  << 6  |        /* progressive DN */
+               0  << 4  |        /* reserved */
+               0  << 3  |        /* DN/DI top first */
+               0 );              /* reserved */
+
+     /* W8 */
+     dw[8] = ( 0  << 29 |        /* reserved */
+               0  << 23 |        /* dnmh_history_init[5:0] */
+               10 << 19 |        /* neighborPixel th */
+               0  << 18 |        /* reserved */
+               0  << 16 |        /* FMD for 2nd field of previous frame */
+               25 << 10 |        /* MC pixel consistency th */
+               0  << 8  |        /* FMD for 1st field of current frame */
+               10 << 4  |        /* SAD THB */
+               5 );              /* SAD THA */
+
+     /* W9 */
+     dw[9] = ( 0 << 24 |         /* reserved */
+               0 << 16 |         /* chr_dnmh_stad_th */
+               0 << 13 |         /* reserved */
+               0 << 12 |         /* chroma denoise enable */
+               0 << 6  |         /* chr temp diff th */
+               0 );              /* chr temp diff low */
+ }
+
+ /*
+  * Program the STD/STE section of the IECP state table: the first 29 dwords
+  * starting at proc_ctx->iecp_state_table.ptr.
+  *
+  * When VPP_IECP_STD_STE is not set in filters_mask the section is zeroed;
+  * otherwise a fixed set of register values is written.  The STD filter
+  * parameter buffer is not consulted.  `ctx` is unused.
+  */
+ void hsw_veb_iecp_std_table(VADriverContextP ctx, struct intel_vebox_context *proc_ctx)
+ {
+     static const unsigned int std_ste_defaults[29] = {
+         0x9a6e39f0, 0x400c0000, 0x00001180, 0xfe2f2e00, 0x000000ff,
+         0x00140000, 0xd82e0000, 0x8285ecec, 0x00008282, 0x00000000,
+         0x02117000, 0xa38fec96, 0x0000c8c8, 0x00000000, 0x01478000,
+         0x0007c306, 0x00000000, 0x00000000, 0x1c1bd000, 0x00000000,
+         0x00000000, 0x00000000, 0x0007cf80, 0x00000000, 0x00000000,
+         0x1c080000, 0x00000000, 0x00000000, 0x00000000,
+     };
+     unsigned int *section = (unsigned int *)(proc_ctx->iecp_state_table.ptr + 0);
+
+     if (proc_ctx->filters_mask & VPP_IECP_STD_STE)
+         memcpy(section, std_ste_defaults, sizeof(std_ste_defaults));
+     else
+         memset(section, 0, 29 * 4);
+ }
+
+ /*
+  * Program the ACE section of the IECP state table: 13 dwords at offset 116
+  * from proc_ctx->iecp_state_table.ptr (116 = 29 * 4, i.e. directly behind
+  * the 29-dword STD/STE section if the offset is counted in bytes).
+  *
+  * When VPP_IECP_ACE is not set in filters_mask the section is zeroed;
+  * otherwise a fixed set of register values is written.  `ctx` is unused.
+  */
+ void hsw_veb_iecp_ace_table(VADriverContextP ctx, struct intel_vebox_context *proc_ctx)
+ {
+     static const unsigned int ace_defaults[13] = {
+         0x00000068, 0x4c382410, 0x9c887460, 0xebd8c4b0, 0x604c3824,
+         0xb09c8874, 0x0000d8c4, 0x00000000, 0x00000000, 0x00000000,
+         0x00000000, 0x00000000, 0x00000000,
+     };
+     unsigned int *section = (unsigned int*)(proc_ctx->iecp_state_table.ptr + 116);
+
+     if (proc_ctx->filters_mask & VPP_IECP_ACE)
+         memcpy(section, ace_defaults, sizeof(ace_defaults));
+     else
+         memset(section, 0, 13 * 4);
+ }
+
+ /*
+  * Program the TCC section of the IECP state table: 11 dwords at offset 168
+  * from proc_ctx->iecp_state_table.ptr.
+  *
+  * When VPP_IECP_TCC is not set in filters_mask the section is zeroed;
+  * otherwise a fixed set of register values is written.  The TCC filter
+  * parameter buffer is not consulted.  `ctx` is unused.
+  */
+ void hsw_veb_iecp_tcc_table(VADriverContextP ctx, struct intel_vebox_context *proc_ctx)
+ {
+     static const unsigned int tcc_defaults[11] = {
+         0x00000000, 0x00000000, 0x1e34cc91, 0x3e3cce91, 0x02e80195,
+         0x0197046b, 0x01790174, 0x00000000, 0x00000000, 0x03030000,
+         0x009201c0,
+     };
+     unsigned int *section = (unsigned int*)(proc_ctx->iecp_state_table.ptr + 168);
+
+     if (proc_ctx->filters_mask & VPP_IECP_TCC)
+         memcpy(section, tcc_defaults, sizeof(tcc_defaults));
+     else
+         memset(section, 0, 11 * 4);
+ }
+
+ /*
+  * Program the ProcAmp section of the IECP state table: 2 dwords at offset
+  * 212 from proc_ctx->iecp_state_table.ptr.
+  *
+  * When VPP_IECP_PRO_AMP is not set in filters_mask the section is zeroed.
+  * Otherwise the section is built from fixed inputs (contrast 0x80,
+  * brightness 0, hue 0, saturation 1.0, contrast factor 1.0); the
+  * colour-balance filter parameter buffer is not read yet.  `ctx` is
+  * unused.
+  */
+ void hsw_veb_iecp_pro_amp_table(VADriverContextP ctx, struct intel_vebox_context *proc_ctx)
+ {
+     unsigned int *section = (unsigned int*)(proc_ctx->iecp_state_table.ptr + 212);
+     unsigned int contrast = 0x80;       /* U4.7 */
+     int brightness = 0x00;              /* S7.4 */
+     float src_saturation = 1.0;
+     float src_hue = 0.0;                /* degrees */
+     float src_contrast = 1.0;
+     float scaled = 0.0;
+     int cos_c_s;
+     int sin_c_s;
+
+     if (!(proc_ctx->filters_mask & VPP_IECP_PRO_AMP)) {
+         memset(section, 0, 2 * 4);
+         return;
+     }
+
+     /* cos(hue) * contrast * saturation */
+     scaled = cos(src_hue/180*PI) * src_contrast * src_saturation;
+     cos_c_s = format_convert(scaled, 7, 8, 1);
+
+     /* sin(hue) * contrast * saturation */
+     scaled = sin(src_hue/180*PI) * src_contrast * src_saturation;
+     sin_c_s = format_convert(scaled, 7, 8, 1);
+
+     section[0] = ( 0 << 28 |            /* reserved */
+                    contrast << 17 |     /* contrast value (U4.7 format) */
+                    0 << 13 |            /* reserved */
+                    brightness << 1|     /* brightness (S7.4 format) */
+                    1);
+
+     section[1] = ( cos_c_s << 16 |      /* cos(h) * contrast * saturation */
+                    sin_c_s);            /* sin(h) * contrast * saturation */
+ }
+
+
+ /*
+  * Program the colour-space-conversion (CSC) section of the IECP state
+  * table: 8 dwords at offset 220 from proc_ctx->iecp_state_table.ptr.
+  *
+  * The section is zeroed when VPP_IECP_CSC is not set in filters_mask, or
+  * when input and output FOURCC are identical.  Otherwise:
+  *   - RGBA input with NV12/YV12/YUY2/AYUV output selects the RGB->YUV
+  *     matrix and output offsets,
+  *   - NV12/YV12/YUY2/AYUV input with RGBA output selects the YUV->RGB
+  *     matrix and input offsets,
+  *   - any other pair of differing formats enables the transform with an
+  *     identity matrix and zero offsets.
+  * Coefficients are packed with format_convert() as s2.10, offsets as
+  * signed 10.0 values.  `ctx` is unused.
+  *
+  * The RGBA->YUV output test used to compare against the FOURCC
+  * 'Y','V','Y','2', which is not the packed format tested on the input side
+  * ('Y','U','Y','2'), so RGBA->YUY2 fell through to the identity matrix.
+  * It now tests for 'Y','U','Y','2'.
+  */
+ void hsw_veb_iecp_csc_table(VADriverContextP ctx, struct intel_vebox_context *proc_ctx)
+ {
+     unsigned int *p_table = (unsigned int*)(proc_ctx->iecp_state_table.ptr + 220);
+     float tran_coef[9] = {1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0};
+     float v_coef[3] = {0.0, 0.0, 0.0};
+     float u_coef[3] = {0.0, 0.0, 0.0};
+     int is_transform_enabled = 0;
+
+     if(!(proc_ctx->filters_mask & VPP_IECP_CSC)){
+         memset(p_table, 0, 8 * 4);
+         return;
+     }
+
+     if(proc_ctx->fourcc_input == VA_FOURCC('R','G','B','A') &&
+        (proc_ctx->fourcc_output == VA_FOURCC('N','V','1','2') ||
+         proc_ctx->fourcc_output == VA_FOURCC('Y','V','1','2') ||
+         proc_ctx->fourcc_output == VA_FOURCC('Y','U','Y','2') ||
+         proc_ctx->fourcc_output == VA_FOURCC('A','Y','U','V'))) {
+         /* RGB -> YUV */
+         tran_coef[0] = 0.257;
+         tran_coef[1] = 0.504;
+         tran_coef[2] = 0.098;
+         tran_coef[3] = -0.148;
+         tran_coef[4] = -0.291;
+         tran_coef[5] = 0.439;
+         tran_coef[6] = 0.439;
+         tran_coef[7] = -0.368;
+         tran_coef[8] = -0.071;
+
+         u_coef[0] = 16 * 4;
+         u_coef[1] = 128 * 4;
+         u_coef[2] = 128 * 4;
+
+         is_transform_enabled = 1;
+     }else if((proc_ctx->fourcc_input == VA_FOURCC('N','V','1','2') ||
+               proc_ctx->fourcc_input == VA_FOURCC('Y','V','1','2') ||
+               proc_ctx->fourcc_input == VA_FOURCC('Y','U','Y','2') ||
+               proc_ctx->fourcc_input == VA_FOURCC('A','Y','U','V'))&&
+              proc_ctx->fourcc_output == VA_FOURCC('R','G','B','A')) {
+         /* YUV -> RGB */
+         tran_coef[0] = 1.164;
+         tran_coef[1] = 0.000;
+         tran_coef[2] = 1.569;
+         tran_coef[3] = 1.164;
+         tran_coef[4] = -0.813;
+         tran_coef[5] = -0.392;
+         tran_coef[6] = 1.164;
+         tran_coef[7] = 2.017;
+         tran_coef[8] = 0.000;
+
+         v_coef[0] = -16 * 4;
+         v_coef[1] = -128 * 4;
+         v_coef[2] = -128 * 4;
+
+         is_transform_enabled = 1;
+     }else if(proc_ctx->fourcc_input != proc_ctx->fourcc_output){
+         /* enable when input and output format are different */
+         is_transform_enabled = 1;
+     }
+
+     if(is_transform_enabled == 0){
+         memset(p_table, 0, 8 * 4);
+         return;
+     }
+
+     *p_table ++ = ( 0 << 29 |                                          /* reserved */
+                     format_convert(tran_coef[1], 2, 10, 1) << 16 |     /* c1, s2.10 format */
+                     format_convert(tran_coef[0], 2, 10, 1) << 3 |      /* c0, s2.10 format */
+                     0 << 2 |                                           /* reserved */
+                     0 << 1 |                                           /* yuv_channel swap */
+                     is_transform_enabled);
+
+     *p_table ++ = ( 0 << 26 |                                          /* reserved */
+                     format_convert(tran_coef[3], 2, 10, 1) << 13 |
+                     format_convert(tran_coef[2], 2, 10, 1));
+
+     *p_table ++ = ( 0 << 26 |                                          /* reserved */
+                     format_convert(tran_coef[5], 2, 10, 1) << 13 |
+                     format_convert(tran_coef[4], 2, 10, 1));
+
+     *p_table ++ = ( 0 << 26 |                                          /* reserved */
+                     format_convert(tran_coef[7], 2, 10, 1) << 13 |
+                     format_convert(tran_coef[6], 2, 10, 1));
+
+     *p_table ++ = ( 0 << 13 |                                          /* reserved */
+                     format_convert(tran_coef[8], 2, 10, 1));
+
+     *p_table ++ = ( 0 << 22 |                                          /* reserved */
+                     format_convert(u_coef[0], 10, 0, 1) << 11 |
+                     format_convert(v_coef[0], 10, 0, 1));
+
+     *p_table ++ = ( 0 << 22 |                                          /* reserved */
+                     format_convert(u_coef[1], 10, 0, 1) << 11 |
+                     format_convert(v_coef[1], 10, 0, 1));
+
+     *p_table ++ = ( 0 << 22 |                                          /* reserved */
+                     format_convert(u_coef[2], 10, 0, 1) << 11 |
+                     format_convert(v_coef[2], 10, 0, 1));
+ }
+
+ /*
+  * Program the AOI section of the IECP state table: 3 dwords at offset 252
+  * from proc_ctx->iecp_state_table.ptr.
+  *
+  * When VPP_IECP_AOI is not set in filters_mask the section is zeroed;
+  * otherwise a fixed set of register values is written.  `ctx` is unused.
+  */
+ void hsw_veb_iecp_aoi_table(VADriverContextP ctx, struct intel_vebox_context *proc_ctx)
+ {
+     static const unsigned int aoi_defaults[3] = {
+         0x00000000, 0x00030000, 0x00030000,
+     };
+     unsigned int *section = (unsigned int*)(proc_ctx->iecp_state_table.ptr + 252);
+
+     if (proc_ctx->filters_mask & VPP_IECP_AOI)
+         memcpy(section, aoi_defaults, sizeof(aoi_defaults));
+     else
+         memset(section, 0, 3 * 4);
+ }
+
+ /*
+  * Fill the VEBOX state tables that the current job needs.
+  *
+  * The DN/DI table is written when any of the low 8 bits of filters_mask is
+  * set.  The IECP table (STD/STE, ACE, TCC, ProcAmp, CSC and AOI sections)
+  * is written when any of bits 8..15 is set or when the input and output
+  * FOURCC differ.  Each buffer object is mapped for writing, filled and
+  * unmapped again.
+  */
+ void hsw_veb_state_table_setup(VADriverContextP ctx, struct intel_vebox_context *proc_ctx)
+ {
+     const int needs_dndi = (proc_ctx->filters_mask & 0x000000ff) != 0;
+     const int needs_iecp = (proc_ctx->filters_mask & 0x0000ff00) != 0 ||
+                            proc_ctx->fourcc_input != proc_ctx->fourcc_output;
+
+     if (needs_dndi) {
+         dri_bo *table_bo = proc_ctx->dndi_state_table.bo;
+
+         dri_bo_map(table_bo, 1);
+         proc_ctx->dndi_state_table.ptr = table_bo->virtual;
+
+         hsw_veb_dndi_table(ctx, proc_ctx);
+
+         dri_bo_unmap(table_bo);
+     }
+
+     if (needs_iecp) {
+         dri_bo *table_bo = proc_ctx->iecp_state_table.bo;
+
+         dri_bo_map(table_bo, 1);
+         proc_ctx->iecp_state_table.ptr = table_bo->virtual;
+
+         hsw_veb_iecp_std_table(ctx, proc_ctx);
+         hsw_veb_iecp_ace_table(ctx, proc_ctx);
+         hsw_veb_iecp_tcc_table(ctx, proc_ctx);
+         hsw_veb_iecp_pro_amp_table(ctx, proc_ctx);
+         hsw_veb_iecp_csc_table(ctx, proc_ctx);
+         hsw_veb_iecp_aoi_table(ctx, proc_ctx);
+
+         dri_bo_unmap(table_bo);
+     }
+ }
+
+ /*
+  * Emit the 6-dword VEB_STATE command into proc_ctx->batch.
+  *
+  * The control dword enables DN (filters_mask bit 0), DI (bit 1) and global
+  * IECP (any of bits 8..15), and flags the first frame when DN or DI is
+  * active.  It is followed by relocations to the DN/DI, IECP, gamut and
+  * vertex state tables.  `ctx` is unused.
+  */
+ void hsw_veb_state_command(VADriverContextP ctx, struct intel_vebox_context *proc_ctx)
+ {
+     struct intel_batchbuffer *batch = proc_ctx->batch;
+     const unsigned int dn_on = (proc_ctx->filters_mask & 0x01) ? 1 : 0;
+     const unsigned int di_on = (proc_ctx->filters_mask & 0x02) ? 1 : 0;
+     const unsigned int iecp_on = (proc_ctx->filters_mask & 0xff00) ? 1 : 0;
+     const unsigned int first_dndi_frame =
+         !!(proc_ctx->is_first_frame && (di_on || dn_on));
+
+     BEGIN_VEB_BATCH(batch, 6);
+     OUT_VEB_BATCH(batch, VEB_STATE | (6 - 2));
+     OUT_VEB_BATCH(batch,
+                   0 << 26 |                 /* state surface control bits */
+                   0 << 11 |                 /* reserved */
+                   0 << 10 |                 /* pipe sync disable */
+                   2 << 8 |                  /* DI output frame */
+                   0 << 7 |                  /* 444->422 downsample method */
+                   0 << 6 |                  /* 422->420 downsample method */
+                   first_dndi_frame << 5 |   /* DN/DI first frame */
+                   di_on << 4 |              /* DI enable */
+                   dn_on << 3 |              /* DN enable */
+                   iecp_on << 2 |            /* global IECP enable */
+                   0 << 1 |                  /* ColorGamutCompressionEnable */
+                   0 );                      /* ColorGamutExpansionEnable */
+
+     /* State table addresses */
+     OUT_RELOC(batch, proc_ctx->dndi_state_table.bo,
+               I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+     OUT_RELOC(batch, proc_ctx->iecp_state_table.bo,
+               I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+     OUT_RELOC(batch, proc_ctx->gamut_state_table.bo,
+               I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+     OUT_RELOC(batch, proc_ctx->vertex_state_table.bo,
+               I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+
+     ADVANCE_VEB_BATCH(batch);
+ }
+
+ /*
+  * Emit the 6-dword VEB_SURFACE_STATE command describing either the input
+  * surface (is_output == 0, frame store slot FRAME_IN_CURRENT) or the
+  * output surface (is_output != 0, slot FRAME_OUT_CURRENT).
+  *
+  * Surface format and pitch are derived from the surface FOURCC (NV12, YUY2,
+  * AYUV or RGBA); the chroma plane Y offsets and the tiling mode are read
+  * from the surface object and its buffer object.
+  *
+  * The NV12 branch used to print a debug line to stdout for every surface
+  * state emitted; a driver library must not write to the application's
+  * stdout, so that printf has been removed.
+  *
+  * NOTE(review): for any other FOURCC surface_pitch stays 0 and the pitch
+  * field is built from (0 - 1); callers presumably never pass such surfaces
+  * - confirm.
+  */
+ void hsw_veb_surface_state(VADriverContextP ctx, struct intel_vebox_context *proc_ctx, unsigned int is_output)
+ {
+     struct i965_driver_data *i965 = i965_driver_data(ctx);
+     struct intel_batchbuffer *batch = proc_ctx->batch;
+     unsigned int u_offset_y = 0, v_offset_y = 0;
+     unsigned int is_uv_interleaved = 0, tiling = 0, swizzle = 0;
+     unsigned int surface_format = PLANAR_420_8;
+     struct object_surface* obj_surf = NULL;
+     unsigned int surface_pitch = 0;
+     unsigned int half_pitch_chroma = 0;
+
+     if(is_output){
+         obj_surf = SURFACE(proc_ctx->frame_store[FRAME_OUT_CURRENT].surface_id);
+     }else {
+         obj_surf = SURFACE(proc_ctx->frame_store[FRAME_IN_CURRENT].surface_id);
+     }
+
+     /* Only NV12 has interleaved chroma; no format uses half-pitch chroma. */
+     if (obj_surf->fourcc == VA_FOURCC_NV12) {
+         surface_format = PLANAR_420_8;
+         surface_pitch = obj_surf->width;
+         is_uv_interleaved = 1;
+     } else if (obj_surf->fourcc == VA_FOURCC_YUY2) {
+         surface_format = YCRCB_NORMAL;
+         surface_pitch = obj_surf->width * 2;
+     } else if (obj_surf->fourcc == VA_FOURCC_AYUV) {
+         surface_format = PACKED_444A_8;
+         surface_pitch = obj_surf->width * 4;
+     } else if (obj_surf->fourcc == VA_FOURCC_RGBA) {
+         surface_format = R8G8B8A8_UNORM_SRGB;
+         surface_pitch = obj_surf->width * 4;
+     }
+
+     u_offset_y = obj_surf->y_cb_offset;
+     v_offset_y = obj_surf->y_cr_offset;
+
+     dri_bo_get_tiling(obj_surf->bo, &tiling, &swizzle);
+
+     BEGIN_VEB_BATCH(batch, 6);
+     OUT_VEB_BATCH(batch, VEB_SURFACE_STATE | (6 - 2));
+     OUT_VEB_BATCH(batch,
+                   0 << 1 |                          /* reserved */
+                   is_output);                       /* surface identification */
+
+     OUT_VEB_BATCH(batch,
+                   (proc_ctx->pic_height - 1) << 18 | /* height */
+                   (proc_ctx->pic_width ) << 4 |     /* width */
+                   0);                               /* reserved */
+
+     OUT_VEB_BATCH(batch,
+                   surface_format << 28 |            /* surface format */
+                   is_uv_interleaved << 27 |         /* interleaved chroma */
+                   0 << 20 |                         /* reserved */
+                   (surface_pitch - 1) << 3 |        /* surface pitch */
+                   half_pitch_chroma << 2 |          /* half pitch for chroma */
+                   !!tiling << 1 |                   /* tiled surface */
+                   (tiling == I915_TILING_Y));       /* tile walk, ignored for linear surfaces */
+
+     OUT_VEB_BATCH(batch,
+                   0 << 29 |                         /* reserved */
+                   0 << 16 |                         /* X offset for U(Cb) */
+                   0 << 15 |                         /* reserved */
+                   u_offset_y);                      /* Y offset for U(Cb) */
+
+     OUT_VEB_BATCH(batch,
+                   0 << 29 |                         /* reserved */
+                   0 << 16 |                         /* X offset for V(Cr) */
+                   0 << 15 |                         /* reserved */
+                   v_offset_y );                     /* Y offset for V(Cr) */
+
+     ADVANCE_VEB_BATCH(batch);
+ }
+
+ /*
+  * Emit the 10-dword VEB_DNDI_IECP_STATE command into proc_ctx->batch.
+  *
+  * The command carries the horizontal range (0 .. pic_width) followed by
+  * eight frame-store relocations: three that are only read (current input,
+  * previous input, input STMM) and five that are written (output STMM,
+  * denoised current, current output, previous output, statistics).
+  * `ctx` is unused.
+  */
+ void hsw_veb_dndi_iecp_command(VADriverContextP ctx, struct intel_vebox_context *proc_ctx)
+ {
+     const int read_slots[] = {
+         FRAME_IN_CURRENT, FRAME_IN_PREVIOUS, FRAME_IN_STMM,
+     };
+     const int write_slots[] = {
+         FRAME_OUT_STMM, FRAME_OUT_CURRENT_DN, FRAME_OUT_CURRENT,
+         FRAME_OUT_PREVIOUS, FRAME_OUT_STATISTIC,
+     };
+     struct intel_batchbuffer *batch = proc_ctx->batch;
+     unsigned char frame_ctrl_bits = 0;
+     unsigned int first_x = 0;
+     unsigned int last_x = proc_ctx->pic_width;
+     unsigned int n;
+
+     BEGIN_VEB_BATCH(batch, 10);
+     OUT_VEB_BATCH(batch, VEB_DNDI_IECP_STATE | (10 - 2));
+     OUT_VEB_BATCH(batch, first_x << 16 | last_x);
+
+     /* inputs: read domain only */
+     for (n = 0; n < sizeof(read_slots) / sizeof(read_slots[0]); n++)
+         OUT_RELOC(batch,
+                   proc_ctx->frame_store[read_slots[n]].bo,
+                   I915_GEM_DOMAIN_RENDER, 0, frame_ctrl_bits);
+
+     /* outputs: read and write domain */
+     for (n = 0; n < sizeof(write_slots) / sizeof(write_slots[0]); n++)
+         OUT_RELOC(batch,
+                   proc_ctx->frame_store[write_slots[n]].bo,
+                   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, frame_ctrl_bits);
+
+     ADVANCE_VEB_BATCH(batch);
+ }
+
+
+ /*
+  * Bind the caller's input and output surfaces to the frame store and take
+  * a reference on each buffer object.
+  *
+  * The input surface always goes to slot FRAME_IN_CURRENT.  The output
+  * surface goes to FRAME_OUT_CURRENT_DN when denoise is the only active
+  * filter (filters_mask == VPP_DNDI_DN) and to FRAME_OUT_CURRENT otherwise.
+  * Both slots are marked as not internal.
+  */
+ void hsw_veb_surface_reference(VADriverContextP ctx,
+                                struct intel_vebox_context *proc_ctx)
+ {
+     struct i965_driver_data *i965 = i965_driver_data(ctx);
+     const int out_slot = (proc_ctx->filters_mask == VPP_DNDI_DN) ?
+                          FRAME_OUT_CURRENT_DN : FRAME_OUT_CURRENT;
+     struct object_surface *surface;
+
+     /* input surface */
+     surface = SURFACE(proc_ctx->surface_input);
+     proc_ctx->frame_store[FRAME_IN_CURRENT].surface_id = proc_ctx->surface_input;
+     proc_ctx->frame_store[FRAME_IN_CURRENT].bo = surface->bo;
+     proc_ctx->frame_store[FRAME_IN_CURRENT].is_internal_surface = 0;
+     dri_bo_reference(proc_ctx->frame_store[FRAME_IN_CURRENT].bo);
+
+     /* output surface */
+     surface = SURFACE(proc_ctx->surface_output);
+     proc_ctx->frame_store[out_slot].surface_id = proc_ctx->surface_output;
+     proc_ctx->frame_store[out_slot].bo = surface->bo;
+     proc_ctx->frame_store[out_slot].is_internal_surface = 0;
+     dri_bo_reference(proc_ctx->frame_store[out_slot].bo);
+ }
+
+ /*
+  * Undo hsw_veb_surface_reference(): drop the references held on the input
+  * and output buffer objects and reset the corresponding frame store slots
+  * (surface_id = -1, bo = NULL, is_internal_surface = 0).
+  *
+  * The output slot is FRAME_OUT_CURRENT_DN when denoise is the only active
+  * filter (filters_mask == VPP_DNDI_DN) and FRAME_OUT_CURRENT otherwise.
+  *
+  * The output path used to clear the bo pointer *before* calling
+  * dri_bo_unreference(), so NULL was passed and the reference taken on the
+  * output buffer was never released.  The reference is now dropped first.
+  * The input path also issued a redundant second unreference on the already
+  * cleared pointer; that call has been removed.  `ctx` is unused.
+  */
+ void hsw_veb_surface_unreference(VADriverContextP ctx,
+                                  struct intel_vebox_context *proc_ctx)
+ {
+     const int out_slot = (proc_ctx->filters_mask == VPP_DNDI_DN) ?
+                          FRAME_OUT_CURRENT_DN : FRAME_OUT_CURRENT;
+
+     /* unreference the input surface */
+     dri_bo_unreference(proc_ctx->frame_store[FRAME_IN_CURRENT].bo);
+     proc_ctx->frame_store[FRAME_IN_CURRENT].surface_id = -1;
+     proc_ctx->frame_store[FRAME_IN_CURRENT].bo = NULL;
+     proc_ctx->frame_store[FRAME_IN_CURRENT].is_internal_surface = 0;
+
+     /* unreference the shared output surface */
+     dri_bo_unreference(proc_ctx->frame_store[out_slot].bo);
+     proc_ctx->frame_store[out_slot].surface_id = -1;
+     proc_ctx->frame_store[out_slot].bo = NULL;
+     proc_ctx->frame_store[out_slot].is_internal_surface = 0;
+ }
+
+/*
+ * (Re)allocate one VEBOX state table: dri_bo_alloc() is called with
+ * 0x1000 for both of its numeric arguments (size and alignment).
+ *
+ * Any buffer previously stored in the table is released first.  The
+ * buffer returned by dri_bo_alloc() already carries the one reference the
+ * context owns, so no additional dri_bo_reference() is taken; a second
+ * reference would never be dropped at teardown and the buffer would leak.
+ */
+static void hsw_veb_state_table_alloc(struct i965_driver_data *i965,
+                                      VEBBuffer *table,
+                                      const char *name)
+{
+    dri_bo_unreference(table->bo);
+    table->bo = dri_bo_alloc(i965->intel.bufmgr, name, 0x1000, 0x1000);
+    assert(table->bo);
+}
+
+/*
+ * One-time resource setup for the VEBOX pipeline.
+ *
+ * ctx      - driver context
+ * proc_ctx - VEBOX context; surface_input / surface_output are read,
+ *            pic_width, pic_height, fourcc_input, fourcc_output,
+ *            frame_store[] and the four state tables are written
+ *
+ * Steps:
+ *  1. record picture size from the input surface (input and output must
+ *     have identical original dimensions);
+ *  2. determine fourcc / subsampling / tiling of the input and output
+ *     surfaces, allocating tiled NV12 storage for a surface that has no
+ *     buffer yet;
+ *  3. create FRAME_STORE_SUM internal surfaces and attach them to the
+ *     frame store (slot FRAME_IN_CURRENT is bound to the caller's input
+ *     surface instead);
+ *  4. allocate the dndi / iecp / gamut / vertex state tables.
+ */
+void hsw_veb_resource_prepare(VADriverContextP ctx,
+                              struct intel_vebox_context *proc_ctx)
+{
+    VAStatus va_status;
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    unsigned int input_fourcc, output_fourcc;
+    unsigned int input_sampling, output_sampling;
+    unsigned int input_tiling, output_tiling;
+    unsigned int i, swizzle;
+    VASurfaceID surfaces[FRAME_STORE_SUM];
+
+    struct object_surface* obj_surf_in = SURFACE(proc_ctx->surface_input);
+    struct object_surface* obj_surf_out = SURFACE(proc_ctx->surface_output);
+
+    /* SURFACE() yields NULL for an unknown surface id */
+    assert(obj_surf_in && obj_surf_out);
+
+    assert(obj_surf_in->orig_width == obj_surf_out->orig_width &&
+           obj_surf_in->orig_height == obj_surf_out->orig_height);
+
+    proc_ctx->pic_width = obj_surf_in->orig_width;
+    proc_ctx->pic_height = obj_surf_in->orig_height;
+
+    /* record vebox pipeline input surface format information */
+    if (obj_surf_in->bo == NULL) {
+        /* no storage yet: default to tiled NV12 4:2:0 */
+        input_fourcc = VA_FOURCC('N','V','1','2');
+        input_sampling = SUBSAMPLE_YUV420;
+        input_tiling = 1;
+        i965_check_alloc_surface_bo(ctx, obj_surf_in, input_tiling, input_fourcc, input_sampling);
+    } else {
+        input_fourcc = obj_surf_in->fourcc;
+        input_sampling = obj_surf_in->subsampling;
+        dri_bo_get_tiling(obj_surf_in->bo, &input_tiling, &swizzle);
+        input_tiling = !!input_tiling; /* collapse tiling mode to a 0/1 flag */
+    }
+
+    /* record vebox pipeline output surface format information */
+    if (obj_surf_out->bo == NULL) {
+        output_fourcc = VA_FOURCC('N','V','1','2');
+        output_sampling = SUBSAMPLE_YUV420;
+        output_tiling = 1;
+        i965_check_alloc_surface_bo(ctx, obj_surf_out, output_tiling, output_fourcc, output_sampling);
+    } else {
+        output_fourcc = obj_surf_out->fourcc;
+        output_sampling = obj_surf_out->subsampling;
+        dri_bo_get_tiling(obj_surf_out->bo, &output_tiling, &swizzle);
+        output_tiling = !!output_tiling;
+    }
+
+    assert(input_fourcc == VA_FOURCC_NV12 ||
+           input_fourcc == VA_FOURCC_YUY2 ||
+           input_fourcc == VA_FOURCC_AYUV ||
+           input_fourcc == VA_FOURCC_RGBA);
+    assert(output_fourcc == VA_FOURCC_NV12 ||
+           output_fourcc == VA_FOURCC_YUY2 ||
+           output_fourcc == VA_FOURCC_AYUV ||
+           output_fourcc == VA_FOURCC_RGBA);
+
+    proc_ctx->fourcc_input = input_fourcc;
+    proc_ctx->fourcc_output = output_fourcc;
+
+    /* allocate vebox pipeline surfaces */
+    va_status = i965_CreateSurfaces(ctx,
+                                    proc_ctx->pic_width,
+                                    proc_ctx->pic_height,
+                                    VA_RT_FORMAT_YUV420,
+                                    FRAME_STORE_SUM,
+                                    surfaces);
+    assert(va_status == VA_STATUS_SUCCESS);
+
+    for (i = FRAME_IN_CURRENT; i < FRAME_STORE_SUM; i++) {
+        struct object_surface* obj_surf = SURFACE(surfaces[i]);
+
+        proc_ctx->frame_store[i].surface_id = surfaces[i];
+
+        if (i == FRAME_IN_CURRENT) {
+            /* the current input is the caller's surface, not an internal
+             * one; surfaces[FRAME_IN_CURRENT] stays unused and no
+             * reference is taken here */
+            proc_ctx->frame_store[i].surface_id = proc_ctx->surface_input;
+            proc_ctx->frame_store[i].bo = (SURFACE(proc_ctx->surface_input))->bo;
+            proc_ctx->frame_store[i].is_internal_surface = 0;
+            continue;
+        } else if (i == FRAME_IN_PREVIOUS || i == FRAME_OUT_CURRENT_DN) {
+            /* same layout as the input surface */
+            i965_check_alloc_surface_bo(ctx, obj_surf, input_tiling, input_fourcc, input_sampling);
+        } else if (i == FRAME_IN_STMM || i == FRAME_OUT_STMM) {
+            /* STMM surfaces are always allocated tiled */
+            i965_check_alloc_surface_bo(ctx, obj_surf, 1, input_fourcc, input_sampling);
+        } else if (i >= FRAME_OUT_CURRENT) {
+            /* remaining output-side slots follow the output layout */
+            i965_check_alloc_surface_bo(ctx, obj_surf, output_tiling, output_fourcc, output_sampling);
+        }
+
+        /* the frame store keeps its own reference on internal buffers */
+        proc_ctx->frame_store[i].bo = obj_surf->bo;
+        dri_bo_reference(proc_ctx->frame_store[i].bo);
+        proc_ctx->frame_store[i].is_internal_surface = 1;
+    }
+
+    /* alloc dndi state table */
+    hsw_veb_state_table_alloc(i965, &proc_ctx->dndi_state_table,
+                              "vebox: dndi state Buffer");
+
+    /* alloc iecp state table */
+    hsw_veb_state_table_alloc(i965, &proc_ctx->iecp_state_table,
+                              "vebox: iecp state Buffer");
+
+    /* alloc gamut state table */
+    hsw_veb_state_table_alloc(i965, &proc_ctx->gamut_state_table,
+                              "vebox: gamut state Buffer");
+
+    /* alloc vertex state table */
+    hsw_veb_state_table_alloc(i965, &proc_ctx->vertex_state_table,
+                              "vebox: vertex state Buffer");
+}
+
+/*
+ * Run the VEBOX pipeline once for the surfaces currently set in proc_ctx.
+ *
+ * On the first frame the pipeline resources are prepared; then the
+ * input/output surfaces are bound, one atomic VEBOX batch is built
+ * (flush, surface states, state tables, state command, DNDI/IECP command),
+ * flushed, and the surfaces are unbound again.
+ *
+ * Always returns VA_STATUS_SUCCESS.
+ */
+VAStatus gen75_vebox_process_picture(VADriverContextP ctx,
+                                     struct intel_vebox_context *proc_ctx)
+{
+    /* resources are set up lazily, the first time a picture is processed */
+    if (proc_ctx->is_first_frame)
+        hsw_veb_resource_prepare(ctx, proc_ctx);
+
+    hsw_veb_surface_reference(ctx, proc_ctx);
+
+    /* build the command batch */
+    intel_batchbuffer_start_atomic_veb(proc_ctx->batch, 0x1000);
+    intel_batchbuffer_emit_mi_flush(proc_ctx->batch);
+
+    hsw_veb_surface_state(ctx, proc_ctx, INPUT_SURFACE);
+    hsw_veb_surface_state(ctx, proc_ctx, OUTPUT_SURFACE);
+    hsw_veb_state_table_setup(ctx, proc_ctx);
+    hsw_veb_state_command(ctx, proc_ctx);
+    hsw_veb_dndi_iecp_command(ctx, proc_ctx);
+
+    /* close and submit it */
+    intel_batchbuffer_end_atomic(proc_ctx->batch);
+    intel_batchbuffer_flush(proc_ctx->batch);
+
+    hsw_veb_surface_unreference(ctx, proc_ctx);
+
+    /* every later call takes the already-prepared path */
+    proc_ctx->is_first_frame = 0;
+
+    return VA_STATUS_SUCCESS;
+}
+
+/*
+ * Tear down a VEBOX context created by gen75_vebox_context_init().
+ *
+ * ctx      - driver context (unused here)
+ * proc_ctx - context to destroy; freed by this call and must not be used
+ *            afterwards
+ *
+ * Releases the references held on internal frame-store buffers, all four
+ * state tables (dndi, iecp, gamut, vertex) and the batch buffer.
+ */
+void gen75_vebox_context_destroy(VADriverContextP ctx,
+                                 struct intel_vebox_context *proc_ctx)
+{
+    int i;
+
+    /* release vebox pipeline surface */
+    for (i = 0; i < FRAME_STORE_SUM; i++) {
+        /* only internal slots own a reference at this point */
+        if (proc_ctx->frame_store[i].is_internal_surface)
+            dri_bo_unreference(proc_ctx->frame_store[i].bo);
+
+        proc_ctx->frame_store[i].surface_id = -1;
+        proc_ctx->frame_store[i].bo = NULL;
+    }
+
+    /* release dndi state table */
+    dri_bo_unreference(proc_ctx->dndi_state_table.bo);
+    proc_ctx->dndi_state_table.bo = NULL;
+
+    /* release iecp state table */
+    dri_bo_unreference(proc_ctx->iecp_state_table.bo);
+    proc_ctx->iecp_state_table.bo = NULL;
+
+    /* release gamut state table */
+    dri_bo_unreference(proc_ctx->gamut_state_table.bo);
+    proc_ctx->gamut_state_table.bo = NULL;
+
+    /* release vertex state table */
+    dri_bo_unreference(proc_ctx->vertex_state_table.bo);
+    proc_ctx->vertex_state_table.bo = NULL;
+
+    intel_batchbuffer_free(proc_ctx->batch);
+
+    free(proc_ctx);
+}
+
+/*
+ * Allocate and initialise a VEBOX processing context.
+ *
+ * ctx - driver context the batch buffer is created for
+ *
+ * Returns the new context, or NULL if the allocation fails.  The context
+ * starts zero-filled (empty frame store, no state tables, filters_mask 0)
+ * with is_first_frame set so that the first processed picture triggers
+ * resource preparation.  Release it with gen75_vebox_context_destroy().
+ */
+struct intel_vebox_context * gen75_vebox_context_init(VADriverContextP ctx)
+{
+    struct intel_driver_data *intel = intel_driver_data(ctx);
+    struct intel_vebox_context *proc_context = calloc(1, sizeof(struct intel_vebox_context));
+
+    if (proc_context == NULL)
+        return NULL;
+
+    /* calloc() already zeroed frame_store[] and filters_mask */
+    proc_context->batch = intel_batchbuffer_new(intel, I915_EXEC_VEBOX, 0);
+    proc_context->is_first_frame = 1;
+
+    return proc_context;
+}
+
diff --git a/src/gen75_vpp_vebox.h b/src/gen75_vpp_vebox.h
new file mode 100644
index 0000000..5281c75
--- /dev/null
+++ b/src/gen75_vpp_vebox.h
@@ -0,0 +1,134 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Li Xiaowei <xiaowei.a.li at intel.com>
+ *
+ */
+
+#ifndef _GEN75_VPP_VEBOX_H
+#define _GEN75_VPP_VEBOX_H
+
+#include <xf86drm.h>
+#include <drm.h>
+#include <i915_drm.h>
+#include <intel_bufmgr.h>
+#include "i965_drv_video.h"
+
+/* Selector passed to hsw_veb_surface_state() to pick which surface to program */
+#define INPUT_SURFACE 0
+#define OUTPUT_SURFACE 1
+
+/*
+ * Filter flags, one distinct bit each, compared against
+ * intel_vebox_context.filters_mask (a mask equal to VPP_DNDI_DN routes the
+ * output to the FRAME_OUT_CURRENT_DN slot).
+ * NOTE(review): 0x000000xx presumably groups the denoise/deinterlace stage
+ * and 0x0000xx00 the IECP stage -- confirm against the filter setup code.
+ */
+#define VPP_DNDI_DN 0x00000001
+#define VPP_DNDI_DI 0x00000002
+#define VPP_IECP_STD_STE 0x00000100
+#define VPP_IECP_ACE 0x00000200
+#define VPP_IECP_TCC 0x00000400
+#define VPP_IECP_PRO_AMP 0x00000800
+#define VPP_IECP_CSC 0x00001000
+#define VPP_IECP_AOI 0x00002000
+/* NOTE(review): looks like an upper bound on the number of filters -- confirm where it is used */
+#define MAX_FILTER_SUM 8
+
+/*
+ * Indices into intel_vebox_context.frame_store[].
+ * The order is significant: resource setup gives every slot at or after
+ * FRAME_OUT_CURRENT the output surface layout, and FRAME_STORE_SUM must
+ * stay last because it sizes the array.
+ */
+enum {
+ FRAME_IN_CURRENT = 0, /* caller's input surface for the frame being processed */
+ FRAME_IN_PREVIOUS, /* allocated with the input surface layout */
+ FRAME_IN_STMM, /* always allocated tiled */
+ FRAME_OUT_STMM, /* always allocated tiled */
+ FRAME_OUT_CURRENT_DN, /* output slot used when filters_mask == VPP_DNDI_DN */
+ FRAME_OUT_CURRENT, /* output slot used for every other filters_mask */
+ FRAME_OUT_PREVIOUS, /* allocated with the output surface layout */
+ FRAME_OUT_STATISTIC, /* allocated with the output surface layout */
+ FRAME_STORE_SUM, /* number of slots, not a slot itself */
+};
+
+/*
+ * Surface format codes, numbered consecutively from 0.
+ * NOTE(review): presumably these values are written into the VEBOX surface
+ * state and must match the hardware encoding -- confirm before reordering.
+ */
+enum SURFACE_FORMAT{
+ YCRCB_NORMAL = 0,
+ YCRCB_SWAPUVY,
+ YCRCB_SWAPUV,
+ YCRCB_SWAPY,
+ PLANAR_420_8, //NV12
+ PACKED_444A_8,
+ PACKED_422_16,
+ R10G10B10A2_UNORM_SRGB,
+ R8G8B8A8_UNORM_SRGB,
+ PACKED_444_16,
+ PLANAR_422_16,
+ Y8_UNORM,
+ PLANAR_420_16,
+ R16G16B16A16,
+ SURFACE_FORMAT_SUM /* number of formats listed above */
+};
+
+/* One slot of the VEBOX frame store (see intel_vebox_context.frame_store). */
+typedef struct veb_frame_store {
+ VASurfaceID surface_id; /* VA surface bound to the slot; set to -1 when the slot is released */
+ dri_bo *bo; /* buffer object of that surface, NULL when the slot is empty */
+ unsigned char is_internal_surface; /* 1: allocated by the vebox code itself, 0: caller's surface */
+} VEBFrameStore;
+
+/* A state-table buffer owned by the VEBOX context. */
+typedef struct veb_buffer {
+ dri_bo *bo; /* buffer object holding the table, NULL until allocated */
+ void * ptr; /* NOTE(review): presumably a CPU mapping of bo -- confirm against the table setup code */
+ unsigned char valid; /* NOTE(review): meaning not established in this file section -- confirm */
+} VEBBuffer;
+
+/*
+ * Per-pipeline VEBOX state.  Created by gen75_vebox_context_init(),
+ * used by gen75_vebox_process_picture() and released by
+ * gen75_vebox_context_destroy().
+ */
+struct intel_vebox_context
+{
+ struct intel_batchbuffer *batch; /* command batch, created with I915_EXEC_VEBOX */
+
+ VASurfaceID surface_input; /* surface to read for the next processed picture */
+ VASurfaceID surface_output; /* surface to write for the next processed picture */
+ unsigned int fourcc_input; /* input format recorded during resource preparation */
+ unsigned int fourcc_output; /* output format recorded during resource preparation */
+ unsigned int pic_width; /* orig_width of the input surface */
+ unsigned int pic_height; /* orig_height of the input surface */
+
+ VEBFrameStore frame_store[FRAME_STORE_SUM]; /* indexed by the FRAME_* enum */
+
+ VEBBuffer dndi_state_table;
+ VEBBuffer iecp_state_table;
+ VEBBuffer gamut_state_table;
+ VEBBuffer vertex_state_table;
+
+ unsigned int filters_mask; /* combination of VPP_* filter flags */
+ unsigned char is_first_frame; /* 1 until the first picture has been processed */
+
+ /*
+ VAProcPipelineParameterBuffer * pipeline_param;
+ void * filter_dn;
+ void * filter_di;
+ void * filter_iecp_std;
+ void * filter_iecp_ace;
+ void * filter_iecp_tcc;
+ void * filter_iecp_amp;
+ void * filter_iecp_csc;
+ */
+};
+
+VAStatus gen75_vebox_process_picture(VADriverContextP ctx,
+ struct intel_vebox_context *proc_ctx);
+
+void gen75_vebox_context_destroy(VADriverContextP ctx,
+ struct intel_vebox_context *proc_ctx);
+
+struct intel_vebox_context * gen75_vebox_context_init(VADriverContextP ctx);
+
+#endif
diff --git a/src/gen7_mfd.c b/src/gen7_mfd.c
old mode 100644
new mode 100755
index b4584c4..aec9694
--- a/src/gen7_mfd.c
+++ b/src/gen7_mfd.c
@@ -26,15 +26,13 @@
*
*/
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <assert.h>
+#ifndef HAVE_GEN_AVC_SURFACE
+#define HAVE_GEN_AVC_SURFACE 1
+#endif
-#include "config.h"
+#include "sysdeps.h"
#include "intel_batchbuffer.h"
#include "intel_driver.h"
-
#include "i965_defines.h"
#include "i965_drv_video.h"
#include "i965_decoder_utils.h"
@@ -168,38 +166,21 @@ gen7_mfd_avc_frame_store_index(VADriverContextP ctx,
}
}
-static void
-gen7_mfd_free_avc_surface(void **data)
-{
- struct gen7_avc_surface *gen7_avc_surface = *data;
-
- if (!gen7_avc_surface)
- return;
-
- dri_bo_unreference(gen7_avc_surface->dmv_top);
- gen7_avc_surface->dmv_top = NULL;
- dri_bo_unreference(gen7_avc_surface->dmv_bottom);
- gen7_avc_surface->dmv_bottom = NULL;
-
- free(gen7_avc_surface);
- *data = NULL;
-}
-
static void
gen7_mfd_init_avc_surface(VADriverContextP ctx,
VAPictureParameterBufferH264 *pic_param,
struct object_surface *obj_surface)
{
struct i965_driver_data *i965 = i965_driver_data(ctx);
- struct gen7_avc_surface *gen7_avc_surface = obj_surface->private_data;
+ GenAvcSurface *gen7_avc_surface = obj_surface->private_data;
int width_in_mbs, height_in_mbs;
- obj_surface->free_private_data = gen7_mfd_free_avc_surface;
- width_in_mbs = ((pic_param->picture_width_in_mbs_minus1 + 1) & 0xff);
- height_in_mbs = ((pic_param->picture_height_in_mbs_minus1 + 1) & 0xff); /* frame height */
+ obj_surface->free_private_data = gen_free_avc_surface;
+ width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
+ height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
if (!gen7_avc_surface) {
- gen7_avc_surface = calloc(sizeof(struct gen7_avc_surface), 1);
+ gen7_avc_surface = calloc(sizeof(GenAvcSurface), 1);
assert((obj_surface->size & 0x3f) == 0);
obj_surface->private_data = gen7_avc_surface;
}
@@ -212,6 +193,7 @@ gen7_mfd_init_avc_surface(VADriverContextP ctx,
"direct mv w/r buffer",
width_in_mbs * height_in_mbs * 64,
0x1000);
+ assert(gen7_avc_surface->dmv_top);
}
if (gen7_avc_surface->dmv_bottom_flag &&
@@ -220,6 +202,7 @@ gen7_mfd_init_avc_surface(VADriverContextP ctx,
"direct mv w/r buffer",
width_in_mbs * height_in_mbs * 64,
0x1000);
+ assert(gen7_avc_surface->dmv_bottom);
}
}
@@ -420,14 +403,6 @@ gen7_mfd_bsp_buf_base_addr_state(VADriverContextP ctx,
}
static void
-gen7_mfd_aes_state(VADriverContextP ctx,
- struct decode_state *decode_state,
- int standard_select)
-{
- /* FIXME */
-}
-
-static void
gen7_mfd_qm_state(VADriverContextP ctx,
int qm_type,
unsigned char *qm,
@@ -446,18 +421,6 @@ gen7_mfd_qm_state(VADriverContextP ctx,
intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
ADVANCE_BCS_BATCH(batch);
}
-static void
-gen7_mfd_wait(VADriverContextP ctx,
- struct decode_state *decode_state,
- int standard_select,
- struct gen7_mfd_context *gen7_mfd_context)
-{
- struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
-
- BEGIN_BCS_BATCH(batch, 1);
- OUT_BCS_BATCH(batch, MFX_WAIT | (1 << 8));
- ADVANCE_BCS_BATCH(batch);
-}
static void
gen7_mfd_avc_img_state(VADriverContextP ctx,
@@ -497,8 +460,8 @@ gen7_mfd_avc_img_state(VADriverContextP ctx,
mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
!pic_param->pic_fields.bits.field_pic_flag);
- width_in_mbs = ((pic_param->picture_width_in_mbs_minus1 + 1) & 0xff);
- height_in_mbs = ((pic_param->picture_height_in_mbs_minus1 + 1) & 0xff); /* frame height */
+ width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
+ height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
/* MFX unit doesn't support 4:2:2 and 4:4:4 picture */
assert(pic_param->seq_fields.bits.chroma_format_idc == 0 || /* monochrome picture */
@@ -578,7 +541,7 @@ gen7_mfd_avc_directmode_state(VADriverContextP ctx,
struct i965_driver_data *i965 = i965_driver_data(ctx);
struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
struct object_surface *obj_surface;
- struct gen7_avc_surface *gen7_avc_surface;
+ GenAvcSurface *gen7_avc_surface;
VAPictureH264 *va_pic;
int i, j;
@@ -880,7 +843,7 @@ gen7_mfd_avc_decode_init(VADriverContextP ctx,
struct object_surface *obj_surface;
dri_bo *bo;
int i, j, enable_avc_ildb = 0;
- int width_in_mbs;
+ unsigned int width_in_mbs, height_in_mbs;
for (j = 0; j < decode_state->num_slice_params && enable_avc_ildb == 0; j++) {
assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
@@ -906,7 +869,10 @@ gen7_mfd_avc_decode_init(VADriverContextP ctx,
assert(decode_state->pic_param && decode_state->pic_param->buffer);
pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
gen7_mfd_avc_frame_store_index(ctx, pic_param, gen7_mfd_context);
- width_in_mbs = ((pic_param->picture_width_in_mbs_minus1 + 1) & 0xff);
+ width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
+ height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
+ assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
+ assert(height_in_mbs > 0 && height_in_mbs <= 256);
/* Current decoded picture */
va_pic = &pic_param->CurrPic;
@@ -1600,8 +1566,19 @@ gen7_mfd_vc1_pic_state(VADriverContextP ctx,
if (picture_type == GEN7_VC1_I_PICTURE || picture_type == GEN7_VC1_BI_PICTURE) /* I picture */
trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx2;
- else
+ else {
trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx1;
+ /*
+ * 8.3.6.2.1 Transform Type Selection
+ * If variable-sized transform coding is not enabled,
+ * then the 8x8 transform shall be used for all blocks.
+ * it is also MFX_VC1_PIC_STATE requirement.
+ */
+ if (pic_param->transform_fields.bits.variable_sized_transform_flag == 0) {
+ pic_param->transform_fields.bits.mb_level_transform_type_flag = 1;
+ pic_param->transform_fields.bits.frame_level_transform_type = 0;
+ }
+ }
if (picture_type == GEN7_VC1_B_PICTURE) {
@@ -1913,7 +1890,7 @@ gen7_mfd_vc1_decode_picture(VADriverContextP ctx,
intel_batchbuffer_flush(batch);
}
-#ifdef HAVE_JPEG_DECODING
+#ifdef HAVE_VA_JPEG_DECODE
static void
gen7_mfd_jpeg_decode_init(VADriverContextP ctx,
struct decode_state *decode_state,
@@ -1921,10 +1898,10 @@ gen7_mfd_jpeg_decode_init(VADriverContextP ctx,
{
struct i965_driver_data *i965 = i965_driver_data(ctx);
struct object_surface *obj_surface;
- VAPictureParameterBufferJPEG *pic_param;
+ VAPictureParameterBufferJPEGBaseline *pic_param;
int subsampling = SUBSAMPLE_YUV420;
- pic_param = (VAPictureParameterBufferJPEG *)decode_state->pic_param->buffer;
+ pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
if (pic_param->num_components == 1)
subsampling = SUBSAMPLE_YUV400;
@@ -2005,15 +1982,13 @@ gen7_mfd_jpeg_pic_state(VADriverContextP ctx,
struct gen7_mfd_context *gen7_mfd_context)
{
struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
- VAPictureParameterBufferJPEG *pic_param;
+ VAPictureParameterBufferJPEGBaseline *pic_param;
int chroma_type = GEN7_YUV420;
int frame_width_in_blks;
int frame_height_in_blks;
assert(decode_state->pic_param && decode_state->pic_param->buffer);
- pic_param = (VAPictureParameterBufferJPEG *)decode_state->pic_param->buffer;
-
- assert(pic_param->type == VA_JPEG_SOF0); /* only support BASELINE on Ivybridge */
+ pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
if (pic_param->num_components == 1)
chroma_type = GEN7_YUV400;
@@ -2053,20 +2028,20 @@ gen7_mfd_jpeg_pic_state(VADriverContextP ctx,
if (chroma_type == GEN7_YUV400 ||
chroma_type == GEN7_YUV444 ||
chroma_type == GEN7_YUV422V_2Y) {
- frame_width_in_blks = ((pic_param->image_width + 7) / 8);
- frame_height_in_blks = ((pic_param->image_height + 7) / 8);
+ frame_width_in_blks = ((pic_param->picture_width + 7) / 8);
+ frame_height_in_blks = ((pic_param->picture_height + 7) / 8);
} else if (chroma_type == GEN7_YUV411) {
- frame_width_in_blks = ((pic_param->image_width + 31) / 32) * 4;
- frame_height_in_blks = ((pic_param->image_height + 31) / 32) * 4;
+ frame_width_in_blks = ((pic_param->picture_width + 31) / 32) * 4;
+ frame_height_in_blks = ((pic_param->picture_height + 31) / 32) * 4;
} else {
- frame_width_in_blks = ((pic_param->image_width + 15) / 16) * 2;
- frame_height_in_blks = ((pic_param->image_height + 15) / 16) * 2;
+ frame_width_in_blks = ((pic_param->picture_width + 15) / 16) * 2;
+ frame_height_in_blks = ((pic_param->picture_height + 15) / 16) * 2;
}
BEGIN_BCS_BATCH(batch, 3);
OUT_BCS_BATCH(batch, MFX_JPEG_PIC_STATE | (3 - 2));
OUT_BCS_BATCH(batch,
- (va_to_gen7_jpeg_rotation[pic_param->rotation] << 4) | /* rotation */
+ (va_to_gen7_jpeg_rotation[0] << 4) | /* without rotation */
(chroma_type << 0));
OUT_BCS_BATCH(batch,
((frame_height_in_blks - 1) << 16) | /* FrameHeightInBlks */
@@ -2085,24 +2060,24 @@ gen7_mfd_jpeg_huff_table_state(VADriverContextP ctx,
struct gen7_mfd_context *gen7_mfd_context,
int num_tables)
{
- VAHuffmanTableBufferJPEG *huffman_table;
+ VAHuffmanTableBufferJPEGBaseline *huffman_table;
struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
int index;
if (!decode_state->huffman_table || !decode_state->huffman_table->buffer)
return;
- huffman_table = (VAHuffmanTableBufferJPEG *)decode_state->huffman_table->buffer;
+ huffman_table = (VAHuffmanTableBufferJPEGBaseline *)decode_state->huffman_table->buffer;
for (index = 0; index < num_tables; index++) {
int id = va_to_gen7_jpeg_hufftable[index];
BEGIN_BCS_BATCH(batch, 53);
OUT_BCS_BATCH(batch, MFX_JPEG_HUFF_TABLE_STATE | (53 - 2));
OUT_BCS_BATCH(batch, id);
- intel_batchbuffer_data(batch, huffman_table->huffman_table[index].dc_bits, 12);
- intel_batchbuffer_data(batch, huffman_table->huffman_table[index].dc_huffval, 12);
- intel_batchbuffer_data(batch, huffman_table->huffman_table[index].ac_bits, 16);
- intel_batchbuffer_data(batch, huffman_table->huffman_table[index].ac_huffval, 164);
+ intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_dc_codes, 12);
+ intel_batchbuffer_data(batch, huffman_table->huffman_table[index].dc_values, 12);
+ intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_ac_codes, 16);
+ intel_batchbuffer_data(batch, huffman_table->huffman_table[index].ac_values, 164);
ADVANCE_BCS_BATCH(batch);
}
}
@@ -2120,26 +2095,26 @@ gen7_mfd_jpeg_qm_state(VADriverContextP ctx,
struct decode_state *decode_state,
struct gen7_mfd_context *gen7_mfd_context)
{
- VAPictureParameterBufferJPEG *pic_param;
- VAIQMatrixBufferJPEG *iq_matrix;
+ VAPictureParameterBufferJPEGBaseline *pic_param;
+ VAIQMatrixBufferJPEGBaseline *iq_matrix;
int index;
if (!decode_state->iq_matrix || !decode_state->iq_matrix->buffer)
return;
- iq_matrix = (VAIQMatrixBufferJPEG *)decode_state->iq_matrix->buffer;
- pic_param = (VAPictureParameterBufferJPEG *)decode_state->pic_param->buffer;
+ iq_matrix = (VAIQMatrixBufferJPEGBaseline *)decode_state->iq_matrix->buffer;
+ pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
assert(pic_param->num_components <= 3);
for (index = 0; index < pic_param->num_components; index++) {
int qm_type = va_to_gen7_jpeg_qm[pic_param->components[index].component_id - pic_param->components[0].component_id + 1];
- unsigned char *qm = iq_matrix->quantiser_matrix[pic_param->components[index].quantiser_table_selector];
- int precision = iq_matrix->precision[pic_param->components[index].quantiser_table_selector];
+ unsigned char *qm = iq_matrix->quantiser_table[pic_param->components[index].quantiser_table_selector];
unsigned char raster_qm[64];
int j;
- assert(precision == 0);
+ if (!iq_matrix->load_quantiser_table[pic_param->components[index].quantiser_table_selector])
+ continue;
for (j = 0; j < 64; j++)
raster_qm[zigzag_direct[j]] = qm[j];
@@ -2150,9 +2125,9 @@ gen7_mfd_jpeg_qm_state(VADriverContextP ctx,
static void
gen7_mfd_jpeg_bsd_object(VADriverContextP ctx,
- VAPictureParameterBufferJPEG *pic_param,
- VASliceParameterBufferJPEG *slice_param,
- VASliceParameterBufferJPEG *next_slice_param,
+ VAPictureParameterBufferJPEGBaseline *pic_param,
+ VASliceParameterBufferJPEGBaseline *slice_param,
+ VASliceParameterBufferJPEGBaseline *next_slice_param,
dri_bo *slice_data_bo,
struct gen7_mfd_context *gen7_mfd_context)
{
@@ -2165,7 +2140,7 @@ gen7_mfd_jpeg_bsd_object(VADriverContextP ctx,
assert(slice_param->num_components <= pic_param->num_components);
for (i = 0; i < slice_param->num_components; i++) {
- switch (slice_param->components[i].component_id - pic_param->components[0].component_id + 1) {
+ switch (slice_param->components[i].component_selector - pic_param->components[0].component_id + 1) {
case 1:
scan_component_mask |= (1 << 0);
break;
@@ -2616,17 +2591,15 @@ gen7_mfd_jpeg_decode_picture(VADriverContextP ctx,
struct gen7_mfd_context *gen7_mfd_context)
{
struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
- VAPictureParameterBufferJPEG *pic_param;
- VASliceParameterBufferJPEG *slice_param, *next_slice_param, *next_slice_group_param;
+ VAPictureParameterBufferJPEGBaseline *pic_param;
+ VASliceParameterBufferJPEGBaseline *slice_param, *next_slice_param, *next_slice_group_param;
dri_bo *slice_data_bo;
int i, j, max_selector = 0;
assert(decode_state->pic_param && decode_state->pic_param->buffer);
- pic_param = (VAPictureParameterBufferJPEG *)decode_state->pic_param->buffer;
+ pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
/* Currently only support Baseline DCT */
- assert(pic_param->type == VA_JPEG_SOF0);
- assert(pic_param->sample_precision == 8);
gen7_mfd_jpeg_decode_init(ctx, decode_state, gen7_mfd_context);
intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
gen7_mfd_jpeg_wa(ctx, gen7_mfd_context);
@@ -2639,14 +2612,14 @@ gen7_mfd_jpeg_decode_picture(VADriverContextP ctx,
for (j = 0; j < decode_state->num_slice_params; j++) {
assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
- slice_param = (VASliceParameterBufferJPEG *)decode_state->slice_params[j]->buffer;
+ slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
slice_data_bo = decode_state->slice_datas[j]->bo;
gen7_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
if (j == decode_state->num_slice_params - 1)
next_slice_group_param = NULL;
else
- next_slice_group_param = (VASliceParameterBufferJPEG *)decode_state->slice_params[j + 1]->buffer;
+ next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
int component;
@@ -2659,11 +2632,11 @@ gen7_mfd_jpeg_decode_picture(VADriverContextP ctx,
next_slice_param = next_slice_group_param;
for (component = 0; component < slice_param->num_components; component++) {
- if (max_selector < slice_param->components[component].dc_selector)
- max_selector = slice_param->components[component].dc_selector;
+ if (max_selector < slice_param->components[component].dc_table_selector)
+ max_selector = slice_param->components[component].dc_table_selector;
- if (max_selector < slice_param->components[component].ac_selector)
- max_selector = slice_param->components[component].ac_selector;
+ if (max_selector < slice_param->components[component].ac_table_selector)
+ max_selector = slice_param->components[component].ac_table_selector;
}
slice_param++;
@@ -2675,14 +2648,14 @@ gen7_mfd_jpeg_decode_picture(VADriverContextP ctx,
for (j = 0; j < decode_state->num_slice_params; j++) {
assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
- slice_param = (VASliceParameterBufferJPEG *)decode_state->slice_params[j]->buffer;
+ slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
slice_data_bo = decode_state->slice_datas[j]->bo;
gen7_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
if (j == decode_state->num_slice_params - 1)
next_slice_group_param = NULL;
else
- next_slice_group_param = (VASliceParameterBufferJPEG *)decode_state->slice_params[j + 1]->buffer;
+ next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
@@ -2734,7 +2707,7 @@ gen7_mfd_decode_picture(VADriverContextP ctx,
gen7_mfd_vc1_decode_picture(ctx, decode_state, gen7_mfd_context);
break;
-#ifdef HAVE_JPEG_DECODING
+#ifdef HAVE_VA_JPEG_DECODE
case VAProfileJPEGBaseline:
gen7_mfd_jpeg_decode_picture(ctx, decode_state, gen7_mfd_context);
break;
@@ -2796,7 +2769,7 @@ gen7_dec_hw_context_init(VADriverContextP ctx, VAProfile profile)
gen7_mfd_context->base.destroy = gen7_mfd_context_destroy;
gen7_mfd_context->base.run = gen7_mfd_decode_picture;
- gen7_mfd_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER);
+ gen7_mfd_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
gen7_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
diff --git a/src/gen7_mfd.h b/src/gen7_mfd.h
index 08d9e3a..0700c80 100644
--- a/src/gen7_mfd.h
+++ b/src/gen7_mfd.h
@@ -35,13 +35,6 @@
#include <intel_bufmgr.h>
#include "i965_decoder.h"
-struct gen7_avc_surface
-{
- dri_bo *dmv_top;
- dri_bo *dmv_bottom;
- int dmv_bottom_flag;
-};
-
#define GEN7_VC1_I_PICTURE 0
#define GEN7_VC1_P_PICTURE 1
#define GEN7_VC1_B_PICTURE 2
diff --git a/src/i965_avc_bsd.c b/src/i965_avc_bsd.c
index b2b6c92..a118076 100644
--- a/src/i965_avc_bsd.c
+++ b/src/i965_avc_bsd.c
@@ -30,6 +30,10 @@
#include <string.h>
#include <assert.h>
+#ifndef HAVE_GEN_AVC_SURFACE
+#define HAVE_GEN_AVC_SURFACE 1
+#endif
+
#include "intel_batchbuffer.h"
#include "intel_driver.h"
@@ -40,23 +44,6 @@
#include "i965_media.h"
#include "i965_decoder_utils.h"
-static void
-i965_avc_bsd_free_avc_bsd_surface(void **data)
-{
- struct i965_avc_bsd_surface *avc_bsd_surface = *data;
-
- if (!avc_bsd_surface)
- return;
-
- dri_bo_unreference(avc_bsd_surface->dmv_top);
- avc_bsd_surface->dmv_top = NULL;
- dri_bo_unreference(avc_bsd_surface->dmv_bottom);
- avc_bsd_surface->dmv_bottom = NULL;
-
- free(avc_bsd_surface);
- *data = NULL;
-}
-
static void
i965_avc_bsd_init_avc_bsd_surface(VADriverContextP ctx,
struct object_surface *obj_surface,
@@ -64,18 +51,16 @@ i965_avc_bsd_init_avc_bsd_surface(VADriverContextP ctx,
struct i965_h264_context *i965_h264_context)
{
struct i965_driver_data *i965 = i965_driver_data(ctx);
- struct i965_avc_bsd_context *i965_avc_bsd_context = &i965_h264_context->i965_avc_bsd_context;
- struct i965_avc_bsd_surface *avc_bsd_surface = obj_surface->private_data;
+ GenAvcSurface *avc_bsd_surface = obj_surface->private_data;
- obj_surface->free_private_data = i965_avc_bsd_free_avc_bsd_surface;
+ obj_surface->free_private_data = gen_free_avc_surface;
if (!avc_bsd_surface) {
- avc_bsd_surface = calloc(sizeof(struct i965_avc_bsd_surface), 1);
+ avc_bsd_surface = calloc(sizeof(GenAvcSurface), 1);
assert((obj_surface->size & 0x3f) == 0);
obj_surface->private_data = avc_bsd_surface;
}
- avc_bsd_surface->ctx = i965_avc_bsd_context;
avc_bsd_surface->dmv_bottom_flag = (pic_param->pic_fields.bits.field_pic_flag &&
!pic_param->seq_fields.bits.direct_8x8_inference_flag);
@@ -404,7 +389,7 @@ i965_avc_bsd_buf_base_state(VADriverContextP ctx,
int i, j;
VAPictureH264 *va_pic;
struct object_surface *obj_surface;
- struct i965_avc_bsd_surface *avc_bsd_surface;
+ GenAvcSurface *avc_bsd_surface;
i965_avc_bsd_context = &i965_h264_context->i965_avc_bsd_context;
diff --git a/src/i965_avc_bsd.h b/src/i965_avc_bsd.h
index 25606ba..7f83007 100644
--- a/src/i965_avc_bsd.h
+++ b/src/i965_avc_bsd.h
@@ -42,14 +42,6 @@ struct i965_avc_bsd_context
} mpr_row_store;
};
-struct i965_avc_bsd_surface
-{
- struct i965_avc_bsd_context *ctx;
- dri_bo *dmv_top;
- dri_bo *dmv_bottom;
- int dmv_bottom_flag;
-};
-
void i965_avc_bsd_pipeline(VADriverContextP, struct decode_state *, void *h264_context);
void i965_avc_bsd_decode_init(VADriverContextP, void *h264_context);
Bool i965_avc_bsd_ternimate(struct i965_avc_bsd_context *);
diff --git a/src/i965_decoder.h b/src/i965_decoder.h
index 0e69e14..0226707 100644
--- a/src/i965_decoder.h
+++ b/src/i965_decoder.h
@@ -26,6 +26,8 @@
#define I965_DECODER_H
#include <stdint.h>
+#include <stdlib.h>
+
#include <va/va.h>
#include <intel_bufmgr.h>
@@ -43,4 +45,47 @@ struct gen_buffer {
int valid;
};
+#if HAVE_GEN_AVC_SURFACE
+
+static pthread_mutex_t free_avc_surface_lock = PTHREAD_MUTEX_INITIALIZER;
+
+/*
+ * Per-surface private data for AVC decoding. i965_avc_bsd.c stores an
+ * instance in obj_surface->private_data and registers
+ * gen_free_avc_surface() to release it.
+ */
+typedef struct gen_avc_surface GenAvcSurface;
+struct gen_avc_surface
+{
+ dri_bo *dmv_top; /* buffer object; presumably top-field direct MV data -- TODO confirm */
+ dri_bo *dmv_bottom; /* buffer object; presumably bottom-field direct MV data -- TODO confirm */
+ int dmv_bottom_flag; /* i965_avc_bsd.c sets this to field_pic_flag && !direct_8x8_inference_flag */
+};
+
+/*
+ * Destructor for a GenAvcSurface, installed as
+ * obj_surface->free_private_data and called with the address of
+ * obj_surface->private_data.
+ *
+ * data: address of the pointer to release. If *data is NULL nothing is
+ *       done; otherwise both DMV buffer objects are unreferenced, the
+ *       structure is freed and *data is reset to NULL.
+ *
+ * The whole operation runs under free_avc_surface_lock.
+ * NOTE(review): that lock is a static object defined in this header, so
+ * every .c file that includes it with HAVE_GEN_AVC_SURFACE set gets its own
+ * copy -- confirm that callers in different files never race on the same
+ * surface.
+ */
+static void
+gen_free_avc_surface(void **data)
+{
+ GenAvcSurface *avc_surface;
+
+ pthread_mutex_lock(&free_avc_surface_lock);
+
+ avc_surface = *data;
+
+ /* Already released (or never allocated): drop the lock and leave. */
+ if (!avc_surface) {
+ pthread_mutex_unlock(&free_avc_surface_lock);
+ return;
+ }
+
+
+ dri_bo_unreference(avc_surface->dmv_top);
+ avc_surface->dmv_top = NULL;
+ dri_bo_unreference(avc_surface->dmv_bottom);
+ avc_surface->dmv_bottom = NULL;
+
+ free(avc_surface);
+ /* Clear the caller's pointer so a second call takes the early-return path. */
+ *data = NULL;
+
+ pthread_mutex_unlock(&free_avc_surface_lock);
+}
+
+#endif
+
+extern struct hw_context *
+gen75_dec_hw_context_init(VADriverContextP ctx, VAProfile profile);
+
#endif /* I965_DECODER_H */
diff --git a/src/i965_decoder_utils.c b/src/i965_decoder_utils.c
index 8450d23..6326796 100644
--- a/src/i965_decoder_utils.c
+++ b/src/i965_decoder_utils.c
@@ -21,9 +21,7 @@
* DEALINGS IN THE SOFTWARE.
*/
-#include <assert.h>
-#include <stddef.h>
-#include <string.h>
+#include "sysdeps.h"
#include <alloca.h>
#include "intel_batchbuffer.h"
#include "i965_decoder_utils.h"
diff --git a/src/i965_defines.h b/src/i965_defines.h
index b58260a..5988949 100644
--- a/src/i965_defines.h
+++ b/src/i965_defines.h
@@ -234,7 +234,9 @@
# define GEN7_PS_FLOATING_POINT_MODE_ALT (1 << 16)
/* DW3: scratch space */
/* DW4 */
-# define GEN7_PS_MAX_THREADS_SHIFT 23
+# define GEN7_PS_MAX_THREADS_SHIFT_IVB 24
+# define GEN7_PS_MAX_THREADS_SHIFT_HSW 23
+# define GEN7_PS_SAMPLE_MASK_SHIFT_HSW 12
# define GEN7_PS_PUSH_CONSTANT_ENABLE (1 << 11)
# define GEN7_PS_ATTRIBUTE_ENABLE (1 << 10)
# define GEN7_PS_OMASK_TO_RENDER_TARGET (1 << 9)
@@ -294,6 +296,7 @@
#define MFX_AVC_REF_IDX_STATE MFX(2, 1, 0, 4)
#define MFX_AVC_WEIGHTOFFSET_STATE MFX(2, 1, 0, 5)
+#define MFD_AVC_PICID_STATE MFX(2, 1, 1, 5)
#define MFD_AVC_BSD_OBJECT MFX(2, 1, 1, 8)
#define MFC_AVC_FQM_STATE MFX(2, 1, 2, 2)
@@ -319,6 +322,17 @@
#define MFD_JPEG_BSD_OBJECT MFX(2, 7, 1, 8)
+/*
+ * Compose the opcode value used by the VEB_* command definitions: a
+ * constant 3 shifted to bit 29, OR-ed with the pipeline, op, sub_opa and
+ * sub_opb arguments shifted to bits 27, 24, 21 and 16 respectively.
+ */
+#define VEB(pipeline, op, sub_opa, sub_opb) \
+ (3 << 29 | \
+ (pipeline) << 27 | \
+ (op) << 24 | \
+ (sub_opa) << 21 | \
+ (sub_opb) << 16)
+
+#define VEB_SURFACE_STATE VEB(2, 4, 0, 0)
+#define VEB_STATE VEB(2, 4, 0, 2)
+#define VEB_DNDI_IECP_STATE VEB(2, 4, 0, 3)
+
#define I965_DEPTHFORMAT_D32_FLOAT 1
#define BASE_ADDRESS_MODIFY (1 << 0)
@@ -525,6 +539,13 @@
#define I965_MIPFILTER_NEAREST 1
#define I965_MIPFILTER_LINEAR 3
+#define HSW_SCS_ZERO 0
+#define HSW_SCS_ONE 1
+#define HSW_SCS_RED 4
+#define HSW_SCS_GREEN 5
+#define HSW_SCS_BLUE 6
+#define HSW_SCS_ALPHA 7
+
#define I965_TEXCOORDMODE_WRAP 0
#define I965_TEXCOORDMODE_MIRROR 1
#define I965_TEXCOORDMODE_CLAMP 2
diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c
index 0526efc..1180114 100644
--- a/src/i965_drv_video.c
+++ b/src/i965_drv_video.c
@@ -27,19 +27,23 @@
*
*/
-#include "config.h"
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <assert.h>
+#include "sysdeps.h"
-#include <va/va_dricommon.h>
+#ifdef HAVE_VA_X11
+# include "i965_output_dri.h"
+#endif
+
+#ifdef HAVE_VA_WAYLAND
+# include "i965_output_wayland.h"
+#endif
#include "intel_driver.h"
#include "intel_memman.h"
#include "intel_batchbuffer.h"
#include "i965_defines.h"
#include "i965_drv_video.h"
+#include "i965_decoder.h"
+#include "i965_encoder.h"
#define CONFIG_ID_OFFSET 0x01000000
#define CONTEXT_ID_OFFSET 0x02000000
@@ -74,12 +78,40 @@
#define HAS_JPEG(ctx) (IS_GEN7((ctx)->intel.device_id) && \
(ctx)->intel.has_bsd)
+#define HAS_ACCELERATED_GETIMAGE(ctx) (IS_GEN6((ctx)->intel.device_id) || \
+ IS_GEN7((ctx)->intel.device_id))
+
+#define HAS_ACCELERATED_PUTIMAGE(ctx) HAS_VPP(ctx)
+
+#if VA_CHECK_VERSION(0,33,0)
+/* Check whether we are rendering to X11 (VA/X11 or VA/GLX API) */
+#define IS_VA_X11(ctx) \
+ (((ctx)->display_type & VA_DISPLAY_MAJOR_MASK) == VA_DISPLAY_X11)
+
+/* Check whether we are rendering to Wayland */
+#define IS_VA_WAYLAND(ctx) \
+ (((ctx)->display_type & VA_DISPLAY_MAJOR_MASK) == VA_DISPLAY_WAYLAND)
+#else
+/* Previous VA-API versions only supported VA/X11 (and VA/GLX) API */
+#define IS_VA_X11(ctx) 1
+#define IS_VA_WAYLAND(ctx) 0
+#endif
+
enum {
I965_SURFACETYPE_RGBA = 1,
I965_SURFACETYPE_YUV,
I965_SURFACETYPE_INDEXED
};
+/*
+ * List of supported display attributes.
+ *
+ * This is the read-only table of defaults: i965_display_attributes_init()
+ * copies it into i965->display_attributes, and
+ * i965_QueryDisplayAttributes() returns it verbatim.
+ */
+static const VADisplayAttribute i965_display_attributes[] = {
+ {
+ VADisplayAttribRotation,
+ 0, 3, VA_ROTATION_NONE, /* min_value, max_value, value (per VADisplayAttribute layout) */
+ VA_DISPLAY_ATTRIB_GETTABLE|VA_DISPLAY_ATTRIB_SETTABLE
+ },
+};
+
/* List of supported image formats */
typedef struct {
unsigned int type;
@@ -173,6 +205,13 @@ static struct hw_codec_info gen7_hw_codec_info = {
.max_height = 4096,
};
+/*
+ * Codec hooks and maximum picture size that i965_Init() selects when
+ * IS_HASWELL() matches the device id.
+ */
+static struct hw_codec_info gen75_hw_codec_info = {
+ .dec_hw_context_init = gen75_dec_hw_context_init,
+ .enc_hw_context_init = gen75_enc_hw_context_init,
+ .max_width = 4096,
+ .max_height = 4096,
+};
+
VAStatus
i965_QueryConfigProfiles(VADriverContextP ctx,
VAProfile *profile_list, /* out */
@@ -198,9 +237,11 @@ i965_QueryConfigProfiles(VADriverContextP ctx,
profile_list[i++] = VAProfileVC1Advanced;
}
+#ifdef HAVE_VA_JPEG_DECODE
if (HAS_JPEG(i965)) {
profile_list[i++] = VAProfileJPEGBaseline;
}
+#endif
/* If the assert fails then I965_MAX_PROFILES needs to be bigger */
assert(i <= I965_MAX_PROFILES);
@@ -506,8 +547,14 @@ i965_CreateSurfaces(VADriverContextP ctx,
obj_surface->orig_width = width;
obj_surface->orig_height = height;
- obj_surface->width = ALIGN(width, 16);
- obj_surface->height = ALIGN(height, 16);
+ if (IS_G4X(i965->intel.device_id) || IS_IRONLAKE(i965->intel.device_id)) {
+ obj_surface->width = ALIGN(width, 16);
+ obj_surface->height = ALIGN(height, 16);
+ } else {
+ obj_surface->width = ALIGN(width, 128);
+ obj_surface->height = ALIGN(height, 32);
+ }
+
obj_surface->flags = SURFACE_REFERENCED;
obj_surface->fourcc = 0;
obj_surface->bo = NULL;
@@ -984,8 +1031,8 @@ i965_create_buffer_internal(VADriverContextP ctx,
case VAEncSequenceParameterBufferType:
case VAEncPictureParameterBufferType:
case VAEncSliceParameterBufferType:
-#ifdef HAVE_JPEG_DECODING
- case VAHuffmanTableBufferType:
+#ifdef HAVE_VA_JPEG_DECODE
+ case VAHuffmanTableBufferType:
#endif
/* Ok */
break;
@@ -1334,7 +1381,7 @@ i965_decoder_render_picture(VADriverContextP ctx,
vaStatus = I965_RENDER_DECODE_BUFFER(slice_data);
break;
-#ifdef HAVE_JPEG_DECODING
+#ifdef HAVE_VA_JPEG_DECODE
case VAHuffmanTableBufferType:
vaStatus = I965_RENDER_DECODE_BUFFER(huffman_table);
break;
@@ -1479,6 +1526,9 @@ i965_SyncSurface(VADriverContextP ctx,
assert(obj_surface);
+ if(obj_surface->bo)
+ drm_intel_bo_wait_rendering(obj_surface->bo);
+
return VA_STATUS_SUCCESS;
}
@@ -1492,19 +1542,70 @@ i965_QuerySurfaceStatus(VADriverContextP ctx,
assert(obj_surface);
- /* Usually GEM will handle synchronization with the graphics hardware */
-#if 0
if (obj_surface->bo) {
- dri_bo_map(obj_surface->bo, 0);
- dri_bo_unmap(obj_surface->bo);
+ if (drm_intel_bo_busy(obj_surface->bo)){
+ *status = VASurfaceRendering;
+ }
+ else {
+ *status = VASurfaceReady;
+ }
+ } else {
+ *status = VASurfaceReady;
}
-#endif
-
- *status = obj_surface->status;
return VA_STATUS_SUCCESS;
}
+/*
+ * Look up a display attribute by type in the per-driver table.
+ *
+ * Returns a pointer into i965->display_attributes (so writes through it
+ * change the driver's stored value), or NULL when the table has not been
+ * allocated or contains no entry of the requested type.
+ */
+static VADisplayAttribute *
+get_display_attribute(VADriverContextP ctx, VADisplayAttribType type)
+{
+ struct i965_driver_data * const i965 = i965_driver_data(ctx);
+ unsigned int i;
+
+ if (!i965->display_attributes)
+ return NULL;
+
+ /* Linear scan; the first entry with a matching type wins. */
+ for (i = 0; i < i965->num_display_attributes; i++) {
+ if (i965->display_attributes[i].type == type)
+ return &i965->display_attributes[i];
+ }
+ return NULL;
+}
+
+/*
+ * Allocate the per-driver display attribute table, fill it with a copy of
+ * the i965_display_attributes defaults, and cache a pointer to the rotation
+ * entry in i965->rotation_attrib.
+ *
+ * Returns false if the allocation fails or the rotation entry cannot be
+ * found, true otherwise.
+ * NOTE(review): on the second failure path the freshly allocated table is
+ * not freed here; it is only released if the caller goes on to call
+ * i965_display_attributes_terminate() -- confirm.
+ */
+static bool
+i965_display_attributes_init(VADriverContextP ctx)
+{
+ struct i965_driver_data * const i965 = i965_driver_data(ctx);
+
+ i965->num_display_attributes = ARRAY_ELEMS(i965_display_attributes);
+ i965->display_attributes = malloc(
+ i965->num_display_attributes * sizeof(i965->display_attributes[0]));
+ if (!i965->display_attributes)
+ return false;
+
+ /* Start from the defaults; i965_SetDisplayAttributes() edits this copy. */
+ memcpy(
+ i965->display_attributes,
+ i965_display_attributes,
+ sizeof(i965_display_attributes)
+ );
+
+ i965->rotation_attrib = get_display_attribute(ctx, VADisplayAttribRotation);
+ if (!i965->rotation_attrib)
+ return false;
+ return true;
+}
+
+/*
+ * Free the per-driver display attribute table and reset its pointer and
+ * element count. Safe to call when the table was never allocated.
+ *
+ * NOTE(review): i965->rotation_attrib points into this table (see
+ * i965_display_attributes_init) and is not cleared here, so it is left
+ * dangling after this call.
+ */
+static void
+i965_display_attributes_terminate(VADriverContextP ctx)
+{
+ struct i965_driver_data * const i965 = i965_driver_data(ctx);
+
+ if (i965->display_attributes) {
+ free(i965->display_attributes);
+ i965->display_attributes = NULL;
+ i965->num_display_attributes = 0;
+ }
+}
/*
* Query display attributes
@@ -1513,12 +1614,19 @@ i965_QuerySurfaceStatus(VADriverContextP ctx,
* returned in "attr_list" is returned in "num_attributes".
*/
VAStatus
-i965_QueryDisplayAttributes(VADriverContextP ctx,
- VADisplayAttribute *attr_list, /* out */
- int *num_attributes) /* out */
+i965_QueryDisplayAttributes(
+ VADriverContextP ctx,
+ VADisplayAttribute *attribs, /* out */
+ int *num_attribs_ptr /* out */
+)
{
- if (num_attributes)
- *num_attributes = 0;
+ const int num_attribs = ARRAY_ELEMS(i965_display_attributes);
+
+ /*
+ * Copies the static defaults table, not i965->display_attributes, so
+ * the "value" fields reported here are the defaults rather than values
+ * changed through i965_SetDisplayAttributes().
+ * No capacity is passed in: the caller's array is assumed to hold at
+ * least ARRAY_ELEMS(i965_display_attributes) entries -- TODO confirm
+ * against ctx->max_display_attributes.
+ */
+ if (attribs && num_attribs > 0)
+ memcpy(attribs, i965_display_attributes, sizeof(i965_display_attributes));
+
+ /* Either output pointer may be NULL; each is filled independently. */
+ if (num_attribs_ptr)
+ *num_attribs_ptr = num_attribs;
return VA_STATUS_SUCCESS;
}
@@ -1530,12 +1638,27 @@ i965_QueryDisplayAttributes(VADriverContextP ctx,
* from vaQueryDisplayAttributes() can have their values retrieved.
*/
VAStatus
-i965_GetDisplayAttributes(VADriverContextP ctx,
- VADisplayAttribute *attr_list, /* in/out */
- int num_attributes)
+i965_GetDisplayAttributes(
+ VADriverContextP ctx,
+ VADisplayAttribute *attribs, /* inout */
+ int num_attribs /* in */
+)
{
- /* TODO */
- return VA_STATUS_ERROR_UNIMPLEMENTED;
+ int i;
+
+ /*
+ * For each requested entry, look up the driver's stored attribute by
+ * type. Known, gettable attributes get min/max/value filled in and
+ * their flags left as the caller passed them; anything else is marked
+ * VA_DISPLAY_ATTRIB_NOT_SUPPORTED. The call itself always succeeds.
+ */
+ for (i = 0; i < num_attribs; i++) {
+ VADisplayAttribute *src_attrib, * const dst_attrib = &attribs[i];
+
+ src_attrib = get_display_attribute(ctx, dst_attrib->type);
+ if (src_attrib && (src_attrib->flags & VA_DISPLAY_ATTRIB_GETTABLE)) {
+ dst_attrib->min_value = src_attrib->min_value;
+ dst_attrib->max_value = src_attrib->max_value;
+ dst_attrib->value = src_attrib->value;
+ }
+ else
+ dst_attrib->flags = VA_DISPLAY_ATTRIB_NOT_SUPPORTED;
+ }
+ return VA_STATUS_SUCCESS;
}
/*
@@ -1545,12 +1668,32 @@ i965_GetDisplayAttributes(VADriverContextP ctx,
* the value is out of range, the function returns VA_STATUS_ERROR_ATTR_NOT_SUPPORTED
*/
VAStatus
-i965_SetDisplayAttributes(VADriverContextP ctx,
- VADisplayAttribute *attr_list,
- int num_attributes)
+i965_SetDisplayAttributes(
+ VADriverContextP ctx,
+ VADisplayAttribute *attribs, /* in */
+ int num_attribs /* in */
+)
{
- /* TODO */
- return VA_STATUS_ERROR_UNIMPLEMENTED;
+ int i;
+
+ /*
+ * Entries are applied in order and the loop stops at the first error,
+ * so attributes earlier in the list stay modified when a later one is
+ * rejected.
+ */
+ for (i = 0; i < num_attribs; i++) {
+ VADisplayAttribute *dst_attrib, * const src_attrib = &attribs[i];
+
+ dst_attrib = get_display_attribute(ctx, src_attrib->type);
+ if (!dst_attrib)
+ return VA_STATUS_ERROR_ATTR_NOT_SUPPORTED;
+
+ /* Known but read-only attributes are skipped without an error. */
+ if (!(dst_attrib->flags & VA_DISPLAY_ATTRIB_SETTABLE))
+ continue;
+
+ /*
+ * NOTE(review): the comment above this function says an out-of-range
+ * value yields VA_STATUS_ERROR_ATTR_NOT_SUPPORTED, but the code
+ * returns VA_STATUS_ERROR_INVALID_PARAMETER -- confirm which is
+ * intended.
+ */
+ if (src_attrib->value < dst_attrib->min_value ||
+ src_attrib->value > dst_attrib->max_value)
+ return VA_STATUS_ERROR_INVALID_PARAMETER;
+
+ dst_attrib->value = src_attrib->value;
+ /* XXX: track modified attributes through timestamps */
+ }
+ return VA_STATUS_SUCCESS;
}
VAStatus
@@ -1571,7 +1714,9 @@ i965_Init(VADriverContextP ctx)
if (intel_driver_init(ctx) == False)
return VA_STATUS_ERROR_UNKNOWN;
- if (IS_G4X(i965->intel.device_id))
+ if (IS_HASWELL(i965->intel.device_id))
+ i965->codec_info = &gen75_hw_codec_info;
+ else if (IS_G4X(i965->intel.device_id))
i965->codec_info = &g4x_hw_codec_info;
else if (IS_IRONLAKE(i965->intel.device_id))
i965->codec_info = &ironlake_hw_codec_info;
@@ -1582,14 +1727,28 @@ i965_Init(VADriverContextP ctx)
else
return VA_STATUS_ERROR_UNKNOWN;
+ i965->batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_RENDER, 0);
+
+ if (!i965_display_attributes_init(ctx))
+ return VA_STATUS_ERROR_UNKNOWN;
+
if (i965_post_processing_init(ctx) == False)
return VA_STATUS_ERROR_UNKNOWN;
if (i965_render_init(ctx) == False)
return VA_STATUS_ERROR_UNKNOWN;
+#ifdef HAVE_VA_WAYLAND
+ if (IS_VA_WAYLAND(ctx) && !i965_output_wayland_init(ctx))
+ return VA_STATUS_ERROR_UNKNOWN;
+#endif
+
+#ifdef HAVE_VA_X11
+ if (IS_VA_X11(ctx) && !i965_output_dri_init(ctx))
+ return VA_STATUS_ERROR_UNKNOWN;
+#endif
+
_i965InitMutex(&i965->render_mutex);
- i965->batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_RENDER);
return VA_STATUS_SUCCESS;
}
@@ -2274,109 +2433,25 @@ i965_PutSurface(VADriverContextP ctx,
unsigned int number_cliprects, /* number of clip rects in the clip list */
unsigned int flags) /* de-interlacing flags */
{
- struct i965_driver_data *i965 = i965_driver_data(ctx);
- struct dri_state *dri_state = (struct dri_state *)ctx->dri_state;
- struct i965_render_state *render_state = &i965->render_state;
- struct dri_drawable *dri_drawable;
- union dri_buffer *buffer;
- struct intel_region *dest_region;
- struct object_surface *obj_surface;
- VARectangle src_rect, dst_rect;
- int ret;
- uint32_t name;
- Bool new_region = False;
- int pp_flag = 0;
-
- /* Currently don't support DRI1 */
- if (dri_state->driConnectedFlag != VA_DRI2)
- return VA_STATUS_ERROR_UNKNOWN;
-
- /* Some broken sources such as H.264 conformance case FM2_SVA_C
- * will get here
- */
- obj_surface = SURFACE(surface);
- if (!obj_surface || !obj_surface->bo)
- return VA_STATUS_SUCCESS;
-
- _i965LockMutex(&i965->render_mutex);
-
- dri_drawable = dri_get_drawable(ctx, (Drawable)draw);
- assert(dri_drawable);
-
- buffer = dri_get_rendering_buffer(ctx, dri_drawable);
- assert(buffer);
-
- dest_region = render_state->draw_region;
-
- if (dest_region) {
- assert(dest_region->bo);
- dri_bo_flink(dest_region->bo, &name);
-
- if (buffer->dri2.name != name) {
- new_region = True;
- dri_bo_unreference(dest_region->bo);
- }
- } else {
- dest_region = (struct intel_region *)calloc(1, sizeof(*dest_region));
- assert(dest_region);
- render_state->draw_region = dest_region;
- new_region = True;
- }
-
- if (new_region) {
- dest_region->x = dri_drawable->x;
- dest_region->y = dri_drawable->y;
- dest_region->width = dri_drawable->width;
- dest_region->height = dri_drawable->height;
- dest_region->cpp = buffer->dri2.cpp;
- dest_region->pitch = buffer->dri2.pitch;
-
- dest_region->bo = intel_bo_gem_create_from_name(i965->intel.bufmgr, "rendering buffer", buffer->dri2.name);
- assert(dest_region->bo);
-
- ret = dri_bo_get_tiling(dest_region->bo, &(dest_region->tiling), &(dest_region->swizzle));
- assert(ret == 0);
- }
-
- if ((flags & VA_FILTER_SCALING_MASK) == VA_FILTER_SCALING_NL_ANAMORPHIC)
- pp_flag |= I965_PP_FLAG_AVS;
-
- if (flags & VA_TOP_FIELD)
- pp_flag |= I965_PP_FLAG_TOP_FIELD;
- else if (flags & VA_BOTTOM_FIELD)
- pp_flag |= I965_PP_FLAG_BOTTOM_FIELD;
-
- src_rect.x = srcx;
- src_rect.y = srcy;
- src_rect.width = srcw;
- src_rect.height = srch;
+#ifdef HAVE_VA_X11
+ if (IS_VA_X11(ctx)) {
+ VARectangle src_rect, dst_rect;
- dst_rect.x = destx;
- dst_rect.y = desty;
- dst_rect.width = destw;
- dst_rect.height = desth;
+ src_rect.x = srcx;
+ src_rect.y = srcy;
+ src_rect.width = srcw;
+ src_rect.height = srch;
- intel_render_put_surface(ctx, surface, &src_rect, &dst_rect, pp_flag);
+ dst_rect.x = destx;
+ dst_rect.y = desty;
+ dst_rect.width = destw;
+ dst_rect.height = desth;
- if(obj_surface->subpic != VA_INVALID_ID) {
- intel_render_put_subpicture(ctx, surface, &src_rect, &dst_rect);
+ return i965_put_surface_dri(ctx, surface, draw, &src_rect, &dst_rect,
+ cliprects, number_cliprects, flags);
}
-
- dri_swap_buffer(ctx, dri_drawable);
- obj_surface->flags |= SURFACE_DISPLAYED;
-
- if ((obj_surface->flags & SURFACE_ALL_MASK) == SURFACE_DISPLAYED) {
- dri_bo_unreference(obj_surface->bo);
- obj_surface->bo = NULL;
- obj_surface->flags &= ~SURFACE_REF_DIS_MASK;
-
- if (obj_surface->free_private_data)
- obj_surface->free_private_data(&obj_surface->private_data);
- }
-
- _i965UnlockMutex(&i965->render_mutex);
-
- return VA_STATUS_SUCCESS;
+#endif
+ return VA_STATUS_ERROR_UNIMPLEMENTED;
}
VAStatus
@@ -2389,14 +2464,23 @@ i965_Terminate(VADriverContextP ctx)
_i965DestroyMutex(&i965->render_mutex);
+#ifdef HAVE_VA_X11
+ if (IS_VA_X11(ctx))
+ i965_output_dri_terminate(ctx);
+#endif
+
+#ifdef HAVE_VA_WAYLAND
+ if (IS_VA_WAYLAND(ctx))
+ i965_output_wayland_terminate(ctx);
+#endif
+
if (i965_render_terminate(ctx) == False)
return VA_STATUS_ERROR_UNKNOWN;
if (i965_post_processing_terminate(ctx) == False)
return VA_STATUS_ERROR_UNKNOWN;
- if (intel_driver_terminate(ctx) == False)
- return VA_STATUS_ERROR_UNKNOWN;
+ i965_display_attributes_terminate(ctx);
i965_destroy_heap(&i965->buffer_heap, i965_destroy_buffer);
i965_destroy_heap(&i965->image_heap, i965_destroy_image);
@@ -2405,6 +2489,9 @@ i965_Terminate(VADriverContextP ctx)
i965_destroy_heap(&i965->context_heap, i965_destroy_context);
i965_destroy_heap(&i965->config_heap, i965_destroy_config);
+ if (intel_driver_terminate(ctx) == False)
+ return VA_STATUS_ERROR_UNKNOWN;
+
free(ctx->pDriverData);
ctx->pDriverData = NULL;
@@ -2529,11 +2616,11 @@ i965_UnlockSurface(
if (obj_surface == NULL) {
vaStatus = VA_STATUS_ERROR_INVALID_PARAMETER; // Surface is absent
- goto error;
+ return vaStatus;
}
if (obj_surface->locked_image_id == VA_INVALID_ID) {
vaStatus = VA_STATUS_ERROR_INVALID_PARAMETER; // Surface is not locked
- goto error;
+ return vaStatus;
}
locked_img = IMAGE(obj_surface->locked_image_id);
@@ -2560,6 +2647,8 @@ i965_UnlockSurface(
locked_img->image.image_id = VA_INVALID_ID;
error:
+ obj_surface->locked_image_id = VA_INVALID_ID;
+
return vaStatus;
}
@@ -2580,7 +2669,7 @@ VA_DRIVER_INIT_FUNC( VADriverContextP ctx )
ctx->max_attributes = I965_MAX_CONFIG_ATTRIBUTES;
ctx->max_image_formats = I965_MAX_IMAGE_FORMATS;
ctx->max_subpic_formats = I965_MAX_SUBPIC_FORMATS;
- ctx->max_display_attributes = I965_MAX_DISPLAY_ATTRIBUTES;
+ ctx->max_display_attributes = 1 + ARRAY_ELEMS(i965_display_attributes);
vtable->vaTerminate = i965_Terminate;
vtable->vaQueryConfigEntrypoints = i965_QueryConfigEntrypoints;
diff --git a/src/i965_drv_video.h b/src/i965_drv_video.h
index ab993bc..de4f747 100644
--- a/src/i965_drv_video.h
+++ b/src/i965_drv_video.h
@@ -42,11 +42,24 @@
#define I965_MAX_CONFIG_ATTRIBUTES 10
#define I965_MAX_IMAGE_FORMATS 3
#define I965_MAX_SUBPIC_FORMATS 4
-#define I965_MAX_DISPLAY_ATTRIBUTES 4
#define INTEL_STR_DRIVER_VENDOR "Intel"
#define INTEL_STR_DRIVER_NAME "i965"
+#define I965_SURFACE_TYPE_IMAGE 0
+#define I965_SURFACE_TYPE_SURFACE 1
+
+#define I965_SURFACE_FLAG_FRAME 0x00000000
+#define I965_SURFACE_FLAG_TOP_FIELD_FIRST 0x00000001
+#define I965_SURFACE_FLAG_BOTTOM_FIELD_FIRST 0x00000002
+
+/*
+ * Generic handle naming either a VA image or a VA surface.
+ */
+struct i965_surface
+{
+ VAGenericID id; /* presumably a VAImageID or VASurfaceID depending on type -- TODO confirm */
+ int type; /* presumably one of I965_SURFACE_TYPE_* -- TODO confirm */
+ int flags; /* presumably one of I965_SURFACE_FLAG_* -- TODO confirm */
+};
+
struct i965_kernel
{
char *name;
@@ -232,6 +245,16 @@ struct i965_driver_data
struct i965_render_state render_state;
void *pp_context;
char va_vendor[256];
+
+ VADisplayAttribute *display_attributes;
+ unsigned int num_display_attributes;
+ VADisplayAttribute *rotation_attrib;
+
+ /* VA/DRI (X11) specific data */
+ struct va_dri_output *dri_output;
+
+ /* VA/Wayland specific data */
+ struct va_wl_output *wl_output;
};
#define NEW_CONFIG_ID() object_heap_allocate(&i965->config_heap);
@@ -267,4 +290,11 @@ i965_check_alloc_surface_bo(VADriverContextP ctx,
unsigned int fourcc,
unsigned int subsampling);
+
+extern VAStatus i965_MapBuffer(VADriverContextP ctx,
+ VABufferID buf_id, /* in */
+ void **pbuf); /* out */
+
+extern VAStatus i965_UnmapBuffer(VADriverContextP ctx, VABufferID buf_id);
+
#endif /* _I965_DRV_VIDEO_H_ */
diff --git a/src/i965_encoder.c b/src/i965_encoder.c
index c58eb21..6d58e39 100644
--- a/src/i965_encoder.c
+++ b/src/i965_encoder.c
@@ -37,6 +37,8 @@
#include "i965_defines.h"
#include "i965_drv_video.h"
#include "i965_encoder.h"
+#include "gen6_vme.h"
+#include "gen6_mfc.h"
static void
gen6_encoder_end_picture(VADriverContextP ctx,
@@ -72,10 +74,53 @@ gen6_enc_hw_context_init(VADriverContextP ctx, VAProfile profile)
gen6_encoder_context->base.destroy = gen6_encoder_context_destroy;
gen6_encoder_context->base.run = gen6_encoder_end_picture;
- gen6_encoder_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER);
+ gen6_encoder_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
gen6_vme_context_init(ctx, &gen6_encoder_context->vme_context);
gen6_mfc_context_init(ctx, &gen6_encoder_context->mfc_context);
return (struct hw_context *)gen6_encoder_context;
}
+
+/*
+ * "run" hook of the gen75 encoder context (see gen75_enc_hw_context_init).
+ * Runs the VME pipeline and, only if that returns VA_STATUS_SUCCESS, the
+ * MFC pipeline. The function returns void, so neither a VME failure nor
+ * the MFC pipeline's result is reported to the caller.
+ */
+static void
+gen75_encoder_end_picture(VADriverContextP ctx,
+ VAProfile profile,
+ union codec_state *codec_state,
+ struct hw_context *hw_context)
+{
+ struct gen6_encoder_context *gen6_encoder_context = (struct gen6_encoder_context *)hw_context;
+ struct encode_state *encode_state = &codec_state->encode;
+ VAStatus vaStatus;
+
+ vaStatus = gen75_vme_pipeline(ctx, profile, encode_state, gen6_encoder_context);
+
+ if (vaStatus == VA_STATUS_SUCCESS)
+ gen75_mfc_pipeline(ctx, profile, encode_state, gen6_encoder_context);
+}
+/*
+ * "destroy" hook of the gen75 encoder context: tears down the MFC and VME
+ * sub-contexts, frees the batch buffer, then frees the context itself.
+ * hw_context must be the pointer returned by gen75_enc_hw_context_init().
+ */
+static void
+gen75_encoder_context_destroy(void *hw_context)
+{
+ struct gen6_encoder_context *gen6_encoder_context = (struct gen6_encoder_context *)hw_context;
+
+ gen75_mfc_context_destroy(&gen6_encoder_context->mfc_context);
+ gen75_vme_context_destroy(&gen6_encoder_context->vme_context);
+ intel_batchbuffer_free(gen6_encoder_context->base.batch);
+ free(gen6_encoder_context);
+}
+
+
+/*
+ * Create the encoder hardware context used on Haswell (referenced from
+ * gen75_hw_codec_info.enc_hw_context_init). It reuses the
+ * gen6_encoder_context layout but wires in the gen75 destroy/run hooks and
+ * the gen75 VME/MFC initialisers.
+ *
+ * The profile argument is not used in this function.
+ * NOTE(review): the calloc() result is dereferenced without a NULL check.
+ */
+struct hw_context *
+gen75_enc_hw_context_init(VADriverContextP ctx, VAProfile profile)
+{
+ struct intel_driver_data *intel = intel_driver_data(ctx);
+ struct gen6_encoder_context *gen6_encoder_context = calloc(1, sizeof(struct gen6_encoder_context));
+
+ gen6_encoder_context->base.destroy = gen75_encoder_context_destroy;
+ gen6_encoder_context->base.run = gen75_encoder_end_picture;
+ gen6_encoder_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
+
+ gen75_vme_context_init(ctx, &gen6_encoder_context->vme_context);
+ gen75_mfc_context_init(ctx, &gen6_encoder_context->mfc_context);
+
+ return (struct hw_context *)gen6_encoder_context;
+}
diff --git a/src/i965_encoder.h b/src/i965_encoder.h
index 555efe3..fb989e0 100644
--- a/src/i965_encoder.h
+++ b/src/i965_encoder.h
@@ -46,6 +46,9 @@ struct gen6_encoder_context
struct gen6_mfc_context mfc_context;
};
+extern struct hw_context *
+gen75_enc_hw_context_init(VADriverContextP ctx, VAProfile profile);
+
#endif /* _I965_ENCODER_H_ */
diff --git a/src/i965_media.c b/src/i965_media.c
index f43feed..432f8ad 100644
--- a/src/i965_media.c
+++ b/src/i965_media.c
@@ -328,7 +328,7 @@ g4x_dec_hw_context_init(VADriverContextP ctx, VAProfile profile)
media_context->base.destroy = i965_media_context_destroy;
media_context->base.run = i965_media_decode_picture;
- media_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER);
+ media_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
switch (profile) {
case VAProfileMPEG2Simple:
@@ -358,7 +358,7 @@ ironlake_dec_hw_context_init(VADriverContextP ctx, VAProfile profile)
media_context->base.destroy = i965_media_context_destroy;
media_context->base.run = i965_media_decode_picture;
- media_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER);
+ media_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
switch (profile) {
case VAProfileMPEG2Simple:
diff --git a/src/i965_output_dri.c b/src/i965_output_dri.c
new file mode 100644
index 0000000..5757ce8
--- /dev/null
+++ b/src/i965_output_dri.c
@@ -0,0 +1,209 @@
+/*
+ * Copyright (C) 2012 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "sysdeps.h"
+#include <va/va_dricommon.h>
+#include "i965_drv_video.h"
+#include "i965_output_dri.h"
+#include "dso_utils.h"
+
+#define LIBVA_X11_NAME "libva-x11.so.1"
+
+typedef struct dri_drawable *(*dri_get_drawable_func)(
+ VADriverContextP ctx, XID drawable);
+typedef union dri_buffer *(*dri_get_rendering_buffer_func)(
+ VADriverContextP ctx, struct dri_drawable *d);
+typedef void (*dri_swap_buffer_func)(
+ VADriverContextP ctx, struct dri_drawable *d);
+
+/*
+ * DRI helper entry points looked up by name in LIBVA_X11_NAME by
+ * i965_output_dri_init(), rather than linked against directly.
+ */
+struct dri_vtable {
+ dri_get_drawable_func get_drawable; /* symbol "dri_get_drawable" */
+ dri_get_rendering_buffer_func get_rendering_buffer; /* symbol "dri_get_rendering_buffer" */
+ dri_swap_buffer_func swap_buffer; /* symbol "dri_swap_buffer" */
+};
+
+/* VA/X11 output state stored in i965->dri_output. */
+struct va_dri_output {
+ struct dso_handle *handle; /* result of dso_open(LIBVA_X11_NAME); closed in i965_output_dri_terminate() */
+ struct dri_vtable vtable; /* entry points resolved from that handle */
+};
+
+/*
+ * Set up VA/X11 output: allocate i965->dri_output, open LIBVA_X11_NAME and
+ * resolve the three DRI helper functions into its vtable.
+ *
+ * Returns true on success. On any failure everything acquired so far is
+ * released through i965_output_dri_terminate() and false is returned.
+ */
+bool
+i965_output_dri_init(VADriverContextP ctx)
+{
+ struct i965_driver_data * const i965 = i965_driver_data(ctx);
+ struct dso_handle *dso_handle;
+ struct dri_vtable *dri_vtable;
+
+ /* Symbol name -> offset of the matching function pointer in dri_vtable. */
+ static const struct dso_symbol symbols[] = {
+ { "dri_get_drawable",
+ offsetof(struct dri_vtable, get_drawable) },
+ { "dri_get_rendering_buffer",
+ offsetof(struct dri_vtable, get_rendering_buffer) },
+ { "dri_swap_buffer",
+ offsetof(struct dri_vtable, swap_buffer) },
+ { NULL, }
+ };
+
+ i965->dri_output = calloc(1, sizeof(struct va_dri_output));
+ if (!i965->dri_output)
+ goto error;
+
+ i965->dri_output->handle = dso_open(LIBVA_X11_NAME);
+ if (!i965->dri_output->handle)
+ goto error;
+
+ dso_handle = i965->dri_output->handle;
+ dri_vtable = &i965->dri_output->vtable;
+ /* presumably fails if any listed symbol is missing -- TODO confirm in dso_utils */
+ if (!dso_get_symbols(dso_handle, dri_vtable, sizeof(*dri_vtable), symbols))
+ goto error;
+ return true;
+
+error:
+ i965_output_dri_terminate(ctx);
+ return false;
+}
+
+/*
+ * Release the VA/X11 output state: close the library handle if one was
+ * opened, free the structure and reset i965->dri_output to NULL.
+ * Does nothing when i965->dri_output is already NULL, so it can be used on
+ * the partially-initialised state left by a failed i965_output_dri_init().
+ */
+void
+i965_output_dri_terminate(VADriverContextP ctx)
+{
+ struct i965_driver_data * const i965 = i965_driver_data(ctx);
+ struct va_dri_output * const dri_output = i965->dri_output;
+
+ if (!dri_output)
+ return;
+
+ if (dri_output->handle) {
+ dso_close(dri_output->handle);
+ dri_output->handle = NULL;
+ }
+
+ free(dri_output);
+ i965->dri_output = NULL;
+}
+
+/*
+ * Render a surface into an X11 drawable through DRI2 (the body previously
+ * held in i965_PutSurface, now calling the DRI helpers via the vtable).
+ *
+ * ctx           driver context
+ * surface       id of the surface to display
+ * draw          drawable, cast to Drawable before use
+ * src_rect      source rectangle, passed to intel_render_put_surface()
+ * dst_rect      destination rectangle, passed likewise
+ * cliprects,
+ * num_cliprects accepted but not referenced in this function
+ * flags         VA_FILTER_SCALING_* / VA_TOP_FIELD / VA_BOTTOM_FIELD bits,
+ *               translated to I965_PP_FLAG_* values
+ *
+ * Returns VA_STATUS_ERROR_UNKNOWN when the connection is not DRI2, and
+ * VA_STATUS_SUCCESS otherwise -- including when the surface id is unknown
+ * or has no buffer object, in which case nothing is rendered.
+ * Rendering happens with i965->render_mutex held.
+ */
+VAStatus
+i965_put_surface_dri(
+ VADriverContextP ctx,
+ VASurfaceID surface,
+ void *draw,
+ const VARectangle *src_rect,
+ const VARectangle *dst_rect,
+ const VARectangle *cliprects,
+ unsigned int num_cliprects,
+ unsigned int flags
+)
+{
+ struct i965_driver_data * const i965 = i965_driver_data(ctx);
+ struct dri_vtable * const dri_vtable = &i965->dri_output->vtable;
+ struct i965_render_state * const render_state = &i965->render_state;
+ struct dri_drawable *dri_drawable;
+ union dri_buffer *buffer;
+ struct intel_region *dest_region;
+ struct object_surface *obj_surface;
+ unsigned int pp_flag = 0;
+ bool new_region = false;
+ uint32_t name;
+ int ret;
+
+ /* Currently don't support DRI1 */
+ if (!VA_CHECK_DRM_AUTH_TYPE(ctx, VA_DRM_AUTH_DRI2))
+ return VA_STATUS_ERROR_UNKNOWN;
+
+ /* Some broken sources such as H.264 conformance case FM2_SVA_C
+ * will get here
+ */
+ obj_surface = SURFACE(surface);
+ if (!obj_surface || !obj_surface->bo)
+ return VA_STATUS_SUCCESS;
+
+ _i965LockMutex(&i965->render_mutex);
+
+ dri_drawable = dri_vtable->get_drawable(ctx, (Drawable)draw);
+ assert(dri_drawable);
+
+ buffer = dri_vtable->get_rendering_buffer(ctx, dri_drawable);
+ assert(buffer);
+
+ dest_region = render_state->draw_region;
+
+ /*
+ * Reuse the cached draw region while its buffer object still has the
+ * same flink name as the drawable's current DRI2 buffer; otherwise
+ * drop the old bo (or allocate the region on first use) and rebuild it
+ * below.
+ */
+ if (dest_region) {
+ assert(dest_region->bo);
+ dri_bo_flink(dest_region->bo, &name);
+
+ if (buffer->dri2.name != name) {
+ new_region = True;
+ dri_bo_unreference(dest_region->bo);
+ }
+ } else {
+ dest_region = (struct intel_region *)calloc(1, sizeof(*dest_region));
+ assert(dest_region);
+ render_state->draw_region = dest_region;
+ new_region = True;
+ }
+
+ /* Describe the new target buffer: geometry from the drawable, bo by name. */
+ if (new_region) {
+ dest_region->x = dri_drawable->x;
+ dest_region->y = dri_drawable->y;
+ dest_region->width = dri_drawable->width;
+ dest_region->height = dri_drawable->height;
+ dest_region->cpp = buffer->dri2.cpp;
+ dest_region->pitch = buffer->dri2.pitch;
+
+ dest_region->bo = intel_bo_gem_create_from_name(i965->intel.bufmgr, "rendering buffer", buffer->dri2.name);
+ assert(dest_region->bo);
+
+ ret = dri_bo_get_tiling(dest_region->bo, &(dest_region->tiling), &(dest_region->swizzle));
+ assert(ret == 0);
+ }
+
+ /* Translate VA PutSurface flags into post-processing flags. */
+ if ((flags & VA_FILTER_SCALING_MASK) == VA_FILTER_SCALING_NL_ANAMORPHIC)
+ pp_flag |= I965_PP_FLAG_AVS;
+
+ if (flags & VA_TOP_FIELD)
+ pp_flag |= I965_PP_FLAG_TOP_FIELD;
+ else if (flags & VA_BOTTOM_FIELD)
+ pp_flag |= I965_PP_FLAG_BOTTOM_FIELD;
+
+ intel_render_put_surface(ctx, surface, src_rect, dst_rect, pp_flag);
+
+ if(obj_surface->subpic != VA_INVALID_ID) {
+ intel_render_put_subpicture(ctx, surface, src_rect, dst_rect);
+ }
+
+ dri_vtable->swap_buffer(ctx, dri_drawable);
+ obj_surface->flags |= SURFACE_DISPLAYED;
+
+ /*
+ * If SURFACE_DISPLAYED is the only bit set within SURFACE_ALL_MASK,
+ * release the surface's buffer object and its codec private data.
+ */
+ if ((obj_surface->flags & SURFACE_ALL_MASK) == SURFACE_DISPLAYED) {
+ dri_bo_unreference(obj_surface->bo);
+ obj_surface->bo = NULL;
+ obj_surface->flags &= ~SURFACE_REF_DIS_MASK;
+
+ if (obj_surface->free_private_data)
+ obj_surface->free_private_data(&obj_surface->private_data);
+ }
+
+ _i965UnlockMutex(&i965->render_mutex);
+
+ return VA_STATUS_SUCCESS;
+}
diff --git a/src/i965_output_dri.h b/src/i965_output_dri.h
new file mode 100644
index 0000000..cf37b14
--- /dev/null
+++ b/src/i965_output_dri.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (C) 2012 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef I965_OUTPUT_DRI_H
+#define I965_OUTPUT_DRI_H
+
+#include <stdbool.h>
+#include <va/va_backend.h>
+
+bool
+i965_output_dri_init(VADriverContextP ctx);
+
+void
+i965_output_dri_terminate(VADriverContextP ctx);
+
+VAStatus
+i965_put_surface_dri(
+ VADriverContextP ctx,
+ VASurfaceID surface,
+ void *draw,
+ const VARectangle *src_rect,
+ const VARectangle *dst_rect,
+ const VARectangle *cliprects,
+ unsigned int num_cliprects,
+ unsigned int flags
+);
+
+#endif /* I965_OUTPUT_DRI_H */
diff --git a/src/i965_output_wayland.c b/src/i965_output_wayland.c
new file mode 100644
index 0000000..30ffb71
--- /dev/null
+++ b/src/i965_output_wayland.c
@@ -0,0 +1,343 @@
+/*
+ * Copyright (C) 2012 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <va/va_backend.h>
+#include <va/va_backend_wayland.h>
+#include <wayland-client.h>
+#include <wayland-drm-client-protocol.h>
+#include "intel_driver.h"
+#include "i965_output_wayland.h"
+#include "i965_drv_video.h"
+#include "i965_defines.h"
+#include "dso_utils.h"
+
+#define LIBEGL_NAME "libEGL.so.1"
+#define LIBWAYLAND_CLIENT_NAME "libwayland-client.so.0"
+
+/*
+ * Function-pointer types for the libwayland-client entry points used by this
+ * file. The functions are not linked directly: i965_output_wayland_init()
+ * resolves them by name at run time and stores them in struct wl_vtable.
+ */
+
+/* wl_display_get_global: the result is treated as a global id, 0 = not found */
+typedef uint32_t (*wl_display_get_global_func)(struct wl_display *display,
+ const char *interface, uint32_t version);
+/* wl_display_bind: the result is stored as the wl_drm object, NULL = failure */
+typedef void *(*wl_display_bind_func)(struct wl_display *display,
+ uint32_t name, const struct wl_interface *interface);
+/* wl_display_roundtrip */
+typedef void (*wl_display_roundtrip_func)(struct wl_display *display);
+
+/* wl_proxy_create: the result is checked against NULL by the caller */
+typedef struct wl_proxy *(*wl_proxy_create_func)(struct wl_proxy *factory,
+ const struct wl_interface *interface);
+/* wl_proxy_destroy */
+typedef void (*wl_proxy_destroy_func)(struct wl_proxy *proxy);
+/* wl_proxy_marshal: variadic, arguments depend on the request opcode */
+typedef void (*wl_proxy_marshal_func)(struct wl_proxy *p, uint32_t opcode, ...);
+
+/*
+ * Table of dynamically resolved Wayland symbols.
+ * Filled by dso_get_symbols() in i965_output_wayland_init() using
+ * offsetof()-based symbol tables: drm_interface comes from libEGL, every
+ * other member from libwayland-client.
+ */
+struct wl_vtable {
+ const struct wl_interface *buffer_interface; /* "wl_buffer_interface" */
+ const struct wl_interface *drm_interface; /* "wl_drm_interface" */
+ wl_display_get_global_func display_get_global;
+ wl_display_bind_func display_bind;
+ wl_display_roundtrip_func display_roundtrip;
+ wl_proxy_create_func proxy_create;
+ wl_proxy_destroy_func proxy_destroy;
+ wl_proxy_marshal_func proxy_marshal;
+};
+
+/*
+ * Per-driver Wayland output state, allocated zeroed in
+ * i965_output_wayland_init() and freed in i965_output_wayland_terminate().
+ */
+struct va_wl_output {
+ struct dso_handle *libegl_handle; /* dso_open(LIBEGL_NAME) */
+ struct dso_handle *libwl_client_handle; /* dso_open(LIBWAYLAND_CLIENT_NAME) */
+ struct wl_vtable vtable; /* symbols resolved from both libraries */
+ struct wl_drm *wl_drm; /* bound on first use by ensure_wl_output(), NULL until then */
+};
+
+/*
+ * Make sure the wl_drm object is bound, binding it on first use.
+ *
+ * Looks up the "wl_drm" global (version 1) on the native display. If the
+ * first lookup yields no id, one display roundtrip is performed and the
+ * lookup is repeated once. The bound object is cached in wl_output->wl_drm.
+ *
+ * Returns true when wl_drm is available, false otherwise.
+ */
+static bool
+ensure_wl_output(VADriverContextP ctx)
+{
+    struct i965_driver_data * const i965 = i965_driver_data(ctx);
+    struct va_wl_output * const output = i965->wl_output;
+    struct wl_vtable * const vt = &output->vtable;
+    uint32_t global_id;
+
+    /* already bound by an earlier call */
+    if (output->wl_drm)
+        return true;
+
+    global_id = vt->display_get_global(ctx->native_dpy, "wl_drm", 1);
+    if (global_id == 0) {
+        vt->display_roundtrip(ctx->native_dpy);
+        global_id = vt->display_get_global(ctx->native_dpy, "wl_drm", 1);
+    }
+    if (global_id == 0)
+        return false;
+
+    output->wl_drm =
+        vt->display_bind(ctx->native_dpy, global_id, vt->drm_interface);
+    return output->wl_drm != NULL;
+}
+
+/*
+ * Create a planar YUV buffer.
+ *
+ * Allocates a new proxy with the wl_buffer interface from the wl_drm object
+ * and sends WL_DRM_CREATE_PLANAR_BUFFER carrying the buffer name, the
+ * dimensions, the format and the offset/pitch pair of each of the three
+ * planes.
+ *
+ * Returns the new buffer, or NULL if the proxy could not be created.
+ */
+static struct wl_buffer *
+create_planar_buffer(
+    struct va_wl_output *wl_output,
+    uint32_t             name,
+    int32_t              width,
+    int32_t              height,
+    uint32_t             format,
+    int32_t              offsets[3],
+    int32_t              pitches[3]
+)
+{
+    struct wl_vtable * const vt = &wl_output->vtable;
+    struct wl_proxy * const drm_proxy = (struct wl_proxy *)wl_output->wl_drm;
+    struct wl_proxy *buffer_proxy;
+
+    buffer_proxy = vt->proxy_create(drm_proxy, vt->buffer_interface);
+    if (buffer_proxy == NULL)
+        return NULL;
+
+    vt->proxy_marshal(drm_proxy, WL_DRM_CREATE_PLANAR_BUFFER,
+                      buffer_proxy, name, width, height, format,
+                      offsets[0], pitches[0],
+                      offsets[1], pitches[1],
+                      offsets[2], pitches[2]);
+    return (struct wl_buffer *)buffer_proxy;
+}
+
+/*
+ * Hook to return the Wayland buffer associated with a VA surface.
+ *
+ * The surface's buffer object is exported with drm_intel_bo_flink() and the
+ * resulting name is wrapped in a planar wl_drm buffer whose plane offsets
+ * and pitches are derived from the surface layout.
+ *
+ * Returns:
+ *   VA_STATUS_SUCCESS                     *out_buffer holds the new buffer
+ *   VA_STATUS_ERROR_INVALID_SURFACE       unknown surface, surface without a
+ *                                         buffer object, or flink failure
+ *   VA_STATUS_ERROR_FLAG_NOT_SUPPORTED    flags is not VA_FRAME_PICTURE
+ *   VA_STATUS_ERROR_INVALID_PARAMETER     out_buffer is NULL
+ *   VA_STATUS_ERROR_INVALID_DISPLAY       wl_drm could not be bound
+ *   VA_STATUS_ERROR_INVALID_IMAGE_FORMAT  unsupported fourcc or subsampling
+ *   VA_STATUS_ERROR_ALLOCATION_FAILED     buffer proxy creation failed
+ */
+static VAStatus
+va_GetSurfaceBufferWl(
+    struct VADriverContext *ctx,
+    VASurfaceID             surface,
+    unsigned int            flags,
+    struct wl_buffer      **out_buffer
+)
+{
+    struct i965_driver_data * const i965 = i965_driver_data(ctx);
+    struct object_surface *obj_surface;
+    struct wl_buffer *buffer;
+    uint32_t name, drm_format;
+    int offsets[3], pitches[3];
+
+    obj_surface = SURFACE(surface);
+    /* A surface without a backing buffer object has nothing to export, and
+     * drm_intel_bo_flink() must not be handed a NULL bo. */
+    if (!obj_surface || !obj_surface->bo)
+        return VA_STATUS_ERROR_INVALID_SURFACE;
+
+    if (flags != VA_FRAME_PICTURE)
+        return VA_STATUS_ERROR_FLAG_NOT_SUPPORTED;
+
+    if (!out_buffer)
+        return VA_STATUS_ERROR_INVALID_PARAMETER;
+
+    if (!ensure_wl_output(ctx))
+        return VA_STATUS_ERROR_INVALID_DISPLAY;
+
+    if (drm_intel_bo_flink(obj_surface->bo, &name) != 0)
+        return VA_STATUS_ERROR_INVALID_SURFACE;
+
+    switch (obj_surface->fourcc) {
+    case VA_FOURCC('N','V','1','2'):
+        /* two planes: Y, then interleaved CbCr; third plane unused */
+        drm_format = WL_DRM_FORMAT_NV12;
+        offsets[0] = 0;
+        pitches[0] = obj_surface->width;
+        offsets[1] = obj_surface->width * obj_surface->y_cb_offset;
+        pitches[1] = obj_surface->cb_cr_pitch;
+        offsets[2] = 0;
+        pitches[2] = 0;
+        break;
+    case VA_FOURCC('Y','V','1','2'):
+    case VA_FOURCC('I','4','2','0'):
+    case VA_FOURCC('I','M','C','1'):
+        /* three planes; the wl_drm format follows the chroma subsampling */
+        switch (obj_surface->subsampling) {
+        case SUBSAMPLE_YUV411:
+            drm_format = WL_DRM_FORMAT_YUV411;
+            break;
+        case SUBSAMPLE_YUV420:
+            drm_format = WL_DRM_FORMAT_YUV420;
+            break;
+        case SUBSAMPLE_YUV422H:
+        case SUBSAMPLE_YUV422V:
+            drm_format = WL_DRM_FORMAT_YUV422;
+            break;
+        case SUBSAMPLE_YUV444:
+            drm_format = WL_DRM_FORMAT_YUV444;
+            break;
+        default:
+            assert(0 && "unsupported subsampling");
+            return VA_STATUS_ERROR_INVALID_IMAGE_FORMAT;
+        }
+        offsets[0] = 0;
+        pitches[0] = obj_surface->width;
+        offsets[1] = obj_surface->width * obj_surface->y_cb_offset;
+        pitches[1] = obj_surface->cb_cr_pitch;
+        offsets[2] = obj_surface->width * obj_surface->y_cr_offset;
+        pitches[2] = obj_surface->cb_cr_pitch;
+        break;
+    default:
+        assert(0 && "unsupported format");
+        return VA_STATUS_ERROR_INVALID_IMAGE_FORMAT;
+    }
+
+    /* the buffer is created with the surface's orig_width/orig_height,
+     * while the plane layout above is computed from obj_surface->width */
+    buffer = create_planar_buffer(
+        i965->wl_output,
+        name,
+        obj_surface->orig_width,
+        obj_surface->orig_height,
+        drm_format,
+        offsets,
+        pitches
+    );
+    if (!buffer)
+        return VA_STATUS_ERROR_ALLOCATION_FAILED;
+
+    *out_buffer = buffer;
+    return VA_STATUS_SUCCESS;
+}
+
+/*
+ * Hook to return the Wayland buffer associated with a VA image.
+ * Not implemented: every call reports VA_STATUS_ERROR_UNIMPLEMENTED and
+ * none of the arguments is used.
+ */
+static VAStatus
+va_GetImageBufferWl(
+    struct VADriverContext *ctx,
+    VAImageID               image,
+    unsigned int            flags,
+    struct wl_buffer      **out_buffer
+)
+{
+    (void)ctx;
+    (void)image;
+    (void)flags;
+    (void)out_buffer;
+
+    return VA_STATUS_ERROR_UNIMPLEMENTED;
+}
+
+/*
+ * Install the Wayland hooks of this driver into ctx->vtable_wayland.
+ * Returns false when the context carries no Wayland vtable.
+ * NOTE(review): this helper has external linkage but no prototype in
+ * i965_output_wayland.h -- confirm whether it is meant to be static.
+ */
+bool
+ensure_driver_vtable(VADriverContextP ctx)
+{
+    struct VADriverVTableWayland * const wl_hooks = ctx->vtable_wayland;
+
+    if (wl_hooks) {
+        wl_hooks->vaGetSurfaceBufferWl = va_GetSurfaceBufferWl;
+        wl_hooks->vaGetImageBufferWl   = va_GetImageBufferWl;
+    }
+    return wl_hooks != NULL;
+}
+
+/*
+ * Initialize Wayland output support for the driver context.
+ *
+ * Does nothing and returns false unless ctx->display_type is
+ * VA_DISPLAY_WAYLAND. Otherwise allocates the va_wl_output state, opens
+ * libEGL and libwayland-client, resolves the symbols listed below into the
+ * vtable and installs the driver's Wayland hooks. Any failure tears the
+ * partially built state down via i965_output_wayland_terminate() and
+ * returns false.
+ */
+bool
+i965_output_wayland_init(VADriverContextP ctx)
+{
+    /* symbol resolved from libEGL */
+    static const struct dso_symbol libegl_symbols[] = {
+        { "wl_drm_interface",
+          offsetof(struct wl_vtable, drm_interface) },
+        { NULL, }
+    };
+
+    /* symbols resolved from libwayland-client */
+    static const struct dso_symbol libwl_client_symbols[] = {
+        { "wl_buffer_interface",
+          offsetof(struct wl_vtable, buffer_interface) },
+        { "wl_display_get_global",
+          offsetof(struct wl_vtable, display_get_global) },
+        { "wl_display_bind",
+          offsetof(struct wl_vtable, display_bind) },
+        { "wl_display_roundtrip",
+          offsetof(struct wl_vtable, display_roundtrip) },
+        { "wl_proxy_create",
+          offsetof(struct wl_vtable, proxy_create) },
+        { "wl_proxy_destroy",
+          offsetof(struct wl_vtable, proxy_destroy) },
+        { "wl_proxy_marshal",
+          offsetof(struct wl_vtable, proxy_marshal) },
+        { NULL, }
+    };
+
+    struct i965_driver_data * const i965 = i965_driver_data(ctx);
+    struct va_wl_output *output;
+
+    if (ctx->display_type != VA_DISPLAY_WAYLAND)
+        return false;
+
+    output = calloc(1, sizeof(*output));
+    i965->wl_output = output;
+    if (output == NULL)
+        goto error;
+
+    output->libegl_handle = dso_open(LIBEGL_NAME);
+    if (output->libegl_handle == NULL)
+        goto error;
+    if (!dso_get_symbols(output->libegl_handle, &output->vtable,
+                         sizeof(output->vtable), libegl_symbols))
+        goto error;
+
+    output->libwl_client_handle = dso_open(LIBWAYLAND_CLIENT_NAME);
+    if (output->libwl_client_handle == NULL)
+        goto error;
+    if (!dso_get_symbols(output->libwl_client_handle, &output->vtable,
+                         sizeof(output->vtable), libwl_client_symbols))
+        goto error;
+
+    if (!ensure_driver_vtable(ctx))
+        goto error;
+    return true;
+
+error:
+    /* terminate copes with a NULL or half-initialized wl_output */
+    i965_output_wayland_terminate(ctx);
+    return false;
+}
+
+/*
+ * Release everything i965_output_wayland_init() set up.
+ *
+ * No-op when the display is not a Wayland display or when no Wayland state
+ * exists. Otherwise destroys the wl_drm proxy (if bound), closes both
+ * library handles (if open), frees the state and clears i965->wl_output.
+ */
+void
+i965_output_wayland_terminate(VADriverContextP ctx)
+{
+    struct i965_driver_data * const i965 = i965_driver_data(ctx);
+    struct va_wl_output *output;
+
+    if (ctx->display_type != VA_DISPLAY_WAYLAND || !i965->wl_output)
+        return;
+    output = i965->wl_output;
+
+    /* destroy the proxy first: proxy_destroy comes from a library that is
+     * closed further down */
+    if (output->wl_drm != NULL) {
+        output->vtable.proxy_destroy((struct wl_proxy *)output->wl_drm);
+        output->wl_drm = NULL;
+    }
+
+    if (output->libegl_handle != NULL) {
+        dso_close(output->libegl_handle);
+        output->libegl_handle = NULL;
+    }
+
+    if (output->libwl_client_handle != NULL) {
+        dso_close(output->libwl_client_handle);
+        output->libwl_client_handle = NULL;
+    }
+
+    free(output);
+    i965->wl_output = NULL;
+}
diff --git a/src/i965_output_wayland.h b/src/i965_output_wayland.h
new file mode 100644
index 0000000..61ca39f
--- /dev/null
+++ b/src/i965_output_wayland.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2012 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef I965_OUTPUT_WAYLAND_H
+#define I965_OUTPUT_WAYLAND_H
+
+#include <stdbool.h>
+#include <va/va_backend.h> /* VADriverContextP; keeps this header self-contained, as in i965_output_dri.h */
+
+/*
+ * Initialize Wayland output support.
+ * Returns false when the context's display type is not VA_DISPLAY_WAYLAND
+ * or when allocation, library loading, symbol lookup or hook installation
+ * fails; in the failure case all partially acquired resources are released.
+ */
+bool
+i965_output_wayland_init(VADriverContextP ctx);
+
+/*
+ * Release the Wayland output state. Does nothing for non-Wayland displays
+ * or when no state exists.
+ */
+void
+i965_output_wayland_terminate(VADriverContextP ctx);
+
+#endif /* I965_OUTPUT_WAYLAND_H */
diff --git a/src/i965_post_processing.c b/src/i965_post_processing.c
old mode 100644
new mode 100755
index 6e238b4..f10cadd
--- a/src/i965_post_processing.c
+++ b/src/i965_post_processing.c
@@ -36,6 +36,7 @@
#include "i965_defines.h"
#include "i965_structs.h"
#include "i965_drv_video.h"
+#include "gen75_vpp_vebox.h"
#include "i965_post_processing.h"
#include "i965_render.h"
@@ -43,41 +44,113 @@
IS_GEN6((ctx)->intel.device_id) || \
IS_GEN7((ctx)->intel.device_id))
+/* Size of one surface-state slot per layout, ALIGN()ed to 32; each
+ * generation uses the larger of its two layouts. */
+#define SURFACE_STATE_PADDED_SIZE_0_I965 ALIGN(sizeof(struct i965_surface_state), 32)
+#define SURFACE_STATE_PADDED_SIZE_1_I965 ALIGN(sizeof(struct i965_surface_state2), 32)
+#define SURFACE_STATE_PADDED_SIZE_I965 MAX(SURFACE_STATE_PADDED_SIZE_0_I965, SURFACE_STATE_PADDED_SIZE_1_I965)
+
+#define SURFACE_STATE_PADDED_SIZE_0_GEN7 ALIGN(sizeof(struct gen7_surface_state), 32)
+#define SURFACE_STATE_PADDED_SIZE_1_GEN7 ALIGN(sizeof(struct gen7_surface_state2), 32)
+#define SURFACE_STATE_PADDED_SIZE_GEN7 MAX(SURFACE_STATE_PADDED_SIZE_0_GEN7, SURFACE_STATE_PADDED_SIZE_1_GEN7)
+
+/* Common slot size: the larger of the I965 and GEN7 sizes. */
+#define SURFACE_STATE_PADDED_SIZE MAX(SURFACE_STATE_PADDED_SIZE_I965, SURFACE_STATE_PADDED_SIZE_GEN7)
+/* Byte offset of slot `index`. The argument is parenthesized so that an
+ * expression argument such as (i + 1) is multiplied as a whole. */
+#define SURFACE_STATE_OFFSET(index) (SURFACE_STATE_PADDED_SIZE * (index))
+/* Offset just past MAX_PP_SURFACES surface-state slots. */
+#define BINDING_TABLE_OFFSET SURFACE_STATE_OFFSET(MAX_PP_SURFACES)
+
+/* NOTE(review): presumably the pixel block geometry and x-offset alignment
+ * expected by the GPU kernels -- confirm against the shader sources. */
+#define GPU_ASM_BLOCK_WIDTH 16
+#define GPU_ASM_BLOCK_HEIGHT 8
+#define GPU_ASM_X_OFFSET_ALIGNMENT 4
+
+
static const uint32_t pp_null_gen5[][4] = {
-#include "shaders/post_processing/null.g4b.gen5"
+#include "shaders/post_processing/gen5_6/null.g4b.gen5"
+};
+
+static const uint32_t pp_nv12_load_save_nv12_gen5[][4] = {
+#include "shaders/post_processing/gen5_6/nv12_load_save_nv12.g4b.gen5"
+};
+
+static const uint32_t pp_nv12_load_save_pl3_gen5[][4] = {
+#include "shaders/post_processing/gen5_6/nv12_load_save_pl3.g4b.gen5"
};
-static const uint32_t pp_nv12_load_save_gen5[][4] = {
-#include "shaders/post_processing/nv12_load_save_nv12.g4b.gen5"
+static const uint32_t pp_pl3_load_save_nv12_gen5[][4] = {
+#include "shaders/post_processing/gen5_6/pl3_load_save_nv12.g4b.gen5"
+};
+
+static const uint32_t pp_pl3_load_save_pl3_gen5[][4] = {
+#include "shaders/post_processing/gen5_6/pl3_load_save_pl3.g4b.gen5"
};
static const uint32_t pp_nv12_scaling_gen5[][4] = {
-#include "shaders/post_processing/nv12_scaling_nv12.g4b.gen5"
+#include "shaders/post_processing/gen5_6/nv12_scaling_nv12.g4b.gen5"
};
static const uint32_t pp_nv12_avs_gen5[][4] = {
-#include "shaders/post_processing/nv12_avs_nv12.g4b.gen5"
+#include "shaders/post_processing/gen5_6/nv12_avs_nv12.g4b.gen5"
};
static const uint32_t pp_nv12_dndi_gen5[][4] = {
-#include "shaders/post_processing/nv12_dndi_nv12.g4b.gen5"
+#include "shaders/post_processing/gen5_6/nv12_dndi_nv12.g4b.gen5"
+};
+
+static const uint32_t pp_nv12_dn_gen5[][4] = {
+#include "shaders/post_processing/gen5_6/nv12_dn_nv12.g4b.gen5"
+};
+
+static const uint32_t pp_nv12_load_save_pa_gen5[][4] = {
+#include "shaders/post_processing/gen5_6/nv12_load_save_pa.g4b.gen5"
+};
+
+static const uint32_t pp_pl3_load_save_pa_gen5[][4] = {
+#include "shaders/post_processing/gen5_6/pl3_load_save_pa.g4b.gen5"
};
-static void pp_null_initialize(VADriverContextP ctx,
- VASurfaceID in_surface_id, VASurfaceID out_surface_id,
- const VARectangle *src_rect, const VARectangle *dst_rect);
-static void pp_nv12_avs_initialize(VADriverContextP ctx,
- VASurfaceID in_surface_id, VASurfaceID out_surface_id,
- const VARectangle *src_rect, const VARectangle *dst_rect);
-static void pp_nv12_scaling_initialize(VADriverContextP ctx,
- VASurfaceID in_surface_id, VASurfaceID out_surface_id,
- const VARectangle *src_rect, const VARectangle *dst_rect);
-static void pp_nv12_load_save_initialize(VADriverContextP ctx,
- VASurfaceID in_surface_id, VASurfaceID out_surface_id,
- const VARectangle *src_rect, const VARectangle *dst_rect);
-static void pp_nv12_dndi_initialize(VADriverContextP ctx,
- VASurfaceID in_surface_id, VASurfaceID out_surface_id,
- const VARectangle *src_rect, const VARectangle *dst_rect);
+static const uint32_t pp_pa_load_save_nv12_gen5[][4] = {
+#include "shaders/post_processing/gen5_6/pa_load_save_nv12.g4b.gen5"
+};
+
+/*
+ * Forward declarations of the per-module initialize callbacks referenced by
+ * the pp_modules_* tables in this file. All of them share one signature:
+ * driver context, post-processing context, source surface and rectangle,
+ * destination surface and rectangle, and an opaque filter parameter; each
+ * returns a VAStatus.
+ */
+static VAStatus pp_null_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
+ const struct i965_surface *src_surface,
+ const VARectangle *src_rect,
+ struct i965_surface *dst_surface,
+ const VARectangle *dst_rect,
+ void *filter_param);
+static VAStatus pp_nv12_avs_initialize_nlas(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
+ const struct i965_surface *src_surface,
+ const VARectangle *src_rect,
+ struct i965_surface *dst_surface,
+ const VARectangle *dst_rect,
+ void *filter_param);
+static VAStatus pp_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
+ const struct i965_surface *src_surface,
+ const VARectangle *src_rect,
+ struct i965_surface *dst_surface,
+ const VARectangle *dst_rect,
+ void *filter_param);
+static VAStatus gen6_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
+ const struct i965_surface *src_surface,
+ const VARectangle *src_rect,
+ struct i965_surface *dst_surface,
+ const VARectangle *dst_rect,
+ void *filter_param);
+static VAStatus pp_plx_load_save_plx_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
+ const struct i965_surface *src_surface,
+ const VARectangle *src_rect,
+ struct i965_surface *dst_surface,
+ const VARectangle *dst_rect,
+ void *filter_param);
+static VAStatus pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
+ const struct i965_surface *src_surface,
+ const VARectangle *src_rect,
+ struct i965_surface *dst_surface,
+ const VARectangle *dst_rect,
+ void *filter_param);
+
+static VAStatus pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
+ const struct i965_surface *src_surface,
+ const VARectangle *src_rect,
+ struct i965_surface *dst_surface,
+ const VARectangle *dst_rect,
+ void *filter_param);
static struct pp_module pp_modules_gen5[] = {
{
@@ -94,14 +167,50 @@ static struct pp_module pp_modules_gen5[] = {
{
{
- "NV12 Load & Save module",
- PP_NV12_LOAD_SAVE,
- pp_nv12_load_save_gen5,
- sizeof(pp_nv12_load_save_gen5),
+ "NV12_NV12",
+ PP_NV12_LOAD_SAVE_N12,
+ pp_nv12_load_save_nv12_gen5,
+ sizeof(pp_nv12_load_save_nv12_gen5),
+ NULL,
+ },
+
+ pp_plx_load_save_plx_initialize,
+ },
+
+ {
+ {
+ "NV12_PL3",
+ PP_NV12_LOAD_SAVE_PL3,
+ pp_nv12_load_save_pl3_gen5,
+ sizeof(pp_nv12_load_save_pl3_gen5),
+ NULL,
+ },
+
+ pp_plx_load_save_plx_initialize,
+ },
+
+ {
+ {
+ "PL3_NV12",
+ PP_PL3_LOAD_SAVE_N12,
+ pp_pl3_load_save_nv12_gen5,
+ sizeof(pp_pl3_load_save_nv12_gen5),
NULL,
},
- pp_nv12_load_save_initialize,
+ pp_plx_load_save_plx_initialize,
+ },
+
+ {
+ {
+ "PL3_PL3",
+ PP_PL3_LOAD_SAVE_PL3,
+ pp_pl3_load_save_pl3_gen5,
+ sizeof(pp_pl3_load_save_pl3_gen5),
+ NULL,
+ },
+
+ pp_plx_load_save_plx_initialize
},
{
@@ -125,7 +234,7 @@ static struct pp_module pp_modules_gen5[] = {
NULL,
},
- pp_nv12_avs_initialize,
+ pp_nv12_avs_initialize_nlas,
},
{
@@ -139,26 +248,101 @@ static struct pp_module pp_modules_gen5[] = {
pp_nv12_dndi_initialize,
},
+
+ {
+ {
+ "NV12 DN module",
+ PP_NV12_DN,
+ pp_nv12_dn_gen5,
+ sizeof(pp_nv12_dn_gen5),
+ NULL,
+ },
+
+ pp_nv12_dn_initialize,
+ },
+
+ {
+ {
+ "NV12_PA module",
+ PP_NV12_LOAD_SAVE_PA,
+ pp_nv12_load_save_pa_gen5,
+ sizeof(pp_nv12_load_save_pa_gen5),
+ NULL,
+ },
+
+ pp_plx_load_save_plx_initialize,
+ },
+ {
+ {
+ "PL3_PA module",
+ PP_PL3_LOAD_SAVE_PA,
+ pp_pl3_load_save_pa_gen5,
+ sizeof(pp_pl3_load_save_pa_gen5),
+ NULL,
+ },
+
+ pp_plx_load_save_plx_initialize,
+ },
+ {
+ {
+ "PA_NV12 module",
+ PP_PA_LOAD_SAVE_NV12,
+ pp_pa_load_save_nv12_gen5,
+ sizeof(pp_pa_load_save_nv12_gen5),
+ NULL,
+ },
+
+ pp_plx_load_save_plx_initialize,
+ },
+
};
static const uint32_t pp_null_gen6[][4] = {
-#include "shaders/post_processing/null.g6b"
+#include "shaders/post_processing/gen5_6/null.g6b"
};
-static const uint32_t pp_nv12_load_save_gen6[][4] = {
-#include "shaders/post_processing/nv12_load_save_nv12.g6b"
+static const uint32_t pp_nv12_load_save_nv12_gen6[][4] = {
+#include "shaders/post_processing/gen5_6/nv12_load_save_nv12.g6b"
+};
+
+static const uint32_t pp_nv12_load_save_pl3_gen6[][4] = {
+#include "shaders/post_processing/gen5_6/nv12_load_save_pl3.g6b"
+};
+
+static const uint32_t pp_pl3_load_save_nv12_gen6[][4] = {
+#include "shaders/post_processing/gen5_6/pl3_load_save_nv12.g6b"
+};
+
+static const uint32_t pp_pl3_load_save_pl3_gen6[][4] = {
+#include "shaders/post_processing/gen5_6/pl3_load_save_pl3.g6b"
};
static const uint32_t pp_nv12_scaling_gen6[][4] = {
-#include "shaders/post_processing/nv12_scaling_nv12.g6b"
+#include "shaders/post_processing/gen5_6/nv12_avs_nv12.g6b"
};
static const uint32_t pp_nv12_avs_gen6[][4] = {
-#include "shaders/post_processing/nv12_avs_nv12.g6b"
+#include "shaders/post_processing/gen5_6/nv12_avs_nv12.g6b"
};
static const uint32_t pp_nv12_dndi_gen6[][4] = {
-#include "shaders/post_processing/nv12_dndi_nv12.g6b"
+#include "shaders/post_processing/gen5_6/nv12_dndi_nv12.g6b"
+};
+
+static const uint32_t pp_nv12_dn_gen6[][4] = {
+#include "shaders/post_processing/gen5_6/nv12_dn_nv12.g6b"
+};
+
+static const uint32_t pp_nv12_load_save_pa_gen6[][4] = {
+#include "shaders/post_processing/gen5_6/nv12_load_save_pa.g6b"
+};
+
+static const uint32_t pp_pa_load_save_nv12_gen6[][4] = {
+#include "shaders/post_processing/gen5_6/pa_load_save_nv12.g6b"
+};
+
+static const uint32_t pp_pl3_load_save_pa_gen6[][4] = {
+#include "shaders/post_processing/gen5_6/pl3_load_save_pa.g6b"
};
static struct pp_module pp_modules_gen6[] = {
@@ -176,14 +360,50 @@ static struct pp_module pp_modules_gen6[] = {
{
{
- "NV12 Load & Save module",
- PP_NV12_LOAD_SAVE,
- pp_nv12_load_save_gen6,
- sizeof(pp_nv12_load_save_gen6),
+ "NV12_NV12",
+ PP_NV12_LOAD_SAVE_N12,
+ pp_nv12_load_save_nv12_gen6,
+ sizeof(pp_nv12_load_save_nv12_gen6),
+ NULL,
+ },
+
+ pp_plx_load_save_plx_initialize,
+ },
+
+ {
+ {
+ "NV12_PL3",
+ PP_NV12_LOAD_SAVE_PL3,
+ pp_nv12_load_save_pl3_gen6,
+ sizeof(pp_nv12_load_save_pl3_gen6),
+ NULL,
+ },
+
+ pp_plx_load_save_plx_initialize,
+ },
+
+ {
+ {
+ "PL3_NV12",
+ PP_PL3_LOAD_SAVE_N12,
+ pp_pl3_load_save_nv12_gen6,
+ sizeof(pp_pl3_load_save_nv12_gen6),
+ NULL,
+ },
+
+ pp_plx_load_save_plx_initialize,
+ },
+
+ {
+ {
+ "PL3_PL3",
+ PP_PL3_LOAD_SAVE_PL3, /* planar-to-planar id, same as the "PL3_PL3" entry of pp_modules_gen5 */
+ pp_pl3_load_save_pl3_gen6,
+ sizeof(pp_pl3_load_save_pl3_gen6),
 NULL,
 },
- pp_nv12_load_save_initialize,
+ pp_plx_load_save_plx_initialize,
 },
{
@@ -195,7 +415,7 @@ static struct pp_module pp_modules_gen6[] = {
NULL,
},
- pp_nv12_scaling_initialize,
+ gen6_nv12_scaling_initialize,
},
{
@@ -207,7 +427,7 @@ static struct pp_module pp_modules_gen6[] = {
NULL,
},
- pp_nv12_avs_initialize,
+ pp_nv12_avs_initialize_nlas,
},
{
@@ -221,185 +441,641 @@ static struct pp_module pp_modules_gen6[] = {
pp_nv12_dndi_initialize,
},
-};
-#define pp_static_parameter pp_context->pp_static_parameter
-#define pp_inline_parameter pp_context->pp_inline_parameter
+ {
+ {
+ "NV12 DN module",
+ PP_NV12_DN,
+ pp_nv12_dn_gen6,
+ sizeof(pp_nv12_dn_gen6),
+ NULL,
+ },
-static void
-pp_set_surface_tiling(struct i965_surface_state *ss, unsigned int tiling)
-{
- switch (tiling) {
- case I915_TILING_NONE:
- ss->ss3.tiled_surface = 0;
- ss->ss3.tile_walk = 0;
- break;
- case I915_TILING_X:
- ss->ss3.tiled_surface = 1;
- ss->ss3.tile_walk = I965_TILEWALK_XMAJOR;
- break;
- case I915_TILING_Y:
- ss->ss3.tiled_surface = 1;
- ss->ss3.tile_walk = I965_TILEWALK_YMAJOR;
- break;
- }
-}
+ pp_nv12_dn_initialize,
+ },
+ {
+ {
+ "NV12_PA module",
+ PP_NV12_LOAD_SAVE_PA,
+ pp_nv12_load_save_pa_gen6,
+ sizeof(pp_nv12_load_save_pa_gen6),
+ NULL,
+ },
+
+ pp_plx_load_save_plx_initialize,
+ },
+ {
+ {
+ "PL3_PA module",
+ PP_PL3_LOAD_SAVE_PA,
+ pp_pl3_load_save_pa_gen6,
+ sizeof(pp_pl3_load_save_pa_gen6),
+ NULL,
+ },
+
+ pp_plx_load_save_plx_initialize,
+ },
+ {
+ {
+ "PA_NV12 module",
+ PP_PA_LOAD_SAVE_NV12,
+ pp_pa_load_save_nv12_gen6,
+ sizeof(pp_pa_load_save_nv12_gen6),
+ NULL,
+ },
+
+ pp_plx_load_save_plx_initialize,
+ },
-static void
-pp_set_surface2_tiling(struct i965_surface_state2 *ss, unsigned int tiling)
-{
- switch (tiling) {
- case I915_TILING_NONE:
- ss->ss2.tiled_surface = 0;
- ss->ss2.tile_walk = 0;
- break;
- case I915_TILING_X:
- ss->ss2.tiled_surface = 1;
- ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
- break;
- case I915_TILING_Y:
- ss->ss2.tiled_surface = 1;
- ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
- break;
- }
-}
+};
-static void
-ironlake_pp_surface_state(struct i965_post_processing_context *pp_context)
-{
+static const uint32_t pp_null_gen7[][4] = {
+};
-}
+static const uint32_t pp_nv12_load_save_nv12_gen7[][4] = {
+#include "shaders/post_processing/gen7/pl2_to_pl2.g7b"
+};
-static void
-ironlake_pp_interface_descriptor_table(struct i965_post_processing_context *pp_context)
-{
- struct i965_interface_descriptor *desc;
- dri_bo *bo;
- int pp_index = pp_context->current_pp;
+static const uint32_t pp_nv12_load_save_pl3_gen7[][4] = {
+#include "shaders/post_processing/gen7/pl2_to_pl3.g7b"
+};
- bo = pp_context->idrt.bo;
- dri_bo_map(bo, 1);
- assert(bo->virtual);
- desc = bo->virtual;
- memset(desc, 0, sizeof(*desc));
- desc->desc0.grf_reg_blocks = 10;
- desc->desc0.kernel_start_pointer = pp_context->pp_modules[pp_index].kernel.bo->offset >> 6; /* reloc */
- desc->desc1.const_urb_entry_read_offset = 0;
- desc->desc1.const_urb_entry_read_len = 4; /* grf 1-4 */
- desc->desc2.sampler_state_pointer = pp_context->sampler_state_table.bo->offset >> 5;
- desc->desc2.sampler_count = 0;
- desc->desc3.binding_table_entry_count = 0;
- desc->desc3.binding_table_pointer =
- pp_context->binding_table.bo->offset >> 5; /*reloc */
+static const uint32_t pp_pl3_load_save_nv12_gen7[][4] = {
+#include "shaders/post_processing/gen7/pl3_to_pl2.g7b"
+};
- dri_bo_emit_reloc(bo,
- I915_GEM_DOMAIN_INSTRUCTION, 0,
- desc->desc0.grf_reg_blocks,
- offsetof(struct i965_interface_descriptor, desc0),
- pp_context->pp_modules[pp_index].kernel.bo);
+static const uint32_t pp_pl3_load_save_pl3_gen7[][4] = {
+#include "shaders/post_processing/gen7/pl3_to_pl3.g7b"
+};
- dri_bo_emit_reloc(bo,
- I915_GEM_DOMAIN_INSTRUCTION, 0,
- desc->desc2.sampler_count << 2,
- offsetof(struct i965_interface_descriptor, desc2),
- pp_context->sampler_state_table.bo);
+static const uint32_t pp_nv12_scaling_gen7[][4] = {
+#include "shaders/post_processing/gen7/avs.g7b"
+};
- dri_bo_emit_reloc(bo,
- I915_GEM_DOMAIN_INSTRUCTION, 0,
- desc->desc3.binding_table_entry_count,
- offsetof(struct i965_interface_descriptor, desc3),
- pp_context->binding_table.bo);
+static const uint32_t pp_nv12_avs_gen7[][4] = {
+#include "shaders/post_processing/gen7/avs.g7b"
+};
- dri_bo_unmap(bo);
- pp_context->idrt.num_interface_descriptors++;
-}
+static const uint32_t pp_nv12_dndi_gen7[][4] = {
+#include "shaders/post_processing/gen7/dndi.g7b"
+};
-static void
-ironlake_pp_binding_table(struct i965_post_processing_context *pp_context)
-{
- unsigned int *binding_table;
- dri_bo *bo = pp_context->binding_table.bo;
- int i;
+static const uint32_t pp_nv12_dn_gen7[][4] = {
+#include "shaders/post_processing/gen7/nv12_dn_nv12.g7b"
+};
- dri_bo_map(bo, 1);
- assert(bo->virtual);
- binding_table = bo->virtual;
- memset(binding_table, 0, bo->size);
-
- for (i = 0; i < MAX_PP_SURFACES; i++) {
- if (pp_context->surfaces[i].ss_bo) {
- assert(pp_context->surfaces[i].s_bo);
-
- binding_table[i] = pp_context->surfaces[i].ss_bo->offset;
- dri_bo_emit_reloc(bo,
- I915_GEM_DOMAIN_INSTRUCTION, 0,
- 0,
- i * sizeof(*binding_table),
- pp_context->surfaces[i].ss_bo);
- }
-
- }
+static const uint32_t pp_nv12_load_save_pa_gen7[][4] = {
+#include "shaders/post_processing/gen7/pl2_to_pa.g7b"
+};
- dri_bo_unmap(bo);
-}
+static const uint32_t pp_pl3_load_save_pa_gen7[][4] = {
+#include "shaders/post_processing/gen7/pl3_to_pa.g7b"
+};
-static void
-ironlake_pp_vfe_state(struct i965_post_processing_context *pp_context)
-{
- struct i965_vfe_state *vfe_state;
- dri_bo *bo;
+static const uint32_t pp_pa_load_save_nv12_gen7[][4] = {
+#include "shaders/post_processing/gen7/pa_to_pl2.g7b"
+};
- bo = pp_context->vfe_state.bo;
- dri_bo_map(bo, 1);
- assert(bo->virtual);
- vfe_state = bo->virtual;
- memset(vfe_state, 0, sizeof(*vfe_state));
- vfe_state->vfe1.max_threads = pp_context->urb.num_vfe_entries - 1;
- vfe_state->vfe1.urb_entry_alloc_size = pp_context->urb.size_vfe_entry - 1;
- vfe_state->vfe1.num_urb_entries = pp_context->urb.num_vfe_entries;
- vfe_state->vfe1.vfe_mode = VFE_GENERIC_MODE;
- vfe_state->vfe1.children_present = 0;
- vfe_state->vfe2.interface_descriptor_base =
- pp_context->idrt.bo->offset >> 4; /* reloc */
- dri_bo_emit_reloc(bo,
- I915_GEM_DOMAIN_INSTRUCTION, 0,
- 0,
- offsetof(struct i965_vfe_state, vfe2),
- pp_context->idrt.bo);
- dri_bo_unmap(bo);
-}
+/*
+ * Forward declarations of the Gen7 initialize callbacks referenced by
+ * pp_modules_gen7 and pp_modules_gen75. They use the same signature as the
+ * pp_*_initialize callbacks declared earlier in this file.
+ */
+static VAStatus gen7_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
+ const struct i965_surface *src_surface,
+ const VARectangle *src_rect,
+ struct i965_surface *dst_surface,
+ const VARectangle *dst_rect,
+ void *filter_param);
+static VAStatus gen7_pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
+ const struct i965_surface *src_surface,
+ const VARectangle *src_rect,
+ struct i965_surface *dst_surface,
+ const VARectangle *dst_rect,
+ void *filter_param);
+static VAStatus gen7_pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
+ const struct i965_surface *src_surface,
+ const VARectangle *src_rect,
+ struct i965_surface *dst_surface,
+ const VARectangle *dst_rect,
+ void *filter_param);
+
+static struct pp_module pp_modules_gen7[] = {
+ {
+ {
+ "NULL module (for testing)",
+ PP_NULL,
+ pp_null_gen7,
+ sizeof(pp_null_gen7),
+ NULL,
+ },
-static void
-ironlake_pp_upload_constants(struct i965_post_processing_context *pp_context)
-{
- unsigned char *constant_buffer;
+ pp_null_initialize,
+ },
- assert(sizeof(pp_static_parameter) == 128);
- dri_bo_map(pp_context->curbe.bo, 1);
+ {
+ {
+ "NV12_NV12",
+ PP_NV12_LOAD_SAVE_N12,
+ pp_nv12_load_save_nv12_gen7,
+ sizeof(pp_nv12_load_save_nv12_gen7),
+ NULL,
+ },
+
+ gen7_pp_plx_avs_initialize,
+ },
+
+ {
+ {
+ "NV12_PL3",
+ PP_NV12_LOAD_SAVE_PL3,
+ pp_nv12_load_save_pl3_gen7,
+ sizeof(pp_nv12_load_save_pl3_gen7),
+ NULL,
+ },
+
+ gen7_pp_plx_avs_initialize,
+ },
+
+ {
+ {
+ "PL3_NV12",
+ PP_PL3_LOAD_SAVE_N12,
+ pp_pl3_load_save_nv12_gen7,
+ sizeof(pp_pl3_load_save_nv12_gen7),
+ NULL,
+ },
+
+ gen7_pp_plx_avs_initialize,
+ },
+
+ {
+ {
+ "PL3_PL3",
+ PP_PL3_LOAD_SAVE_PL3, /* planar-to-planar id, same as the "PL3_PL3" entry of pp_modules_gen5 */
+ pp_pl3_load_save_pl3_gen7,
+ sizeof(pp_pl3_load_save_pl3_gen7),
+ NULL,
+ },
+
+ gen7_pp_plx_avs_initialize,
+ },
+
+ {
+ {
+ "NV12 Scaling module",
+ PP_NV12_SCALING,
+ pp_nv12_scaling_gen7,
+ sizeof(pp_nv12_scaling_gen7),
+ NULL,
+ },
+
+ gen7_pp_plx_avs_initialize,
+ },
+
+ {
+ {
+ "NV12 AVS module",
+ PP_NV12_AVS,
+ pp_nv12_avs_gen7,
+ sizeof(pp_nv12_avs_gen7),
+ NULL,
+ },
+
+ gen7_pp_plx_avs_initialize,
+ },
+
+ {
+ {
+ "NV12 DNDI module",
+ PP_NV12_DNDI,
+ pp_nv12_dndi_gen7,
+ sizeof(pp_nv12_dndi_gen7),
+ NULL,
+ },
+
+ gen7_pp_nv12_dndi_initialize,
+ },
+
+ {
+ {
+ "NV12 DN module",
+ PP_NV12_DN,
+ pp_nv12_dn_gen7,
+ sizeof(pp_nv12_dn_gen7),
+ NULL,
+ },
+
+ gen7_pp_nv12_dn_initialize,
+ },
+ {
+ {
+ "NV12_PA module",
+ PP_NV12_LOAD_SAVE_PA,
+ pp_nv12_load_save_pa_gen7,
+ sizeof(pp_nv12_load_save_pa_gen7),
+ NULL,
+ },
+
+ gen7_pp_plx_avs_initialize,
+ },
+ {
+ {
+ "PL3_PA module",
+ PP_PL3_LOAD_SAVE_PA,
+ pp_pl3_load_save_pa_gen7,
+ sizeof(pp_pl3_load_save_pa_gen7),
+ NULL,
+ },
+
+ gen7_pp_plx_avs_initialize,
+ },
+
+ {
+ {
+ "PA_NV12 module",
+ PP_PA_LOAD_SAVE_NV12,
+ pp_pa_load_save_nv12_gen7,
+ sizeof(pp_pa_load_save_nv12_gen7),
+ NULL,
+ },
+
+ gen7_pp_plx_avs_initialize,
+ },
+};
+
+/*
+ * Kernel tables for the gen75 post-processing modules.  Every table is an
+ * array of rows of four 32-bit words whose contents come from the #included
+ * .g75b file.  NOTE(review): "gen75" presumably refers to Haswell -- confirm.
+ */
+
+/* NULL module: the initializer is empty, so this table has no rows. */
+static const uint32_t pp_null_gen75[][4] = {
+};
+
+/* NV12 -> NV12 load/save kernel. */
+static const uint32_t pp_nv12_load_save_nv12_gen75[][4] = {
+#include "shaders/post_processing/gen7/pl2_to_pl2.g75b"
+};
+
+/* NV12 -> PL3 load/save kernel. */
+static const uint32_t pp_nv12_load_save_pl3_gen75[][4] = {
+#include "shaders/post_processing/gen7/pl2_to_pl3.g75b"
+};
+
+/* PL3 -> NV12 load/save kernel. */
+static const uint32_t pp_pl3_load_save_nv12_gen75[][4] = {
+#include "shaders/post_processing/gen7/pl3_to_pl2.g75b"
+};
+
+/* PL3 -> PL3 load/save kernel. */
+static const uint32_t pp_pl3_load_save_pl3_gen75[][4] = {
+#include "shaders/post_processing/gen7/pl3_to_pl3.g75b"
+};
+
+/* NV12 scaling kernel; includes the same avs.g75b file as pp_nv12_avs_gen75. */
+static const uint32_t pp_nv12_scaling_gen75[][4] = {
+#include "shaders/post_processing/gen7/avs.g75b"
+};
+
+/* NV12 AVS kernel. */
+static const uint32_t pp_nv12_avs_gen75[][4] = {
+#include "shaders/post_processing/gen7/avs.g75b"
+};
+
+/* NV12 DNDI kernel: the include is commented out, so this table has no rows. */
+static const uint32_t pp_nv12_dndi_gen75[][4] = {
+// #include "shaders/post_processing/gen7/dndi.g75b"
+};
+
+/* NV12 DN kernel: the include is commented out, so this table has no rows. */
+static const uint32_t pp_nv12_dn_gen75[][4] = {
+// #include "shaders/post_processing/gen7/nv12_dn_nv12.g75b"
+};
+/* NV12 -> packed (PA) load/save kernel. */
+static const uint32_t pp_nv12_load_save_pa_gen75[][4] = {
+#include "shaders/post_processing/gen7/pl2_to_pa.g75b"
+};
+
+/* PL3 -> packed (PA) load/save kernel. */
+static const uint32_t pp_pl3_load_save_pa_gen75[][4] = {
+#include "shaders/post_processing/gen7/pl3_to_pa.g75b"
+};
+
+/* Packed (PA) -> NV12 load/save kernel. */
+static const uint32_t pp_pa_load_save_nv12_gen75[][4] = {
+#include "shaders/post_processing/gen7/pa_to_pl2.g75b"
+};
+
+/*
+ * Post-processing module table for gen75.
+ *
+ * Each entry pairs a kernel description (display name, PP_* identifier,
+ * kernel words, size of the kernel in bytes, and a NULL slot) with the
+ * initialize callback used for that module.
+ *
+ * NOTE(review): ironlake_pp_interface_descriptor_table() indexes
+ * pp_context->pp_modules[] with pp_context->current_pp, so the entries are
+ * presumably expected to stay in PP_* enumeration order -- do not reorder
+ * without confirming.
+ *
+ * The DNDI and DN kernel tables are empty on gen75 (their shader includes
+ * are commented out), so the corresponding sizeof() values are 0.
+ */
+static struct pp_module pp_modules_gen75[] = {
+    {
+        {
+            "NULL module (for testing)",
+            PP_NULL,
+            pp_null_gen75,
+            sizeof(pp_null_gen75),
+            NULL,
+        },
+
+        pp_null_initialize,
+    },
+
+    {
+        {
+            "NV12_NV12",
+            PP_NV12_LOAD_SAVE_N12,
+            pp_nv12_load_save_nv12_gen75,
+            sizeof(pp_nv12_load_save_nv12_gen75),
+            NULL,
+        },
+
+        gen7_pp_plx_avs_initialize,
+    },
+
+    {
+        {
+            "NV12_PL3",
+            PP_NV12_LOAD_SAVE_PL3,
+            pp_nv12_load_save_pl3_gen75,
+            sizeof(pp_nv12_load_save_pl3_gen75),
+            NULL,
+        },
+
+        gen7_pp_plx_avs_initialize,
+    },
+
+    {
+        {
+            "PL3_NV12",
+            PP_PL3_LOAD_SAVE_N12,
+            pp_pl3_load_save_nv12_gen75,
+            sizeof(pp_pl3_load_save_nv12_gen75),
+            NULL,
+        },
+
+        gen7_pp_plx_avs_initialize,
+    },
+
+    {
+        {
+            "PL3_PL3",
+            /* Was PP_PL3_LOAD_SAVE_N12, copy-pasted from the entry above. */
+            PP_PL3_LOAD_SAVE_PL3,
+            pp_pl3_load_save_pl3_gen75,
+            sizeof(pp_pl3_load_save_pl3_gen75),
+            NULL,
+        },
+
+        gen7_pp_plx_avs_initialize,
+    },
+
+    {
+        {
+            "NV12 Scaling module",
+            PP_NV12_SCALING,
+            pp_nv12_scaling_gen75,
+            sizeof(pp_nv12_scaling_gen75),
+            NULL,
+        },
+
+        gen7_pp_plx_avs_initialize,
+    },
+
+    {
+        {
+            "NV12 AVS module",
+            PP_NV12_AVS,
+            pp_nv12_avs_gen75,
+            sizeof(pp_nv12_avs_gen75),
+            NULL,
+        },
+
+        gen7_pp_plx_avs_initialize,
+    },
+
+    {
+        {
+            "NV12 DNDI module",
+            PP_NV12_DNDI,
+            pp_nv12_dndi_gen75,
+            sizeof(pp_nv12_dndi_gen75),
+            NULL,
+        },
+
+        gen7_pp_nv12_dndi_initialize,
+    },
+
+    {
+        {
+            "NV12 DN module",
+            PP_NV12_DN,
+            pp_nv12_dn_gen75,
+            sizeof(pp_nv12_dn_gen75),
+            NULL,
+        },
+
+        gen7_pp_nv12_dn_initialize,
+    },
+
+    {
+        {
+            "NV12_PA module",
+            PP_NV12_LOAD_SAVE_PA,
+            pp_nv12_load_save_pa_gen75,
+            sizeof(pp_nv12_load_save_pa_gen75),
+            NULL,
+        },
+
+        gen7_pp_plx_avs_initialize,
+    },
+
+    {
+        {
+            "PL3_PA module",
+            PP_PL3_LOAD_SAVE_PA,
+            pp_pl3_load_save_pa_gen75,
+            sizeof(pp_pl3_load_save_pa_gen75),
+            NULL,
+        },
+
+        gen7_pp_plx_avs_initialize,
+    },
+
+    {
+        {
+            "PA_NV12 module",
+            PP_PA_LOAD_SAVE_NV12,
+            pp_pa_load_save_nv12_gen75,
+            sizeof(pp_pa_load_save_nv12_gen75),
+            NULL,
+        },
+
+        gen7_pp_plx_avs_initialize,
+    },
+};
+
+/*
+ * Return the fourcc describing the pixel layout behind `surface`.
+ *
+ * An I965_SURFACE_TYPE_IMAGE surface is resolved through IMAGE() and reports
+ * the image's format fourcc; any other type is resolved through SURFACE()
+ * and reports the surface object's fourcc.
+ */
+static int
+pp_get_surface_fourcc(VADriverContextP ctx, const struct i965_surface *surface)
+{
+    /* Kept under this exact name: presumably referenced by the IMAGE()/SURFACE() macros. */
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+
+    if (surface->type != I965_SURFACE_TYPE_IMAGE) {
+        struct object_surface *surf_obj = SURFACE(surface->id);
+
+        return surf_obj->fourcc;
+    } else {
+        struct object_image *img_obj = IMAGE(surface->id);
+
+        return img_obj->image.format.fourcc;
+    }
+}
+
+/*
+ * Program the ss3 tiling fields of a surface state from a tiling mode.
+ * A mode other than I915_TILING_NONE/X/Y leaves `ss` unmodified.
+ */
+static void
+pp_set_surface_tiling(struct i965_surface_state *ss, unsigned int tiling)
+{
+    if (tiling == I915_TILING_NONE) {
+        ss->ss3.tiled_surface = 0;
+        ss->ss3.tile_walk = 0;
+    } else if (tiling == I915_TILING_X) {
+        ss->ss3.tiled_surface = 1;
+        ss->ss3.tile_walk = I965_TILEWALK_XMAJOR;
+    } else if (tiling == I915_TILING_Y) {
+        ss->ss3.tiled_surface = 1;
+        ss->ss3.tile_walk = I965_TILEWALK_YMAJOR;
+    }
+}
+
+/*
+ * Program the ss2 tiling fields of a surface state 2 from a tiling mode.
+ * A mode other than I915_TILING_NONE/X/Y leaves `ss` unmodified.
+ */
+static void
+pp_set_surface2_tiling(struct i965_surface_state2 *ss, unsigned int tiling)
+{
+    if (tiling == I915_TILING_NONE) {
+        ss->ss2.tiled_surface = 0;
+        ss->ss2.tile_walk = 0;
+    } else if (tiling == I915_TILING_X) {
+        ss->ss2.tiled_surface = 1;
+        ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
+    } else if (tiling == I915_TILING_Y) {
+        ss->ss2.tiled_surface = 1;
+        ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
+    }
+}
+
+/*
+ * Program the ss0 tiling fields of a gen7 surface state from a tiling mode.
+ * A mode other than I915_TILING_NONE/X/Y leaves `ss` unmodified.
+ */
+static void
+gen7_pp_set_surface_tiling(struct gen7_surface_state *ss, unsigned int tiling)
+{
+    if (tiling == I915_TILING_NONE) {
+        ss->ss0.tiled_surface = 0;
+        ss->ss0.tile_walk = 0;
+    } else if (tiling == I915_TILING_X) {
+        ss->ss0.tiled_surface = 1;
+        ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
+    } else if (tiling == I915_TILING_Y) {
+        ss->ss0.tiled_surface = 1;
+        ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
+    }
+}
+
+/*
+ * Program the ss2 tiling fields of a gen7 surface state 2 from a tiling mode.
+ * A mode other than I915_TILING_NONE/X/Y leaves `ss` unmodified.
+ */
+static void
+gen7_pp_set_surface2_tiling(struct gen7_surface_state2 *ss, unsigned int tiling)
+{
+    if (tiling == I915_TILING_NONE) {
+        ss->ss2.tiled_surface = 0;
+        ss->ss2.tile_walk = 0;
+    } else if (tiling == I915_TILING_X) {
+        ss->ss2.tiled_surface = 1;
+        ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
+    } else if (tiling == I915_TILING_Y) {
+        ss->ss2.tiled_surface = 1;
+        ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
+    }
+}
+
+
+/*
+ * Fill the first interface descriptor in pp_context->idrt.bo for the module
+ * selected by pp_context->current_pp, emit relocations for the kernel and
+ * sampler-state buffer addresses it embeds, and bump
+ * idrt.num_interface_descriptors.
+ */
+static void
+ironlake_pp_interface_descriptor_table(struct i965_post_processing_context *pp_context)
+{
+ struct i965_interface_descriptor *desc;
+ dri_bo *bo;
+ int pp_index = pp_context->current_pp;
+
+ bo = pp_context->idrt.bo;
+ dri_bo_map(bo, 1);
+ assert(bo->virtual);
+ desc = bo->virtual;
+ memset(desc, 0, sizeof(*desc));
+ desc->desc0.grf_reg_blocks = 10;
+ desc->desc0.kernel_start_pointer = pp_context->pp_modules[pp_index].kernel.bo->offset >> 6; /* reloc */
+ desc->desc1.const_urb_entry_read_offset = 0;
+ desc->desc1.const_urb_entry_read_len = 4; /* grf 1-4 */
+ desc->desc2.sampler_state_pointer = pp_context->sampler_state_table.bo->offset >> 5;
+ desc->desc2.sampler_count = 0;
+ desc->desc3.binding_table_entry_count = 0;
+ desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET >> 5);
+
+ /*
+  * Relocation for desc0 (kernel address).  NOTE(review): the delta is the
+  * grf_reg_blocks value, presumably so the non-address bits of the dword
+  * survive when the address is patched in -- confirm.
+  */
+ dri_bo_emit_reloc(bo,
+ I915_GEM_DOMAIN_INSTRUCTION, 0,
+ desc->desc0.grf_reg_blocks,
+ offsetof(struct i965_interface_descriptor, desc0),
+ pp_context->pp_modules[pp_index].kernel.bo);
+
+ /* Relocation for desc2 (sampler state address); delta is sampler_count << 2. */
+ dri_bo_emit_reloc(bo,
+ I915_GEM_DOMAIN_INSTRUCTION, 0,
+ desc->desc2.sampler_count << 2,
+ offsetof(struct i965_interface_descriptor, desc2),
+ pp_context->sampler_state_table.bo);
+
+ dri_bo_unmap(bo);
+ pp_context->idrt.num_interface_descriptors++;
+}
+
+/*
+ * Fill the VFE state in pp_context->vfe_state.bo from the URB settings in
+ * pp_context->urb and point it at the interface descriptor table
+ * (pp_context->idrt.bo) through a relocation on vfe2.
+ */
+static void
+ironlake_pp_vfe_state(struct i965_post_processing_context *pp_context)
+{
+ struct i965_vfe_state *vfe_state;
+ dri_bo *bo;
+
+ bo = pp_context->vfe_state.bo;
+ dri_bo_map(bo, 1);
+ assert(bo->virtual);
+ vfe_state = bo->virtual;
+ memset(vfe_state, 0, sizeof(*vfe_state));
+ /* max_threads and urb_entry_alloc_size are written as (value - 1). */
+ vfe_state->vfe1.max_threads = pp_context->urb.num_vfe_entries - 1;
+ vfe_state->vfe1.urb_entry_alloc_size = pp_context->urb.size_vfe_entry - 1;
+ vfe_state->vfe1.num_urb_entries = pp_context->urb.num_vfe_entries;
+ vfe_state->vfe1.vfe_mode = VFE_GENERIC_MODE;
+ vfe_state->vfe1.children_present = 0;
+ vfe_state->vfe2.interface_descriptor_base =
+ pp_context->idrt.bo->offset >> 4; /* reloc */
+ dri_bo_emit_reloc(bo,
+ I915_GEM_DOMAIN_INSTRUCTION, 0,
+ 0,
+ offsetof(struct i965_vfe_state, vfe2),
+ pp_context->idrt.bo);
+ dri_bo_unmap(bo);
+}
+
+static void
+ironlake_pp_upload_constants(struct i965_post_processing_context *pp_context)
+{
+ unsigned char *constant_buffer;
+ struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
+
+ assert(sizeof(*pp_static_parameter) == 128);
+ dri_bo_map(pp_context->curbe.bo, 1);
assert(pp_context->curbe.bo->virtual);
constant_buffer = pp_context->curbe.bo->virtual;
- memcpy(constant_buffer, &pp_static_parameter, sizeof(pp_static_parameter));
+ memcpy(constant_buffer, pp_static_parameter, sizeof(*pp_static_parameter));
dri_bo_unmap(pp_context->curbe.bo);
}
static void
-ironlake_pp_states_setup(VADriverContextP ctx)
+ironlake_pp_states_setup(VADriverContextP ctx,
+ struct i965_post_processing_context *pp_context)
{
- struct i965_driver_data *i965 = i965_driver_data(ctx);
- struct i965_post_processing_context *pp_context = i965->pp_context;
-
- ironlake_pp_surface_state(pp_context);
- ironlake_pp_binding_table(pp_context);
ironlake_pp_interface_descriptor_table(pp_context);
ironlake_pp_vfe_state(pp_context);
ironlake_pp_upload_constants(pp_context);
}
static void
-ironlake_pp_pipeline_select(VADriverContextP ctx)
+ironlake_pp_pipeline_select(VADriverContextP ctx,
+ struct i965_post_processing_context *pp_context)
{
- struct i965_driver_data *i965 = i965_driver_data(ctx);
- struct intel_batchbuffer *batch = i965->batch;
+ struct intel_batchbuffer *batch = pp_context->batch;
BEGIN_BATCH(batch, 1);
OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
@@ -407,10 +1083,10 @@ ironlake_pp_pipeline_select(VADriverContextP ctx)
}
static void
-ironlake_pp_urb_layout(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
+ironlake_pp_urb_layout(VADriverContextP ctx,
+ struct i965_post_processing_context *pp_context)
{
- struct i965_driver_data *i965 = i965_driver_data(ctx);
- struct intel_batchbuffer *batch = i965->batch;
+ struct intel_batchbuffer *batch = pp_context->batch;
unsigned int vfe_fence, cs_fence;
vfe_fence = pp_context->urb.cs_start;
@@ -426,15 +1102,15 @@ ironlake_pp_urb_layout(VADriverContextP ctx, struct i965_post_processing_context
}
static void
-ironlake_pp_state_base_address(VADriverContextP ctx)
+ironlake_pp_state_base_address(VADriverContextP ctx,
+ struct i965_post_processing_context *pp_context)
{
- struct i965_driver_data *i965 = i965_driver_data(ctx);
- struct intel_batchbuffer *batch = i965->batch;
+ struct intel_batchbuffer *batch = pp_context->batch;
BEGIN_BATCH(batch, 8);
OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 6);
OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
- OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
+ OUT_RELOC(batch, pp_context->surface_state_binding_table.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
@@ -444,10 +1120,10 @@ ironlake_pp_state_base_address(VADriverContextP ctx)
}
static void
-ironlake_pp_state_pointers(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
+ironlake_pp_state_pointers(VADriverContextP ctx,
+ struct i965_post_processing_context *pp_context)
{
- struct i965_driver_data *i965 = i965_driver_data(ctx);
- struct intel_batchbuffer *batch = i965->batch;
+ struct intel_batchbuffer *batch = pp_context->batch;
BEGIN_BATCH(batch, 3);
OUT_BATCH(batch, CMD_MEDIA_STATE_POINTERS | 1);
@@ -457,10 +1133,10 @@ ironlake_pp_state_pointers(VADriverContextP ctx, struct i965_post_processing_con
}
static void
-ironlake_pp_cs_urb_layout(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
+ironlake_pp_cs_urb_layout(VADriverContextP ctx,
+ struct i965_post_processing_context *pp_context)
{
- struct i965_driver_data *i965 = i965_driver_data(ctx);
- struct intel_batchbuffer *batch = i965->batch;
+ struct intel_batchbuffer *batch = pp_context->batch;
BEGIN_BATCH(batch, 2);
OUT_BATCH(batch, CMD_CS_URB_STATE | 0);
@@ -471,10 +1147,10 @@ ironlake_pp_cs_urb_layout(VADriverContextP ctx, struct i965_post_processing_cont
}
static void
-ironlake_pp_constant_buffer(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
+ironlake_pp_constant_buffer(VADriverContextP ctx,
+ struct i965_post_processing_context *pp_context)
{
- struct i965_driver_data *i965 = i965_driver_data(ctx);
- struct intel_batchbuffer *batch = i965->batch;
+ struct intel_batchbuffer *batch = pp_context->batch;
BEGIN_BATCH(batch, 2);
OUT_BATCH(batch, CMD_CONSTANT_BUFFER | (1 << 8) | (2 - 2));
@@ -485,53 +1161,512 @@ ironlake_pp_constant_buffer(VADriverContextP ctx, struct i965_post_processing_co
}
static void
-ironlake_pp_object_walker(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
+ironlake_pp_object_walker(VADriverContextP ctx,
+ struct i965_post_processing_context *pp_context)
{
- struct i965_driver_data *i965 = i965_driver_data(ctx);
- struct intel_batchbuffer *batch = i965->batch;
+ struct intel_batchbuffer *batch = pp_context->batch;
int x, x_steps, y, y_steps;
+ struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
x_steps = pp_context->pp_x_steps(&pp_context->private_context);
y_steps = pp_context->pp_y_steps(&pp_context->private_context);
- for (y = 0; y < y_steps; y++) {
- for (x = 0; x < x_steps; x++) {
- if (!pp_context->pp_set_block_parameter(pp_context, x, y)) {
- BEGIN_BATCH(batch, 20);
- OUT_BATCH(batch, CMD_MEDIA_OBJECT | 18);
- OUT_BATCH(batch, 0);
- OUT_BATCH(batch, 0); /* no indirect data */
- OUT_BATCH(batch, 0);
+ for (y = 0; y < y_steps; y++) {
+ for (x = 0; x < x_steps; x++) {
+ if (!pp_context->pp_set_block_parameter(pp_context, x, y)) {
+ BEGIN_BATCH(batch, 20);
+ OUT_BATCH(batch, CMD_MEDIA_OBJECT | 18);
+ OUT_BATCH(batch, 0);
+ OUT_BATCH(batch, 0); /* no indirect data */
+ OUT_BATCH(batch, 0);
+
+ /* inline data grf 5-6 */
+ assert(sizeof(*pp_inline_parameter) == 64);
+ intel_batchbuffer_data(batch, pp_inline_parameter, sizeof(*pp_inline_parameter));
+
+ ADVANCE_BATCH(batch);
+ }
+ }
+ }
+}
+
+/*
+ * Emit the complete post-processing command sequence into
+ * pp_context->batch.  Everything between start_atomic and end_atomic is
+ * emitted as one atomic section (0x1000 is the size passed to
+ * intel_batchbuffer_start_atomic): an MI flush, pipeline select, state base
+ * address, state pointers, URB and CS URB layout, the constant buffer, and
+ * finally the per-block media objects from the object walker.
+ */
+static void
+ironlake_pp_pipeline_setup(VADriverContextP ctx,
+ struct i965_post_processing_context *pp_context)
+{
+ struct intel_batchbuffer *batch = pp_context->batch;
+
+ intel_batchbuffer_start_atomic(batch, 0x1000);
+ intel_batchbuffer_emit_mi_flush(batch);
+ ironlake_pp_pipeline_select(ctx, pp_context);
+ ironlake_pp_state_base_address(ctx, pp_context);
+ ironlake_pp_state_pointers(ctx, pp_context);
+ ironlake_pp_urb_layout(ctx, pp_context);
+ ironlake_pp_cs_urb_layout(ctx, pp_context);
+ ironlake_pp_constant_buffer(ctx, pp_context);
+ ironlake_pp_object_walker(ctx, pp_context);
+ intel_batchbuffer_end_atomic(batch);
+}
+
+/*
+ * Set the source packed-component offsets in the static parameters (grf1)
+ * when the source surface uses a packed YUV format:
+ *   YUY2: U offset 1, V offset 3
+ *   UYVY: Y offset 1, V offset 2
+ * Any other fourcc leaves the static parameters untouched.
+ */
+static void i965_update_src_surface_uv_offset(
+    VADriverContextP ctx,
+    struct i965_post_processing_context *pp_context,
+    const struct i965_surface *surface)
+{
+    struct pp_static_parameter *params = pp_context->pp_static_parameter;
+
+    switch (pp_get_surface_fourcc(ctx, surface)) {
+    case VA_FOURCC('Y', 'U', 'Y', '2'):
+        params->grf1.source_packed_u_offset = 1;
+        params->grf1.source_packed_v_offset = 3;
+        break;
+
+    case VA_FOURCC('U', 'Y', 'V', 'Y'):
+        params->grf1.source_packed_y_offset = 1;
+        params->grf1.source_packed_v_offset = 2;
+        break;
+
+    default:
+        break;
+    }
+}
+
+/*
+ * Set the destination packed-component offsets in the static parameters
+ * (grf1.r1_2.load_and_save) when the destination surface uses a packed YUV
+ * format:
+ *   YUY2: U offset 1, V offset 3
+ *   UYVY: Y offset 1, V offset 2
+ * Any other fourcc leaves the static parameters untouched.
+ */
+static void i965_update_dst_surface_uv_offset(
+    VADriverContextP ctx,
+    struct i965_post_processing_context *pp_context,
+    const struct i965_surface *surface)
+{
+    struct pp_static_parameter *params = pp_context->pp_static_parameter;
+
+    switch (pp_get_surface_fourcc(ctx, surface)) {
+    case VA_FOURCC('Y', 'U', 'Y', '2'):
+        params->grf1.r1_2.load_and_save.destination_packed_u_offset = 1;
+        params->grf1.r1_2.load_and_save.destination_packed_v_offset = 3;
+        break;
+
+    case VA_FOURCC('U', 'Y', 'V', 'Y'):
+        params->grf1.r1_2.load_and_save.destination_packed_y_offset = 1;
+        params->grf1.r1_2.load_and_save.destination_packed_v_offset = 2;
+        break;
+
+    default:
+        break;
+    }
+}
+
+/*
+ * Write a 2D surface state into slot `index` of the combined surface-state /
+ * binding-table buffer and point binding-table entry `index` at it.
+ *
+ * surf_bo / surf_bo_offset: buffer and byte offset of the plane.
+ * width, height, pitch:     stored into the state as (value - 1).
+ * format:                   surface format code stored in ss0.
+ * is_target:                non-zero requests I915_GEM_DOMAIN_RENDER as the
+ *                           write domain of the relocation.
+ */
+static void
+i965_pp_set_surface_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
+ dri_bo *surf_bo, unsigned long surf_bo_offset,
+ int width, int height, int pitch, int format,
+ int index, int is_target)
+{
+ struct i965_surface_state *ss;
+ dri_bo *ss_bo;
+ unsigned int tiling;
+ unsigned int swizzle;
+
+ dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
+ ss_bo = pp_context->surface_state_binding_table.bo;
+ assert(ss_bo);
+
+ dri_bo_map(ss_bo, True);
+ assert(ss_bo->virtual);
+ ss = (struct i965_surface_state *)((char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index));
+ memset(ss, 0, sizeof(*ss));
+ ss->ss0.surface_type = I965_SURFACE_2D;
+ ss->ss0.surface_format = format;
+ ss->ss1.base_addr = surf_bo->offset + surf_bo_offset;
+ ss->ss2.width = width - 1;
+ ss->ss2.height = height - 1;
+ ss->ss3.pitch = pitch - 1;
+ pp_set_surface_tiling(ss, tiling);
+ /* Relocate ss1 (base address) against surf_bo with delta surf_bo_offset. */
+ dri_bo_emit_reloc(ss_bo,
+ I915_GEM_DOMAIN_RENDER, is_target ? I915_GEM_DOMAIN_RENDER : 0,
+ surf_bo_offset,
+ SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
+ surf_bo);
+ /* Binding-table entry `index` holds the offset of this surface state. */
+ ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
+ dri_bo_unmap(ss_bo);
+}
+
+/*
+ * Write a "surface state 2" into slot `index` of the combined surface-state /
+ * binding-table buffer and point binding-table entry `index` at it.
+ *
+ * surf_bo / surf_bo_offset: buffer and byte offset of the plane.
+ * width, height, wpitch:    stored into the state as (value - 1).
+ * xoffset, yoffset:         stored as the Cb x/y offsets in ss3.
+ * format, interleave_chroma: stored in ss2.
+ *
+ * The relocation is emitted with a write domain of 0.
+ */
+static void
+i965_pp_set_surface2_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
+ dri_bo *surf_bo, unsigned long surf_bo_offset,
+ int width, int height, int wpitch,
+ int xoffset, int yoffset,
+ int format, int interleave_chroma,
+ int index)
+{
+ struct i965_surface_state2 *ss2;
+ dri_bo *ss2_bo;
+ unsigned int tiling;
+ unsigned int swizzle;
+
+ dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
+ ss2_bo = pp_context->surface_state_binding_table.bo;
+ assert(ss2_bo);
+
+ dri_bo_map(ss2_bo, True);
+ assert(ss2_bo->virtual);
+ ss2 = (struct i965_surface_state2 *)((char *)ss2_bo->virtual + SURFACE_STATE_OFFSET(index));
+ memset(ss2, 0, sizeof(*ss2));
+ ss2->ss0.surface_base_address = surf_bo->offset + surf_bo_offset;
+ ss2->ss1.cbcr_pixel_offset_v_direction = 0;
+ ss2->ss1.width = width - 1;
+ ss2->ss1.height = height - 1;
+ ss2->ss2.pitch = wpitch - 1;
+ ss2->ss2.interleave_chroma = interleave_chroma;
+ ss2->ss2.surface_format = format;
+ ss2->ss3.x_offset_for_cb = xoffset;
+ ss2->ss3.y_offset_for_cb = yoffset;
+ pp_set_surface2_tiling(ss2, tiling);
+ /* Relocate ss0 (base address) against surf_bo with delta surf_bo_offset. */
+ dri_bo_emit_reloc(ss2_bo,
+ I915_GEM_DOMAIN_RENDER, 0,
+ surf_bo_offset,
+ SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state2, ss0),
+ surf_bo);
+ /* Binding-table entry `index` holds the offset of this surface state. */
+ ((unsigned int *)((char *)ss2_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
+ dri_bo_unmap(ss2_bo);
+}
+
+/*
+ * gen7 variant of the 2D surface-state writer: fills slot `index` of the
+ * combined surface-state / binding-table buffer with a gen7_surface_state
+ * and points binding-table entry `index` at it.
+ *
+ * width, height and pitch are stored as (value - 1).  On Haswell devices
+ * gen7_render_set_surface_scs() is additionally applied to the state.
+ * is_target non-zero requests I915_GEM_DOMAIN_RENDER as the write domain of
+ * the relocation.
+ */
+static void
+gen7_pp_set_surface_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
+ dri_bo *surf_bo, unsigned long surf_bo_offset,
+ int width, int height, int pitch, int format,
+ int index, int is_target)
+{
+ struct i965_driver_data * const i965 = i965_driver_data(ctx);
+ struct gen7_surface_state *ss;
+ dri_bo *ss_bo;
+ unsigned int tiling;
+ unsigned int swizzle;
+
+ dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
+ ss_bo = pp_context->surface_state_binding_table.bo;
+ assert(ss_bo);
+
+ dri_bo_map(ss_bo, True);
+ assert(ss_bo->virtual);
+ ss = (struct gen7_surface_state *)((char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index));
+ memset(ss, 0, sizeof(*ss));
+ ss->ss0.surface_type = I965_SURFACE_2D;
+ ss->ss0.surface_format = format;
+ ss->ss1.base_addr = surf_bo->offset + surf_bo_offset;
+ ss->ss2.width = width - 1;
+ ss->ss2.height = height - 1;
+ ss->ss3.pitch = pitch - 1;
+ gen7_pp_set_surface_tiling(ss, tiling);
+ if (IS_HASWELL(i965->intel.device_id))
+ gen7_render_set_surface_scs(ss);
+ /* Relocate ss1 (base address) against surf_bo with delta surf_bo_offset. */
+ dri_bo_emit_reloc(ss_bo,
+ I915_GEM_DOMAIN_RENDER, is_target ? I915_GEM_DOMAIN_RENDER : 0,
+ surf_bo_offset,
+ SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state, ss1),
+ surf_bo);
+ /* Binding-table entry `index` holds the offset of this surface state. */
+ ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
+ dri_bo_unmap(ss_bo);
+}
+
+/*
+ * gen7 variant of the "surface state 2" writer: fills slot `index` of the
+ * combined surface-state / binding-table buffer with a gen7_surface_state2
+ * and points binding-table entry `index` at it.
+ *
+ * width, height and wpitch are stored as (value - 1); xoffset/yoffset are
+ * stored as the Cb x/y offsets.  The relocation is emitted with a write
+ * domain of 0.
+ */
+static void
+gen7_pp_set_surface2_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
+ dri_bo *surf_bo, unsigned long surf_bo_offset,
+ int width, int height, int wpitch,
+ int xoffset, int yoffset,
+ int format, int interleave_chroma,
+ int index)
+{
+ struct gen7_surface_state2 *ss2;
+ dri_bo *ss2_bo;
+ unsigned int tiling;
+ unsigned int swizzle;
+
+ dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
+ ss2_bo = pp_context->surface_state_binding_table.bo;
+ assert(ss2_bo);
+
+ dri_bo_map(ss2_bo, True);
+ assert(ss2_bo->virtual);
+ ss2 = (struct gen7_surface_state2 *)((char *)ss2_bo->virtual + SURFACE_STATE_OFFSET(index));
+ memset(ss2, 0, sizeof(*ss2));
+ ss2->ss0.surface_base_address = surf_bo->offset + surf_bo_offset;
+ ss2->ss1.cbcr_pixel_offset_v_direction = 0;
+ ss2->ss1.width = width - 1;
+ ss2->ss1.height = height - 1;
+ ss2->ss2.pitch = wpitch - 1;
+ ss2->ss2.interleave_chroma = interleave_chroma;
+ ss2->ss2.surface_format = format;
+ ss2->ss3.x_offset_for_cb = xoffset;
+ ss2->ss3.y_offset_for_cb = yoffset;
+ gen7_pp_set_surface2_tiling(ss2, tiling);
+ /* Relocate ss0 (base address) against surf_bo with delta surf_bo_offset. */
+ dri_bo_emit_reloc(ss2_bo,
+ I915_GEM_DOMAIN_RENDER, 0,
+ surf_bo_offset,
+ SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state2, ss0),
+ surf_bo);
+ /* Binding-table entry `index` holds the offset of this surface state. */
+ ((unsigned int *)((char *)ss2_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
+ dri_bo_unmap(ss2_bo);
+}
+
+
+/*
+ * Describe the planes of `surface` and bind them as R8_UNORM 2D surfaces
+ * starting at binding-table slot `base_index`.
+ *
+ * width/height/pitch/offset are caller-provided arrays (indices 0..2 are
+ * written here) that receive the per-plane geometry:
+ *   - packed RGB (RGBA/RGBX/BGRA/BGRX) and packed YUV (YUY2/UYVY): only
+ *     entry 0 is filled and a single surface is bound;
+ *   - NV12: entries 0 and 1 are filled, two surfaces are bound;
+ *   - otherwise (three planes): entries 0..2 are filled and three surfaces
+ *     are bound, with the U and V plane indices swapped for YV12.
+ *
+ * is_target is forwarded to i965_pp_set_surface_state() for every plane.
+ */
+static void
+pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
+ const struct i965_surface *surface,
+ int base_index, int is_target,
+ int *width, int *height, int *pitch, int *offset)
+{
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ struct object_surface *obj_surface;
+ struct object_image *obj_image;
+ dri_bo *bo;
+ int fourcc = pp_get_surface_fourcc(ctx, surface);
+ /* Plane indices into width[]/height[]/pitch[]/offset[]; YV12 swaps U and V. */
+ const int Y = 0;
+ const int U = fourcc == VA_FOURCC('Y', 'V', '1', '2') ? 2 : 1;
+ const int V = fourcc == VA_FOURCC('Y', 'V', '1', '2') ? 1 : 2;
+ const int UV = 1;
+ int interleaved_uv = fourcc == VA_FOURCC('N', 'V', '1', '2');
+ int packed_yuv = (fourcc == VA_FOURCC('Y', 'U', 'Y', '2') || fourcc == VA_FOURCC('U', 'Y', 'V', 'Y'));
+
+ int full_packed_format = (fourcc == VA_FOURCC('R', 'G', 'B', 'A') ||
+ fourcc == VA_FOURCC('R', 'G', 'B', 'X') ||
+ fourcc == VA_FOURCC('B', 'G', 'R', 'A') ||
+ fourcc == VA_FOURCC('B', 'G', 'R', 'X'));
+ /* Bytes per pixel of plane 0: 4 for packed RGB, 2 for packed YUV, else 1. */
+ int scale_factor_of_1st_plane_width_in_byte = 1;
+
+
+ if (surface->type == I965_SURFACE_TYPE_SURFACE) {
+ /* VA surface: plane geometry is derived from the surface object's dimensions. */
+ obj_surface = SURFACE(surface->id);
+ bo = obj_surface->bo;
+ width[0] = obj_surface->orig_width;
+ height[0] = obj_surface->orig_height;
+ pitch[0] = obj_surface->width;
+ offset[0] = 0;
+
+ if (full_packed_format) {
+ scale_factor_of_1st_plane_width_in_byte = 4;
+ pitch[0] = obj_surface->width * 4;
+ }
+ else if (packed_yuv ) {
+ scale_factor_of_1st_plane_width_in_byte = 2;
+ pitch[0] = obj_surface->width * 2;
+ }
+ else if (interleaved_uv) {
+ width[1] = obj_surface->orig_width;
+ height[1] = obj_surface->orig_height / 2;
+ pitch[1] = obj_surface->width;
+ offset[1] = offset[0] + obj_surface->width * obj_surface->height;
+ } else {
+ width[1] = obj_surface->orig_width / 2;
+ height[1] = obj_surface->orig_height / 2;
+ pitch[1] = obj_surface->width / 2;
+ offset[1] = offset[0] + obj_surface->width * obj_surface->height;
+ width[2] = obj_surface->orig_width / 2;
+ height[2] = obj_surface->orig_height / 2;
+ pitch[2] = obj_surface->width / 2;
+ offset[2] = offset[1] + (obj_surface->width / 2) * (obj_surface->height / 2);
+ }
+ } else {
+ /* VA image: pitches and offsets are taken from the image description. */
+ obj_image = IMAGE(surface->id);
+ bo = obj_image->bo;
+ width[0] = obj_image->image.width;
+ height[0] = obj_image->image.height;
+ pitch[0] = obj_image->image.pitches[0];
+ offset[0] = obj_image->image.offsets[0];
+
+ if (full_packed_format) {
+ scale_factor_of_1st_plane_width_in_byte = 4;
+ }
+ else if (packed_yuv ) {
+ scale_factor_of_1st_plane_width_in_byte = 2;
+ }
+ else if (interleaved_uv) {
+ width[1] = obj_image->image.width;
+ height[1] = obj_image->image.height / 2;
+ pitch[1] = obj_image->image.pitches[1];
+ offset[1] = obj_image->image.offsets[1];
+ } else {
+ width[1] = obj_image->image.width / 2;
+ height[1] = obj_image->image.height / 2;
+ pitch[1] = obj_image->image.pitches[1];
+ offset[1] = obj_image->image.offsets[1];
+ width[2] = obj_image->image.width / 2;
+ height[2] = obj_image->image.height / 2;
+ pitch[2] = obj_image->image.pitches[2];
+ offset[2] = obj_image->image.offsets[2];
+ }
+ }
+
+ /* Y surface */
+ /* The programmed width is the plane's byte width divided by 4. */
+ i965_pp_set_surface_state(ctx, pp_context,
+ bo, offset[Y],
+ width[Y] *scale_factor_of_1st_plane_width_in_byte / 4, height[Y], pitch[Y], I965_SURFACEFORMAT_R8_UNORM,
+ base_index, is_target);
+
+ if (!packed_yuv && !full_packed_format) {
+ if (interleaved_uv) {
+ i965_pp_set_surface_state(ctx, pp_context,
+ bo, offset[UV],
+ width[UV] / 4, height[UV], pitch[UV], I965_SURFACEFORMAT_R8_UNORM,
+ base_index + 1, is_target);
+ } else {
+ /* U surface */
+ i965_pp_set_surface_state(ctx, pp_context,
+ bo, offset[U],
+ width[U] / 4, height[U], pitch[U], I965_SURFACEFORMAT_R8_UNORM,
+ base_index + 1, is_target);
+
+ /* V surface */
+ i965_pp_set_surface_state(ctx, pp_context,
+ bo, offset[V],
+ width[V] / 4, height[V], pitch[V], I965_SURFACEFORMAT_R8_UNORM,
+ base_index + 2, is_target);
+ }
+ }
+
+}
+
+/*
+ * gen7 variant: describe the planes of `surface` and bind them starting at
+ * binding-table slot `base_index`.
+ *
+ * width/height/pitch/offset are caller-provided arrays (indices 0..2) that
+ * receive the per-plane geometry.  When is_target is non-zero the planes are
+ * bound through gen7_pp_set_surface_state() with SINT formats; otherwise
+ * they are bound through gen7_pp_set_surface2_state() with the format picked
+ * from the fourcc.  Packed YUV (YUY2/UYVY) binds a single surface, NV12
+ * binds two, and other formats bind three.
+ */
+static void
+gen7_pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
+ const struct i965_surface *surface,
+ int base_index, int is_target,
+ int *width, int *height, int *pitch, int *offset)
+{
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ struct object_surface *obj_surface;
+ struct object_image *obj_image;
+ dri_bo *bo;
+ int fourcc = pp_get_surface_fourcc(ctx, surface);
+ /* Indices into the image's pitches[]/offsets[]; swapped for YV12 and IMC1. */
+ const int U = (fourcc == VA_FOURCC('Y', 'V', '1', '2') ||
+ fourcc == VA_FOURCC('I', 'M', 'C', '1')) ? 2 : 1;
+ const int V = (fourcc == VA_FOURCC('Y', 'V', '1', '2') ||
+ fourcc == VA_FOURCC('I', 'M', 'C', '1')) ? 1 : 2;
+ int interleaved_uv = fourcc == VA_FOURCC('N', 'V', '1', '2');
+ int packed_yuv = (fourcc == VA_FOURCC('Y', 'U', 'Y', '2') || fourcc == VA_FOURCC('U', 'Y', 'V', 'Y'));
+
+ if (surface->type == I965_SURFACE_TYPE_SURFACE) {
+ obj_surface = SURFACE(surface->id);
+ bo = obj_surface->bo;
+ width[0] = obj_surface->orig_width;
+ height[0] = obj_surface->orig_height;
+ pitch[0] = obj_surface->width;
+ offset[0] = 0;
+
+ if (packed_yuv) {
+ if (is_target)
+ width[0] = obj_surface->orig_width * 2; /* surface format is R8, so double the width */
+ else
+ width[0] = obj_surface->orig_width; /* surface format is YCBCR, width is specified in units of pixels */
+
+ pitch[0] = obj_surface->width * 2;
+ }
+
+ /* Entries 1 and 2 are filled from the surface's cb_cr_* fields for every format. */
+ width[1] = obj_surface->cb_cr_width;
+ height[1] = obj_surface->cb_cr_height;
+ pitch[1] = obj_surface->cb_cr_pitch;
+ offset[1] = obj_surface->y_cb_offset * obj_surface->width;
+
+ width[2] = obj_surface->cb_cr_width;
+ height[2] = obj_surface->cb_cr_height;
+ pitch[2] = obj_surface->cb_cr_pitch;
+ offset[2] = obj_surface->y_cr_offset * obj_surface->width;
+ } else {
+ obj_image = IMAGE(surface->id);
+ bo = obj_image->bo;
+ width[0] = obj_image->image.width;
+ height[0] = obj_image->image.height;
+ pitch[0] = obj_image->image.pitches[0];
+ offset[0] = obj_image->image.offsets[0];
+
+ if (packed_yuv) {
+ if (is_target)
+ width[0] = obj_image->image.width * 2; /* surface format is R8, so double the width */
+ else
+ width[0] = obj_image->image.width; /* surface format is YCBCR, width is specified in units of pixels */
+ } else if (interleaved_uv) {
+ width[1] = obj_image->image.width / 2;
+ height[1] = obj_image->image.height / 2;
+ pitch[1] = obj_image->image.pitches[1];
+ offset[1] = obj_image->image.offsets[1];
+ } else {
+ width[1] = obj_image->image.width / 2;
+ height[1] = obj_image->image.height / 2;
+ pitch[1] = obj_image->image.pitches[U];
+ offset[1] = obj_image->image.offsets[U];
+ width[2] = obj_image->image.width / 2;
+ height[2] = obj_image->image.height / 2;
+ pitch[2] = obj_image->image.pitches[V];
+ offset[2] = obj_image->image.offsets[V];
+ }
+ }
+
+ if (is_target) {
+ /*
+  * NOTE(review): plane 0 is bound at byte offset 0 rather than offset[0];
+  * for images offset[0] comes from image.offsets[0] -- confirm this is
+  * intended.
+  */
+ gen7_pp_set_surface_state(ctx, pp_context,
+ bo, 0,
+ width[0] / 4, height[0], pitch[0],
+ I965_SURFACEFORMAT_R8_SINT,
+ base_index, 1);
+
+ if (!packed_yuv) {
+ if (interleaved_uv) {
+ gen7_pp_set_surface_state(ctx, pp_context,
+ bo, offset[1],
+ width[1] / 2, height[1], pitch[1],
+ I965_SURFACEFORMAT_R8G8_SINT,
+ base_index + 1, 1);
+ } else {
+ gen7_pp_set_surface_state(ctx, pp_context,
+ bo, offset[1],
+ width[1] / 4, height[1], pitch[1],
+ I965_SURFACEFORMAT_R8_SINT,
+ base_index + 1, 1);
+ gen7_pp_set_surface_state(ctx, pp_context,
+ bo, offset[2],
+ width[2] / 4, height[2], pitch[2],
+ I965_SURFACEFORMAT_R8_SINT,
+ base_index + 2, 1);
+ }
+ }
+ } else {
+ /* Source side: plane 0 format defaults to Y8 and is overridden for packed YUV. */
+ int format0 = SURFACE_FORMAT_Y8_UNORM;
+
+ switch (fourcc) {
+ case VA_FOURCC('Y', 'U', 'Y', '2'):
+ format0 = SURFACE_FORMAT_YCRCB_NORMAL;
+ break;
- /* inline data grf 5-6 */
- assert(sizeof(pp_inline_parameter) == 64);
- intel_batchbuffer_data(batch, &pp_inline_parameter, sizeof(pp_inline_parameter));
+ case VA_FOURCC('U', 'Y', 'V', 'Y'):
+ format0 = SURFACE_FORMAT_YCRCB_SWAPY;
+ break;
- ADVANCE_BATCH(batch);
+ default:
+ break;
+ }
+
+ gen7_pp_set_surface2_state(ctx, pp_context,
+ bo, offset[0],
+ width[0], height[0], pitch[0],
+ 0, 0,
+ format0, 0,
+ base_index);
+
+ if (!packed_yuv) {
+ if (interleaved_uv) {
+ gen7_pp_set_surface2_state(ctx, pp_context,
+ bo, offset[1],
+ width[1], height[1], pitch[1],
+ 0, 0,
+ SURFACE_FORMAT_R8B8_UNORM, 0,
+ base_index + 1);
+ } else {
+ gen7_pp_set_surface2_state(ctx, pp_context,
+ bo, offset[1],
+ width[1], height[1], pitch[1],
+ 0, 0,
+ SURFACE_FORMAT_R8_UNORM, 0,
+ base_index + 1);
+ gen7_pp_set_surface2_state(ctx, pp_context,
+ bo, offset[2],
+ width[2], height[2], pitch[2],
+ 0, 0,
+ SURFACE_FORMAT_R8_UNORM, 0,
+ base_index + 2);
}
}
}
}
-static void
-ironlake_pp_pipeline_setup(VADriverContextP ctx)
-{
- struct i965_driver_data *i965 = i965_driver_data(ctx);
- struct intel_batchbuffer *batch = i965->batch;
- struct i965_post_processing_context *pp_context = i965->pp_context;
-
- intel_batchbuffer_start_atomic(batch, 0x1000);
- intel_batchbuffer_emit_mi_flush(batch);
- ironlake_pp_pipeline_select(ctx);
- ironlake_pp_state_base_address(ctx);
- ironlake_pp_state_pointers(ctx, pp_context);
- ironlake_pp_urb_layout(ctx, pp_context);
- ironlake_pp_cs_urb_layout(ctx, pp_context);
- ironlake_pp_constant_buffer(ctx, pp_context);
- ironlake_pp_object_walker(ctx, pp_context);
- intel_batchbuffer_end_atomic(batch);
-}
-
static int
pp_null_x_steps(void *private_context)
{
@@ -550,18 +1685,22 @@ pp_null_set_block_parameter(struct i965_post_processing_context *pp_context, int
return 0;
}
-static void
-pp_null_initialize(VADriverContextP ctx,
- VASurfaceID in_surface_id, VASurfaceID out_surface_id,
- const VARectangle *src_rect, const VARectangle *dst_rect)
+static VAStatus
+pp_null_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
+ const struct i965_surface *src_surface,
+ const VARectangle *src_rect,
+ struct i965_surface *dst_surface,
+ const VARectangle *dst_rect,
+ void *filter_param)
{
- struct i965_driver_data *i965 = i965_driver_data(ctx);
- struct i965_post_processing_context *pp_context = i965->pp_context;
-
/* private function & data */
pp_context->pp_x_steps = pp_null_x_steps;
pp_context->pp_y_steps = pp_null_y_steps;
pp_context->pp_set_block_parameter = pp_null_set_block_parameter;
+
+ dst_surface->flags = src_surface->flags;
+
+ return VA_STATUS_SUCCESS;
}
static int
@@ -581,170 +1720,93 @@ pp_load_save_y_steps(void *private_context)
static int
pp_load_save_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
{
- pp_inline_parameter.grf5.block_vertical_mask = 0xff;
- pp_inline_parameter.grf5.block_horizontal_mask = 0xffff;
- pp_inline_parameter.grf5.destination_block_horizontal_origin = x * 16;
- pp_inline_parameter.grf5.destination_block_vertical_origin = y * 8;
+ struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
+ struct pp_load_save_context *pp_load_save_context = (struct pp_load_save_context *)&pp_context->private_context;
+
+ pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16 + pp_load_save_context->dest_x;
+ pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8 + pp_load_save_context->dest_y;
return 0;
}
-static void
-pp_nv12_load_save_initialize(VADriverContextP ctx,
- VASurfaceID in_surface_id, VASurfaceID out_surface_id,
- const VARectangle *src_rect, const VARectangle *dst_rect)
+/*
+ * Compute the pixel masks for blocks on the left, right and bottom edges of
+ * dst_rect and store them in pp_context (block_horizontal_mask_left,
+ * block_horizontal_mask_right, block_vertical_mask_bottom).
+ *
+ * A fully covered edge block gets the all-ones mask (0xffff horizontally,
+ * 0xff vertically).
+ */
+static void calculate_boundary_block_mask(struct i965_post_processing_context *pp_context, const VARectangle *dst_rect)
+{
+    /* x offset of dest surface must be dword aligned.
+     * so we have to extend dst surface on left edge, and mask out pixels not interested
+     */
+    const int left_pad = dst_rect->x % GPU_ASM_X_OFFSET_ALIGNMENT;
+    /* Width including the pixels added on the left by the alignment. */
+    const int padded_width = dst_rect->width + left_pad;
+    const int right_rem = padded_width % GPU_ASM_BLOCK_WIDTH;
+    const int bottom_rem = dst_rect->height % GPU_ASM_BLOCK_HEIGHT;
+
+    if (!left_pad) {
+        pp_context->block_horizontal_mask_left = 0xffff;
+    } else {
+        int bit = left_pad;
+
+        /* Keep the columns from left_pad up to the end of the block. */
+        pp_context->block_horizontal_mask_left = 0;
+        while (bit < GPU_ASM_BLOCK_WIDTH) {
+            pp_context->block_horizontal_mask_left |= 1 << bit;
+            bit++;
+        }
+    }
+
+    /* Low right_rem columns of the last block in a row are valid. */
+    if (right_rem)
+        pp_context->block_horizontal_mask_right = (1 << right_rem) - 1;
+    else
+        pp_context->block_horizontal_mask_right = 0xffff;
+
+    /* Low bottom_rem rows of the last block row are valid. */
+    if (bottom_rem)
+        pp_context->block_vertical_mask_bottom = (1 << bottom_rem) - 1;
+    else
+        pp_context->block_vertical_mask_bottom = 0xff;
+}
+static VAStatus /* Load/save pass setup: hands both surfaces to pp_set_media_rw_message_surface(), installs the load/save callbacks, fills destination geometry and origin offsets; always returns VA_STATUS_SUCCESS. */
+pp_plx_load_save_plx_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
+ const struct i965_surface *src_surface,
+ const VARectangle *src_rect,
+ struct i965_surface *dst_surface,
+ const VARectangle *dst_rect,
+ void *filter_param) /* filter_param is not referenced in this function */
{
- struct i965_driver_data *i965 = i965_driver_data(ctx);
- struct i965_post_processing_context *pp_context = i965->pp_context;
struct pp_load_save_context *pp_load_save_context = (struct pp_load_save_context *)&pp_context->private_context;
- struct object_surface *obj_surface;
- struct i965_surface_state *ss;
- dri_bo *bo;
- int index, w, h;
- int orig_w, orig_h;
- unsigned int tiling, swizzle;
+ int width[3], height[3], pitch[3], offset[3]; /* scratch arrays handed to pp_set_media_rw_message_surface(); not read back in this function */
+ struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
+ struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
/* source surface */
- obj_surface = SURFACE(in_surface_id);
- orig_w = obj_surface->orig_width;
- orig_h = obj_surface->orig_height;
- w = obj_surface->width;
- h = obj_surface->height;
- dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
-
- /* source Y surface index 1 */
- index = 1;
- pp_context->surfaces[index].s_bo = obj_surface->bo;
- dri_bo_reference(pp_context->surfaces[index].s_bo);
- bo = dri_bo_alloc(i965->intel.bufmgr,
- "surface state",
- sizeof(struct i965_surface_state),
- 4096);
- assert(bo);
- pp_context->surfaces[index].ss_bo = bo;
- dri_bo_map(bo, True);
- assert(bo->virtual);
- ss = bo->virtual;
- memset(ss, 0, sizeof(*ss));
- ss->ss0.surface_type = I965_SURFACE_2D;
- ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
- ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset;
- ss->ss2.width = orig_w / 4 - 1;
- ss->ss2.height = orig_h - 1;
- ss->ss3.pitch = w - 1;
- pp_set_surface_tiling(ss, tiling);
- dri_bo_emit_reloc(bo,
- I915_GEM_DOMAIN_RENDER,
- 0,
- 0,
- offsetof(struct i965_surface_state, ss1),
- pp_context->surfaces[index].s_bo);
- dri_bo_unmap(bo);
-
- /* source UV surface index 2 */
- index = 2;
- pp_context->surfaces[index].s_bo = obj_surface->bo;
- dri_bo_reference(pp_context->surfaces[index].s_bo);
- bo = dri_bo_alloc(i965->intel.bufmgr,
- "surface state",
- sizeof(struct i965_surface_state),
- 4096);
- assert(bo);
- pp_context->surfaces[index].ss_bo = bo;
- dri_bo_map(bo, True);
- assert(bo->virtual);
- ss = bo->virtual;
- memset(ss, 0, sizeof(*ss));
- ss->ss0.surface_type = I965_SURFACE_2D;
- ss->ss0.surface_format = I965_SURFACEFORMAT_R8G8_UNORM;
- ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset + w * h;
- ss->ss2.width = orig_w / 4 - 1;
- ss->ss2.height = orig_h / 2 - 1;
- ss->ss3.pitch = w - 1;
- pp_set_surface_tiling(ss, tiling);
- dri_bo_emit_reloc(bo,
- I915_GEM_DOMAIN_RENDER,
- 0,
- w * h,
- offsetof(struct i965_surface_state, ss1),
- pp_context->surfaces[index].s_bo);
- dri_bo_unmap(bo);
+ pp_set_media_rw_message_surface(ctx, pp_context, src_surface, 1, 0, /* NOTE(review): 1 looks like the first source binding index and 0 like a "not a write target" flag - confirm against the helper */
+ width, height, pitch, offset);
/* destination surface */
- obj_surface = SURFACE(out_surface_id);
- orig_w = obj_surface->orig_width;
- orig_h = obj_surface->orig_height;
- w = obj_surface->width;
- h = obj_surface->height;
- dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
-
- /* destination Y surface index 7 */
- index = 7;
- pp_context->surfaces[index].s_bo = obj_surface->bo;
- dri_bo_reference(pp_context->surfaces[index].s_bo);
- bo = dri_bo_alloc(i965->intel.bufmgr,
- "surface state",
- sizeof(struct i965_surface_state),
- 4096);
- assert(bo);
- pp_context->surfaces[index].ss_bo = bo;
- dri_bo_map(bo, True);
- assert(bo->virtual);
- ss = bo->virtual;
- memset(ss, 0, sizeof(*ss));
- ss->ss0.surface_type = I965_SURFACE_2D;
- ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
- ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset;
- ss->ss2.width = orig_w / 4 - 1;
- ss->ss2.height = orig_h - 1;
- ss->ss3.pitch = w - 1;
- pp_set_surface_tiling(ss, tiling);
- dri_bo_emit_reloc(bo,
- I915_GEM_DOMAIN_RENDER,
- I915_GEM_DOMAIN_RENDER,
- 0,
- offsetof(struct i965_surface_state, ss1),
- pp_context->surfaces[index].s_bo);
- dri_bo_unmap(bo);
-
- /* destination UV surface index 8 */
- index = 8;
- pp_context->surfaces[index].s_bo = obj_surface->bo;
- dri_bo_reference(pp_context->surfaces[index].s_bo);
- bo = dri_bo_alloc(i965->intel.bufmgr,
- "surface state",
- sizeof(struct i965_surface_state),
- 4096);
- assert(bo);
- pp_context->surfaces[index].ss_bo = bo;
- dri_bo_map(bo, True);
- assert(bo->virtual);
- ss = bo->virtual;
- memset(ss, 0, sizeof(*ss));
- ss->ss0.surface_type = I965_SURFACE_2D;
- ss->ss0.surface_format = I965_SURFACEFORMAT_R8G8_UNORM;
- ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset + w * h;
- ss->ss2.width = orig_w / 4 - 1;
- ss->ss2.height = orig_h / 2 - 1;
- ss->ss3.pitch = w - 1;
- pp_set_surface_tiling(ss, tiling);
- dri_bo_emit_reloc(bo,
- I915_GEM_DOMAIN_RENDER,
- I915_GEM_DOMAIN_RENDER,
- w * h,
- offsetof(struct i965_surface_state, ss1),
- pp_context->surfaces[index].s_bo);
- dri_bo_unmap(bo);
+ pp_set_media_rw_message_surface(ctx, pp_context, dst_surface, 7, 1, /* NOTE(review): 7 looks like the first destination binding index and 1 like a "write target" flag - confirm against the helper */
+ width, height, pitch, offset);
/* private function & data */
pp_context->pp_x_steps = pp_load_save_x_steps;
pp_context->pp_y_steps = pp_load_save_y_steps;
pp_context->pp_set_block_parameter = pp_load_save_set_block_parameter;
- pp_load_save_context->dest_h = h;
- pp_load_save_context->dest_w = w;
- pp_inline_parameter.grf5.block_count_x = w / 16; /* 1 x N */
- pp_inline_parameter.grf5.number_blocks = w / 16;
+ int dst_left_edge_extend = dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT;; /* distance of dst x past the previous GPU_ASM_X_OFFSET_ALIGNMENT boundary */
+ pp_load_save_context->dest_x = dst_rect->x - dst_left_edge_extend; /* dst x moved back to that alignment boundary */
+ pp_load_save_context->dest_y = dst_rect->y;
+ pp_load_save_context->dest_h = ALIGN(dst_rect->height, 8); /* 8 matches the per-block y step used by pp_load_save_set_block_parameter */
+ pp_load_save_context->dest_w = ALIGN(dst_rect->width+dst_left_edge_extend, 16); /* width includes the left-edge extension; 16 matches the per-block x step */
+
+ pp_inline_parameter->grf5.block_count_x = pp_load_save_context->dest_w / 16; /* 1 x N */
+ pp_inline_parameter->grf5.number_blocks = pp_load_save_context->dest_w / 16;
+
+ pp_static_parameter->grf3.horizontal_origin_offset = src_rect->x; /* source origin is taken from src_rect unchanged */
+ pp_static_parameter->grf3.vertical_origin_offset = src_rect->y;
+
+ // update the source and destination u/v offsets (for packed yuv)
+ i965_update_src_surface_uv_offset (ctx, pp_context, src_surface);
+ i965_update_dst_surface_uv_offset (ctx, pp_context, dst_surface);
+
+ dst_surface->flags = src_surface->flags; /* destination inherits the source surface flags */
+
+ return VA_STATUS_SUCCESS;
}
static int
@@ -765,165 +1827,73 @@ static int
pp_scaling_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
{
struct pp_scaling_context *pp_scaling_context = (struct pp_scaling_context *)&pp_context->private_context;
- float src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
- float src_y_steping = pp_static_parameter.grf1.r1_6.normalized_video_y_scaling_step;
-
- pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin = src_x_steping * x * 16 + pp_scaling_context->src_normalized_x;
- pp_inline_parameter.grf5.source_surface_block_normalized_vertical_origin = src_y_steping * y * 8 + pp_scaling_context->src_normalized_y;
- pp_inline_parameter.grf5.destination_block_horizontal_origin = x * 16 + pp_scaling_context->dest_x;
- pp_inline_parameter.grf5.destination_block_vertical_origin = y * 8 + pp_scaling_context->dest_y;
+ struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter; /* parameters are now reached through pp_context */
+ struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
+ float src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
+ float src_y_steping = pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step;
+
+ pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = src_x_steping * x * 16 + pp_scaling_context->src_normalized_x; /* x step times 16 per block index x, plus the source x origin */
+ pp_inline_parameter->grf5.source_surface_block_normalized_vertical_origin = src_y_steping * y * 8 + pp_scaling_context->src_normalized_y; /* y step times 8 per block index y, plus the source y origin */
+ pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16 + pp_scaling_context->dest_x; /* block index x advances the destination by 16 */
+ pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8 + pp_scaling_context->dest_y; /* block index y advances the destination by 8 */
return 0;
}
-static void
-pp_nv12_scaling_initialize(VADriverContextP ctx,
- VASurfaceID in_surface_id, VASurfaceID out_surface_id,
- const VARectangle *src_rect, const VARectangle *dst_rect)
+static VAStatus
+pp_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
+ const struct i965_surface *src_surface,
+ const VARectangle *src_rect,
+ struct i965_surface *dst_surface,
+ const VARectangle *dst_rect,
+ void *filter_param)
{
struct i965_driver_data *i965 = i965_driver_data(ctx);
- struct i965_post_processing_context *pp_context = i965->pp_context;
struct pp_scaling_context *pp_scaling_context = (struct pp_scaling_context *)&pp_context->private_context;
+ struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
+ struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
struct object_surface *obj_surface;
struct i965_sampler_state *sampler_state;
- struct i965_surface_state *ss;
- dri_bo *bo;
- int index;
int in_w, in_h, in_wpitch, in_hpitch;
int out_w, out_h, out_wpitch, out_hpitch;
- unsigned int tiling, swizzle;
/* source surface */
- obj_surface = SURFACE(in_surface_id);
+ obj_surface = SURFACE(src_surface->id);
in_w = obj_surface->orig_width;
in_h = obj_surface->orig_height;
in_wpitch = obj_surface->width;
in_hpitch = obj_surface->height;
- dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
/* source Y surface index 1 */
- index = 1;
- pp_context->surfaces[index].s_bo = obj_surface->bo;
- dri_bo_reference(pp_context->surfaces[index].s_bo);
- bo = dri_bo_alloc(i965->intel.bufmgr,
- "surface state",
- sizeof(struct i965_surface_state),
- 4096);
- assert(bo);
- pp_context->surfaces[index].ss_bo = bo;
- dri_bo_map(bo, True);
- assert(bo->virtual);
- ss = bo->virtual;
- memset(ss, 0, sizeof(*ss));
- ss->ss0.surface_type = I965_SURFACE_2D;
- ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
- ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset;
- ss->ss2.width = in_w - 1;
- ss->ss2.height = in_h - 1;
- ss->ss3.pitch = in_wpitch - 1;
- pp_set_surface_tiling(ss, tiling);
- dri_bo_emit_reloc(bo,
- I915_GEM_DOMAIN_RENDER,
- 0,
- 0,
- offsetof(struct i965_surface_state, ss1),
- pp_context->surfaces[index].s_bo);
- dri_bo_unmap(bo);
+ i965_pp_set_surface_state(ctx, pp_context,
+ obj_surface->bo, 0,
+ in_w, in_h, in_wpitch, I965_SURFACEFORMAT_R8_UNORM,
+ 1, 0);
/* source UV surface index 2 */
- index = 2;
- pp_context->surfaces[index].s_bo = obj_surface->bo;
- dri_bo_reference(pp_context->surfaces[index].s_bo);
- bo = dri_bo_alloc(i965->intel.bufmgr,
- "surface state",
- sizeof(struct i965_surface_state),
- 4096);
- assert(bo);
- pp_context->surfaces[index].ss_bo = bo;
- dri_bo_map(bo, True);
- assert(bo->virtual);
- ss = bo->virtual;
- memset(ss, 0, sizeof(*ss));
- ss->ss0.surface_type = I965_SURFACE_2D;
- ss->ss0.surface_format = I965_SURFACEFORMAT_R8G8_UNORM;
- ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset + in_wpitch * in_hpitch;
- ss->ss2.width = in_w / 2 - 1;
- ss->ss2.height = in_h / 2 - 1;
- ss->ss3.pitch = in_wpitch - 1;
- pp_set_surface_tiling(ss, tiling);
- dri_bo_emit_reloc(bo,
- I915_GEM_DOMAIN_RENDER,
- 0,
- in_wpitch * in_hpitch,
- offsetof(struct i965_surface_state, ss1),
- pp_context->surfaces[index].s_bo);
- dri_bo_unmap(bo);
+ i965_pp_set_surface_state(ctx, pp_context,
+ obj_surface->bo, in_wpitch * in_hpitch,
+ in_w / 2, in_h / 2, in_wpitch, I965_SURFACEFORMAT_R8G8_UNORM,
+ 2, 0);
/* destination surface */
- obj_surface = SURFACE(out_surface_id);
+ obj_surface = SURFACE(dst_surface->id);
out_w = obj_surface->orig_width;
out_h = obj_surface->orig_height;
out_wpitch = obj_surface->width;
out_hpitch = obj_surface->height;
- dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
/* destination Y surface index 7 */
- index = 7;
- pp_context->surfaces[index].s_bo = obj_surface->bo;
- dri_bo_reference(pp_context->surfaces[index].s_bo);
- bo = dri_bo_alloc(i965->intel.bufmgr,
- "surface state",
- sizeof(struct i965_surface_state),
- 4096);
- assert(bo);
- pp_context->surfaces[index].ss_bo = bo;
- dri_bo_map(bo, True);
- assert(bo->virtual);
- ss = bo->virtual;
- memset(ss, 0, sizeof(*ss));
- ss->ss0.surface_type = I965_SURFACE_2D;
- ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
- ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset;
- ss->ss2.width = out_w / 4 - 1;
- ss->ss2.height = out_h - 1;
- ss->ss3.pitch = out_wpitch - 1;
- pp_set_surface_tiling(ss, tiling);
- dri_bo_emit_reloc(bo,
- I915_GEM_DOMAIN_RENDER,
- I915_GEM_DOMAIN_RENDER,
- 0,
- offsetof(struct i965_surface_state, ss1),
- pp_context->surfaces[index].s_bo);
- dri_bo_unmap(bo);
+ i965_pp_set_surface_state(ctx, pp_context,
+ obj_surface->bo, 0,
+ out_w / 4, out_h, out_wpitch, I965_SURFACEFORMAT_R8_UNORM,
+ 7, 1);
/* destination UV surface index 8 */
- index = 8;
- pp_context->surfaces[index].s_bo = obj_surface->bo;
- dri_bo_reference(pp_context->surfaces[index].s_bo);
- bo = dri_bo_alloc(i965->intel.bufmgr,
- "surface state",
- sizeof(struct i965_surface_state),
- 4096);
- assert(bo);
- pp_context->surfaces[index].ss_bo = bo;
- dri_bo_map(bo, True);
- assert(bo->virtual);
- ss = bo->virtual;
- memset(ss, 0, sizeof(*ss));
- ss->ss0.surface_type = I965_SURFACE_2D;
- ss->ss0.surface_format = I965_SURFACEFORMAT_R8G8_UNORM;
- ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset + out_wpitch * out_hpitch;
- ss->ss2.width = out_w / 4 - 1;
- ss->ss2.height = out_h / 2 - 1;
- ss->ss3.pitch = out_wpitch - 1;
- pp_set_surface_tiling(ss, tiling);
- dri_bo_emit_reloc(bo,
- I915_GEM_DOMAIN_RENDER,
- I915_GEM_DOMAIN_RENDER,
- out_wpitch * out_hpitch,
- offsetof(struct i965_surface_state, ss1),
- pp_context->surfaces[index].s_bo);
- dri_bo_unmap(bo);
+ i965_pp_set_surface_state(ctx, pp_context,
+ obj_surface->bo, out_wpitch * out_hpitch,
+ out_w / 4, out_h / 2, out_wpitch, I965_SURFACEFORMAT_R8G8_UNORM,
+ 8, 1);
/* sampler state */
dri_bo_map(pp_context->sampler_state_table.bo, True);
@@ -951,20 +1921,24 @@ pp_nv12_scaling_initialize(VADriverContextP ctx,
pp_context->pp_y_steps = pp_scaling_y_steps;
pp_context->pp_set_block_parameter = pp_scaling_set_block_parameter;
- pp_scaling_context->dest_x = dst_rect->x;
+ int dst_left_edge_extend = dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT;
+ float src_left_edge_extend = (float)dst_left_edge_extend*src_rect->width/dst_rect->width;
+ pp_scaling_context->dest_x = dst_rect->x - dst_left_edge_extend;
pp_scaling_context->dest_y = dst_rect->y;
- pp_scaling_context->dest_w = ALIGN(dst_rect->width, 16);
- pp_scaling_context->dest_h = ALIGN(dst_rect->height, 16);
- pp_scaling_context->src_normalized_x = (float)src_rect->x / in_w / out_w;
- pp_scaling_context->src_normalized_y = (float)src_rect->y / in_h / out_h;
+ pp_scaling_context->dest_w = ALIGN(dst_rect->width + dst_left_edge_extend, 16);
+ pp_scaling_context->dest_h = ALIGN(dst_rect->height, 8);
+ pp_scaling_context->src_normalized_x = (float)(src_rect->x - src_left_edge_extend)/ in_w;
+ pp_scaling_context->src_normalized_y = (float)src_rect->y / in_h;
+
+ pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step = (float) src_rect->height / in_h / dst_rect->height;
+
+ pp_inline_parameter->grf5.normalized_video_x_scaling_step = (float) (src_rect->width + src_left_edge_extend)/ in_w / (dst_rect->width + dst_left_edge_extend);
+ pp_inline_parameter->grf5.block_count_x = pp_scaling_context->dest_w / 16; /* 1 x N */
+ pp_inline_parameter->grf5.number_blocks = pp_scaling_context->dest_w / 16;
- pp_static_parameter.grf1.r1_6.normalized_video_y_scaling_step = (float) src_rect->height / in_h / out_h;
+ dst_surface->flags = src_surface->flags;
- pp_inline_parameter.grf5.normalized_video_x_scaling_step = (float) src_rect->width / in_w / out_w;
- pp_inline_parameter.grf5.block_count_x = pp_scaling_context->dest_w / 16; /* 1 x N */
- pp_inline_parameter.grf5.number_blocks = pp_scaling_context->dest_w / 16;
- pp_inline_parameter.grf5.block_vertical_mask = 0xff;
- pp_inline_parameter.grf5.block_horizontal_mask = 0xffff;
+ return VA_STATUS_SUCCESS;
}
static int
@@ -985,20 +1959,25 @@ static int
pp_avs_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
{
struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
+ struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
+ struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
float src_x_steping, src_y_steping, video_step_delta;
int tmp_w = ALIGN(pp_avs_context->dest_h * pp_avs_context->src_w / pp_avs_context->src_h, 16);
- if (tmp_w >= pp_avs_context->dest_w) {
- pp_inline_parameter.grf5.normalized_video_x_scaling_step = 1.0 / tmp_w;
- pp_inline_parameter.grf6.video_step_delta = 0;
+ if (pp_static_parameter->grf4.r4_2.avs.nlas == 0) {
+ src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
+ pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = src_x_steping * x * 16 + pp_avs_context->src_normalized_x;
+ } else if (tmp_w >= pp_avs_context->dest_w) {
+ pp_inline_parameter->grf5.normalized_video_x_scaling_step = 1.0 / tmp_w;
+ pp_inline_parameter->grf6.video_step_delta = 0;
if (x == 0) {
- pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin = (float)(tmp_w - pp_avs_context->dest_w) / tmp_w / 2 +
+ pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = (float)(tmp_w - pp_avs_context->dest_w) / tmp_w / 2 +
pp_avs_context->src_normalized_x;
} else {
- src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
- video_step_delta = pp_inline_parameter.grf6.video_step_delta;
- pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
+ src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
+ video_step_delta = pp_inline_parameter->grf6.video_step_delta;
+ pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
16 * 15 * video_step_delta / 2;
}
} else {
@@ -1014,15 +1993,15 @@ pp_avs_set_block_parameter(struct i965_post_processing_context *pp_context, int
f = (float) n2 * 16 / tmp_w;
if (n0 < 5) {
- pp_inline_parameter.grf6.video_step_delta = 0.0;
+ pp_inline_parameter->grf6.video_step_delta = 0.0;
if (x == 0) {
- pp_inline_parameter.grf5.normalized_video_x_scaling_step = 1.0 / pp_avs_context->dest_w;
- pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin = pp_avs_context->src_normalized_x;
+ pp_inline_parameter->grf5.normalized_video_x_scaling_step = 1.0 / pp_avs_context->dest_w;
+ pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = pp_avs_context->src_normalized_x;
} else {
- src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
- video_step_delta = pp_inline_parameter.grf6.video_step_delta;
- pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
+ src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
+ video_step_delta = pp_inline_parameter->grf6.video_step_delta;
+ pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
16 * 15 * video_step_delta / 2;
}
} else {
@@ -1031,218 +2010,160 @@ pp_avs_set_block_parameter(struct i965_post_processing_context *pp_context, int
float a = f / (nls_left * 16 * factor_b);
float b = (f - nls_left * 16 * a) * 2 / (nls_left * 16 * (nls_left * 16 - 1));
- pp_inline_parameter.grf6.video_step_delta = b;
+ pp_inline_parameter->grf6.video_step_delta = b;
if (x == 0) {
- pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin = pp_avs_context->src_normalized_x;
- pp_inline_parameter.grf5.normalized_video_x_scaling_step = a;
+ pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = pp_avs_context->src_normalized_x;
+ pp_inline_parameter->grf5.normalized_video_x_scaling_step = a;
} else {
- src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
- video_step_delta = pp_inline_parameter.grf6.video_step_delta;
- pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
+ src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
+ video_step_delta = pp_inline_parameter->grf6.video_step_delta;
+ pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
16 * 15 * video_step_delta / 2;
- pp_inline_parameter.grf5.normalized_video_x_scaling_step += 16 * b;
+ pp_inline_parameter->grf5.normalized_video_x_scaling_step += 16 * b;
}
} else if (x < (pp_avs_context->dest_w / 16 - nls_right)) {
/* scale the center linearly */
- src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
- video_step_delta = pp_inline_parameter.grf6.video_step_delta;
- pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
+ src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
+ video_step_delta = pp_inline_parameter->grf6.video_step_delta;
+ pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
16 * 15 * video_step_delta / 2;
- pp_inline_parameter.grf6.video_step_delta = 0.0;
- pp_inline_parameter.grf5.normalized_video_x_scaling_step = 1.0 / tmp_w;
+ pp_inline_parameter->grf6.video_step_delta = 0.0;
+ pp_inline_parameter->grf5.normalized_video_x_scaling_step = 1.0 / tmp_w;
} else {
float a = f / (nls_right * 16 * factor_b);
float b = (f - nls_right * 16 * a) * 2 / (nls_right * 16 * (nls_right * 16 - 1));
- src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
- video_step_delta = pp_inline_parameter.grf6.video_step_delta;
- pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
+ src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
+ video_step_delta = pp_inline_parameter->grf6.video_step_delta;
+ pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
16 * 15 * video_step_delta / 2;
- pp_inline_parameter.grf6.video_step_delta = -b;
+ pp_inline_parameter->grf6.video_step_delta = -b;
if (x == (pp_avs_context->dest_w / 16 - nls_right))
- pp_inline_parameter.grf5.normalized_video_x_scaling_step = a + (nls_right * 16 - 1) * b;
+ pp_inline_parameter->grf5.normalized_video_x_scaling_step = a + (nls_right * 16 - 1) * b;
else
- pp_inline_parameter.grf5.normalized_video_x_scaling_step -= b * 16;
+ pp_inline_parameter->grf5.normalized_video_x_scaling_step -= b * 16;
}
}
}
- src_y_steping = pp_static_parameter.grf1.r1_6.normalized_video_y_scaling_step;
- pp_inline_parameter.grf5.source_surface_block_normalized_vertical_origin = src_y_steping * y * 8 + pp_avs_context->src_normalized_y;
- pp_inline_parameter.grf5.destination_block_horizontal_origin = x * 16 + pp_avs_context->dest_x;
- pp_inline_parameter.grf5.destination_block_vertical_origin = y * 8 + pp_avs_context->dest_y;
+ src_y_steping = pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step;
+ pp_inline_parameter->grf5.source_surface_block_normalized_vertical_origin = src_y_steping * y * 8 + pp_avs_context->src_normalized_y;
+ pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16 + pp_avs_context->dest_x;
+ pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8 + pp_avs_context->dest_y;
return 0;
}
-static void
-pp_nv12_avs_initialize(VADriverContextP ctx,
- VASurfaceID in_surface_id, VASurfaceID out_surface_id,
- const VARectangle *src_rect, const VARectangle *dst_rect)
+static VAStatus
+pp_nv12_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
+ const struct i965_surface *src_surface,
+ const VARectangle *src_rect,
+ struct i965_surface *dst_surface,
+ const VARectangle *dst_rect,
+ void *filter_param,
+ int nlas)
{
struct i965_driver_data *i965 = i965_driver_data(ctx);
- struct i965_post_processing_context *pp_context = i965->pp_context;
struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
+ struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
+ struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
struct object_surface *obj_surface;
- struct i965_surface_state *ss;
struct i965_sampler_8x8 *sampler_8x8;
struct i965_sampler_8x8_state *sampler_8x8_state;
- struct i965_surface_state2 *ss_8x8;
- dri_bo *bo;
int index;
int in_w, in_h, in_wpitch, in_hpitch;
int out_w, out_h, out_wpitch, out_hpitch;
- unsigned int tiling, swizzle;
+ int i;
/* surface */
- obj_surface = SURFACE(in_surface_id);
+ obj_surface = SURFACE(src_surface->id);
in_w = obj_surface->orig_width;
in_h = obj_surface->orig_height;
in_wpitch = obj_surface->width;
in_hpitch = obj_surface->height;
- dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
/* source Y surface index 1 */
- index = 1;
- pp_context->surfaces[index].s_bo = obj_surface->bo;
- dri_bo_reference(pp_context->surfaces[index].s_bo);
- bo = dri_bo_alloc(i965->intel.bufmgr,
- "Y surface state for sample_8x8",
- sizeof(struct i965_surface_state2),
- 4096);
- assert(bo);
- pp_context->surfaces[index].ss_bo = bo;
- dri_bo_map(bo, True);
- assert(bo->virtual);
- ss_8x8 = bo->virtual;
- memset(ss_8x8, 0, sizeof(*ss_8x8));
- ss_8x8->ss0.surface_base_address = pp_context->surfaces[index].s_bo->offset;
- ss_8x8->ss1.cbcr_pixel_offset_v_direction = 0;
- ss_8x8->ss1.width = in_w - 1;
- ss_8x8->ss1.height = in_h - 1;
- ss_8x8->ss2.half_pitch_for_chroma = 0;
- ss_8x8->ss2.pitch = in_wpitch - 1;
- ss_8x8->ss2.interleave_chroma = 0;
- ss_8x8->ss2.surface_format = SURFACE_FORMAT_Y8_UNORM;
- ss_8x8->ss3.x_offset_for_cb = 0;
- ss_8x8->ss3.y_offset_for_cb = 0;
- pp_set_surface2_tiling(ss_8x8, tiling);
- dri_bo_emit_reloc(bo,
- I915_GEM_DOMAIN_RENDER,
- 0,
- 0,
- offsetof(struct i965_surface_state2, ss0),
- pp_context->surfaces[index].s_bo);
- dri_bo_unmap(bo);
+ i965_pp_set_surface2_state(ctx, pp_context,
+ obj_surface->bo, 0,
+ in_w, in_h, in_wpitch,
+ 0, 0,
+ SURFACE_FORMAT_Y8_UNORM, 0,
+ 1);
/* source UV surface index 2 */
- index = 2;
- pp_context->surfaces[index].s_bo = obj_surface->bo;
- dri_bo_reference(pp_context->surfaces[index].s_bo);
- bo = dri_bo_alloc(i965->intel.bufmgr,
- "UV surface state for sample_8x8",
- sizeof(struct i965_surface_state2),
- 4096);
- assert(bo);
- pp_context->surfaces[index].ss_bo = bo;
- dri_bo_map(bo, True);
- assert(bo->virtual);
- ss_8x8 = bo->virtual;
- memset(ss_8x8, 0, sizeof(*ss_8x8));
- ss_8x8->ss0.surface_base_address = pp_context->surfaces[index].s_bo->offset + in_wpitch * in_hpitch;
- ss_8x8->ss1.cbcr_pixel_offset_v_direction = 0;
- ss_8x8->ss1.width = in_w - 1;
- ss_8x8->ss1.height = in_h - 1;
- ss_8x8->ss2.half_pitch_for_chroma = 0;
- ss_8x8->ss2.pitch = in_wpitch - 1;
- ss_8x8->ss2.interleave_chroma = 1;
- ss_8x8->ss2.surface_format = SURFACE_FORMAT_PLANAR_420_8;
- ss_8x8->ss3.x_offset_for_cb = 0;
- ss_8x8->ss3.y_offset_for_cb = 0;
- pp_set_surface2_tiling(ss_8x8, tiling);
- dri_bo_emit_reloc(bo,
- I915_GEM_DOMAIN_RENDER,
- 0,
- in_wpitch * in_hpitch,
- offsetof(struct i965_surface_state2, ss0),
- pp_context->surfaces[index].s_bo);
- dri_bo_unmap(bo);
+ i965_pp_set_surface2_state(ctx, pp_context,
+ obj_surface->bo, in_wpitch * in_hpitch,
+ in_w / 2, in_h / 2, in_wpitch,
+ 0, 0,
+ SURFACE_FORMAT_R8B8_UNORM, 0,
+ 2);
/* destination surface */
- obj_surface = SURFACE(out_surface_id);
+ obj_surface = SURFACE(dst_surface->id);
out_w = obj_surface->orig_width;
out_h = obj_surface->orig_height;
out_wpitch = obj_surface->width;
out_hpitch = obj_surface->height;
assert(out_w <= out_wpitch && out_h <= out_hpitch);
- dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
/* destination Y surface index 7 */
- index = 7;
- pp_context->surfaces[index].s_bo = obj_surface->bo;
- dri_bo_reference(pp_context->surfaces[index].s_bo);
- bo = dri_bo_alloc(i965->intel.bufmgr,
- "surface state",
- sizeof(struct i965_surface_state),
- 4096);
- assert(bo);
- pp_context->surfaces[index].ss_bo = bo;
- dri_bo_map(bo, True);
- assert(bo->virtual);
- ss = bo->virtual;
- memset(ss, 0, sizeof(*ss));
- ss->ss0.surface_type = I965_SURFACE_2D;
- ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
- ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset;
- ss->ss2.width = out_w / 4 - 1;
- ss->ss2.height = out_h - 1;
- ss->ss3.pitch = out_wpitch - 1;
- pp_set_surface_tiling(ss, tiling);
- dri_bo_emit_reloc(bo,
- I915_GEM_DOMAIN_RENDER,
- I915_GEM_DOMAIN_RENDER,
- 0,
- offsetof(struct i965_surface_state, ss1),
- pp_context->surfaces[index].s_bo);
- dri_bo_unmap(bo);
+ i965_pp_set_surface_state(ctx, pp_context,
+ obj_surface->bo, 0,
+ out_w / 4, out_h, out_wpitch, I965_SURFACEFORMAT_R8_UNORM,
+ 7, 1);
/* destination UV surface index 8 */
- index = 8;
- pp_context->surfaces[index].s_bo = obj_surface->bo;
- dri_bo_reference(pp_context->surfaces[index].s_bo);
- bo = dri_bo_alloc(i965->intel.bufmgr,
- "surface state",
- sizeof(struct i965_surface_state),
- 4096);
- assert(bo);
- pp_context->surfaces[index].ss_bo = bo;
- dri_bo_map(bo, True);
- assert(bo->virtual);
- ss = bo->virtual;
- memset(ss, 0, sizeof(*ss));
- ss->ss0.surface_type = I965_SURFACE_2D;
- ss->ss0.surface_format = I965_SURFACEFORMAT_R8G8_UNORM;
- ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset + out_wpitch * out_hpitch;
- ss->ss2.width = out_w / 4 - 1;
- ss->ss2.height = out_h / 2 - 1;
- ss->ss3.pitch = out_wpitch - 1;
- pp_set_surface_tiling(ss, tiling);
- dri_bo_emit_reloc(bo,
- I915_GEM_DOMAIN_RENDER,
- I915_GEM_DOMAIN_RENDER,
- out_wpitch * out_hpitch,
- offsetof(struct i965_surface_state, ss1),
- pp_context->surfaces[index].s_bo);
- dri_bo_unmap(bo);
-
+ i965_pp_set_surface_state(ctx, pp_context,
+ obj_surface->bo, out_wpitch * out_hpitch,
+ out_w / 4, out_h / 2, out_wpitch, I965_SURFACEFORMAT_R8G8_UNORM,
+ 8, 1);
+
/* sampler 8x8 state */
dri_bo_map(pp_context->sampler_state_table.bo_8x8, True);
assert(pp_context->sampler_state_table.bo_8x8->virtual);
assert(sizeof(*sampler_8x8_state) == sizeof(int) * 138);
sampler_8x8_state = pp_context->sampler_state_table.bo_8x8->virtual;
memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
+
+ for (i = 0; i < 17; i++) {
+ /* for Y channel, currently ignore */
+ sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c0 = 0x00;
+ sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c1 = 0x00;
+ sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c2 = 0x08;
+ sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c3 = 0x18;
+ sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c4 = 0x18;
+ sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c5 = 0x08;
+ sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c6 = 0x00;
+ sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c7 = 0x00;
+ sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c0 = 0x00;
+ sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c1 = 0x00;
+ sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c2 = 0x10;
+ sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c3 = 0x10;
+ sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c4 = 0x10;
+ sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c5 = 0x10;
+ sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c6 = 0x00;
+ sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c7 = 0x00;
+ /* for U/V channel, 0.25 */
+ sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c0 = 0x0;
+ sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c1 = 0x0;
+ sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c2 = 0x10;
+ sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c3 = 0x10;
+ sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c4 = 0x10;
+ sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c5 = 0x10;
+ sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c6 = 0x0;
+ sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c7 = 0x0;
+ sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c0 = 0x0;
+ sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c1 = 0x0;
+ sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c2 = 0x10;
+ sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c3 = 0x10;
+ sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c4 = 0x10;
+ sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c5 = 0x10;
+ sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c6 = 0x0;
+ sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c7 = 0x0;
+ }
+
sampler_8x8_state->dw136.default_sharpness_level = 0;
sampler_8x8_state->dw137.adaptive_filter_for_all_channel = 1;
sampler_8x8_state->dw137.bypass_y_adaptive_filtering = 1;
@@ -1259,7 +2180,7 @@ pp_nv12_avs_initialize(VADriverContextP ctx,
index = 1;
memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
sampler_8x8[index].dw0.avs_filter_type = AVS_FILTER_ADAPTIVE_8_TAP;
- sampler_8x8[index].dw0.ief_bypass = 0;
+ sampler_8x8[index].dw0.ief_bypass = 1;
sampler_8x8[index].dw0.ief_filter_type = IEF_FILTER_DETAIL;
sampler_8x8[index].dw0.ief_filter_size = IEF_FILTER_SIZE_5X5;
sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
@@ -1317,27 +2238,16 @@ pp_nv12_avs_initialize(VADriverContextP ctx,
0,
0,
sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
- pp_context->sampler_state_table.bo_8x8);
-
- dri_bo_map(pp_context->sampler_state_table.bo_8x8_uv, True);
- assert(pp_context->sampler_state_table.bo_8x8_uv->virtual);
- assert(sizeof(*sampler_8x8_state) == sizeof(int) * 138);
- sampler_8x8_state = pp_context->sampler_state_table.bo_8x8_uv->virtual;
- memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
- sampler_8x8_state->dw136.default_sharpness_level = 0;
- sampler_8x8_state->dw137.adaptive_filter_for_all_channel = 0;
- sampler_8x8_state->dw137.bypass_y_adaptive_filtering = 1;
- sampler_8x8_state->dw137.bypass_x_adaptive_filtering = 1;
- dri_bo_unmap(pp_context->sampler_state_table.bo_8x8_uv);
+ pp_context->sampler_state_table.bo_8x8);
/* sample_8x8 UV index 2 */
index = 2;
memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
- sampler_8x8[index].dw0.avs_filter_type = AVS_FILTER_NEAREST;
- sampler_8x8[index].dw0.ief_bypass = 0;
+ sampler_8x8[index].dw0.avs_filter_type = AVS_FILTER_ADAPTIVE_8_TAP;
+ sampler_8x8[index].dw0.ief_bypass = 1;
sampler_8x8[index].dw0.ief_filter_type = IEF_FILTER_DETAIL;
sampler_8x8[index].dw0.ief_filter_size = IEF_FILTER_SIZE_5X5;
- sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8_uv->offset >> 5;
+ sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
sampler_8x8[index].dw2.global_noise_estimation = 22;
sampler_8x8[index].dw2.strong_edge_threshold = 8;
sampler_8x8[index].dw2.weak_edge_threshold = 1;
@@ -1392,7 +2302,7 @@ pp_nv12_avs_initialize(VADriverContextP ctx,
0,
0,
sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
- pp_context->sampler_state_table.bo_8x8_uv);
+ pp_context->sampler_state_table.bo_8x8);
dri_bo_unmap(pp_context->sampler_state_table.bo);
@@ -1401,74 +2311,564 @@ pp_nv12_avs_initialize(VADriverContextP ctx,
pp_context->pp_y_steps = pp_avs_y_steps;
pp_context->pp_set_block_parameter = pp_avs_set_block_parameter;
- pp_avs_context->dest_x = dst_rect->x;
- pp_avs_context->dest_y = dst_rect->y;
- pp_avs_context->dest_w = ALIGN(dst_rect->width, 16);
- pp_avs_context->dest_h = ALIGN(dst_rect->height, 16);
- pp_avs_context->src_normalized_x = (float)src_rect->x / in_w / out_w;
- pp_avs_context->src_normalized_y = (float)src_rect->y / in_h / out_h;
- pp_avs_context->src_w = src_rect->width;
- pp_avs_context->src_h = src_rect->height;
+ int dst_left_edge_extend = dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT;
+ float src_left_edge_extend = (float)dst_left_edge_extend*src_rect->width/dst_rect->width;
+ pp_avs_context->dest_x = dst_rect->x - dst_left_edge_extend;
+ pp_avs_context->dest_y = dst_rect->y;
+ pp_avs_context->dest_w = ALIGN(dst_rect->width + dst_left_edge_extend, 16);
+ pp_avs_context->dest_h = ALIGN(dst_rect->height, 8);
+ pp_avs_context->src_normalized_x = (float)(src_rect->x - src_left_edge_extend)/ in_w;
+ pp_avs_context->src_normalized_y = (float)src_rect->y / in_h;
+ pp_avs_context->src_w = src_rect->width + src_left_edge_extend;
+ pp_avs_context->src_h = src_rect->height;
+
+ pp_static_parameter->grf4.r4_2.avs.nlas = nlas;
+ pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step = (float) src_rect->height / in_h / dst_rect->height;
+
+ pp_inline_parameter->grf5.normalized_video_x_scaling_step = (float) (src_rect->width + src_left_edge_extend)/ in_w / (dst_rect->width + dst_left_edge_extend);
+ pp_inline_parameter->grf5.block_count_x = 1; /* M x 1 */
+ pp_inline_parameter->grf5.number_blocks = pp_avs_context->dest_h / 8;
+ pp_inline_parameter->grf6.video_step_delta = 0.0;
+
+ dst_surface->flags = src_surface->flags;
+
+ return VA_STATUS_SUCCESS;
+}
+/* Thin wrapper around pp_nv12_avs_initialize(): forwards every argument unchanged and fixes the trailing argument to 1; returns whatever that function returns. */
+static VAStatus
+pp_nv12_avs_initialize_nlas(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
+ const struct i965_surface *src_surface,
+ const VARectangle *src_rect,
+ struct i965_surface *dst_surface,
+ const VARectangle *dst_rect,
+ void *filter_param)
+{
+ return pp_nv12_avs_initialize(ctx, pp_context,
+ src_surface,
+ src_rect,
+ dst_surface,
+ dst_rect,
+ filter_param,
+ 1); /* presumably the nlas flag that pp_nv12_avs_initialize() stores in grf4.r4_2.avs.nlas -- confirm against its signature */
+}
+/* Thin wrapper around pp_nv12_avs_initialize(): forwards every argument unchanged and fixes the trailing argument to 0; returns whatever that function returns. */
+static VAStatus
+gen6_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
+ const struct i965_surface *src_surface,
+ const VARectangle *src_rect,
+ struct i965_surface *dst_surface,
+ const VARectangle *dst_rect,
+ void *filter_param)
+{
+ return pp_nv12_avs_initialize(ctx, pp_context,
+ src_surface,
+ src_rect,
+ dst_surface,
+ dst_rect,
+ filter_param,
+ 0); /* presumably the nlas flag, switched off for this path -- confirm against pp_nv12_avs_initialize() */
+}
+/* Returns dest_w / 16 read from the pp_avs_context passed as private_context; installed as pp_context->pp_x_steps by gen7_pp_plx_avs_initialize(). */
+static int
+gen7_pp_avs_x_steps(void *private_context)
+{
+ struct pp_avs_context *pp_avs_context = private_context;
+
+ return pp_avs_context->dest_w / 16; /* gen7_pp_plx_avs_initialize() stores dest_w as ALIGN(width, 16), so nothing is truncated on that path */
+}
+/* Returns dest_h / 16 read from the pp_avs_context passed as private_context; installed as pp_context->pp_y_steps by gen7_pp_plx_avs_initialize(). */
+static int
+gen7_pp_avs_y_steps(void *private_context)
+{
+ struct pp_avs_context *pp_avs_context = private_context;
+
+ return pp_avs_context->dest_h / 16; /* gen7_pp_plx_avs_initialize() stores dest_h as ALIGN(height, 16), so nothing is truncated on that path */
+}
+/* Fills the gen7 inline parameters (grf7) for block (x, y): the block origin in destination coordinates, stepping 16 per block in both directions, plus a 1/src_w x step. Always returns 0. */
+static int
+gen7_pp_avs_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
+{
+ struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
+ struct gen7_pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
+
+ pp_inline_parameter->grf7.destination_block_horizontal_origin = x * 16 + pp_avs_context->dest_x;
+ pp_inline_parameter->grf7.destination_block_vertical_origin = y * 16 + pp_avs_context->dest_y;
+ pp_inline_parameter->grf7.constant_0 = 0xffffffff;
+ pp_inline_parameter->grf7.sampler_load_main_video_x_scaling_step = 1.0 / pp_avs_context->src_w; /* NOTE(review): src_w == 0 makes this a floating-point division by zero -- confirm callers reject empty source rectangles */
+
+ return 0;
+}
+/* Sets the packed Y/U/V component offsets in grf2 for YUY2 and UYVY surfaces; any other fourcc leaves the static parameters untouched. NOTE(review): despite "src" in the name, the caller visible in this patch passes dst_surface and the fields written are di_destination_*. */
+static void gen7_update_src_surface_uv_offset(VADriverContextP ctx,
+ struct i965_post_processing_context *pp_context,
+ const struct i965_surface *surface)
+{
+ struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
+ int fourcc = pp_get_surface_fourcc(ctx, surface);
+
+ if (fourcc == VA_FOURCC('Y', 'U', 'Y', '2')) { /* presumably byte offsets inside a 4-byte Y0 U Y1 V macropixel -- confirm */
+ pp_static_parameter->grf2.di_destination_packed_y_component_offset = 0;
+ pp_static_parameter->grf2.di_destination_packed_u_component_offset = 1;
+ pp_static_parameter->grf2.di_destination_packed_v_component_offset = 3;
+ } else if (fourcc == VA_FOURCC('U', 'Y', 'V', 'Y')) { /* presumably byte offsets inside a 4-byte U Y0 V Y1 macropixel -- confirm */
+ pp_static_parameter->grf2.di_destination_packed_y_component_offset = 1;
+ pp_static_parameter->grf2.di_destination_packed_u_component_offset = 0;
+ pp_static_parameter->grf2.di_destination_packed_v_component_offset = 2;
+ }
+}
+/* Programs the gen7 AVS scaling pass: binds the source and destination surfaces, fills one shared 8x8 coefficient table, points sampler slots 4, 8 and 12 at it, installs the gen7 step/block callbacks and writes the static scaling parameters. filter_param is not used. Always returns VA_STATUS_SUCCESS. */
+static VAStatus
+gen7_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
+ const struct i965_surface *src_surface,
+ const VARectangle *src_rect,
+ struct i965_surface *dst_surface,
+ const VARectangle *dst_rect,
+ void *filter_param)
+{
+ struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
+ struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
+ struct gen7_sampler_8x8 *sampler_8x8;
+ struct i965_sampler_8x8_state *sampler_8x8_state;
+ int index, i;
+ int width[3], height[3], pitch[3], offset[3]; /* output arrays handed to gen7_pp_set_media_rw_message_surface(); reused for source and destination */
+ int src_height;
+
+ /* source surface */
+ gen7_pp_set_media_rw_message_surface(ctx, pp_context, src_surface, 0, 0,
+ width, height, pitch, offset);
+ src_height = height[0]; /* saved now because the destination call below is given the same arrays */
+
+ /* destination surface */
+ gen7_pp_set_media_rw_message_surface(ctx, pp_context, dst_surface, 24, 1,
+ width, height, pitch, offset);
+
+ /* sampler 8x8 state */
+ dri_bo_map(pp_context->sampler_state_table.bo_8x8, True);
+ assert(pp_context->sampler_state_table.bo_8x8->virtual);
+ assert(sizeof(*sampler_8x8_state) == sizeof(int) * 138); /* layout check: the state block must be exactly 138 ints */
+ sampler_8x8_state = pp_context->sampler_state_table.bo_8x8->virtual;
+ memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
+
+ for (i = 0; i < 17; i++) { /* the same values are written into each of the 17 coefficient entries */
+ /* for Y channel: currently ignored, every tap is left at zero */
+ sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c0 = 0x0;
+ sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c1 = 0x0;
+ sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c2 = 0x0;
+ sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c3 = 0x0;
+ sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c4 = 0x0;
+ sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c5 = 0x0;
+ sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c6 = 0x0;
+ sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c7 = 0x0;
+ sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c0 = 0x0;
+ sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c1 = 0x0;
+ sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c2 = 0x0;
+ sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c3 = 0x0;
+ sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c4 = 0x0;
+ sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c5 = 0x0;
+ sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c6 = 0x0;
+ sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c7 = 0x0;
+ /* for U/V channel: the four centre taps (c2..c5) are 0x10 each; "0.25" presumably means 0x10 in a fixed-point format where 0x40 is 1.0 -- TODO confirm */
+ sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c0 = 0x0;
+ sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c1 = 0x0;
+ sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c2 = 0x10;
+ sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c3 = 0x10;
+ sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c4 = 0x10;
+ sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c5 = 0x10;
+ sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c6 = 0x0;
+ sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c7 = 0x0;
+ sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c0 = 0x0;
+ sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c1 = 0x0;
+ sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c2 = 0x10;
+ sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c3 = 0x10;
+ sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c4 = 0x10;
+ sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c5 = 0x10;
+ sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c6 = 0x0;
+ sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c7 = 0x0;
+ }
+
+ sampler_8x8_state->dw136.default_sharpness_level = 0;
+ sampler_8x8_state->dw137.adaptive_filter_for_all_channel = 1;
+ sampler_8x8_state->dw137.bypass_y_adaptive_filtering = 1;
+ sampler_8x8_state->dw137.bypass_x_adaptive_filtering = 1;
+ dri_bo_unmap(pp_context->sampler_state_table.bo_8x8);
+
+ /* sampler 8x8 */
+ dri_bo_map(pp_context->sampler_state_table.bo, True);
+ assert(pp_context->sampler_state_table.bo->virtual);
+ assert(sizeof(*sampler_8x8) == sizeof(int) * 4); /* layout check: each sampler entry must be exactly 4 ints */
+ sampler_8x8 = pp_context->sampler_state_table.bo->virtual;
+
+ /* sample_8x8 Y index 4 */
+ index = 4;
+ memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
+ sampler_8x8[index].dw0.global_noise_estimation = 255;
+ sampler_8x8[index].dw0.ief_bypass = 1;
+
+ sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5; /* bo offset divided by 32; the reloc emitted below targets this same dw1 */
+
+ sampler_8x8[index].dw2.weak_edge_threshold = 1;
+ sampler_8x8[index].dw2.strong_edge_threshold = 8;
+ sampler_8x8[index].dw2.r5x_coefficient = 9;
+ sampler_8x8[index].dw2.r5cx_coefficient = 8;
+ sampler_8x8[index].dw2.r5c_coefficient = 3;
+
+ sampler_8x8[index].dw3.r3x_coefficient = 27;
+ sampler_8x8[index].dw3.r3c_coefficient = 5;
+ sampler_8x8[index].dw3.gain_factor = 40;
+ sampler_8x8[index].dw3.non_edge_weight = 1;
+ sampler_8x8[index].dw3.regular_weight = 2;
+ sampler_8x8[index].dw3.strong_edge_weight = 7;
+ sampler_8x8[index].dw3.ief4_smooth_enable = 0;
+
+ dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
+ I915_GEM_DOMAIN_RENDER,
+ 0,
+ 0,
+ sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1), /* NOTE(review): offsetof uses struct i965_sampler_8x8 while the array is struct gen7_sampler_8x8 -- confirm dw1 sits at the same offset in both */
+ pp_context->sampler_state_table.bo_8x8);
+
+ /* sample_8x8 UV index 8 -- NOTE(review): the next slot is labelled "V", so this one may really be U only; confirm */
+ index = 8;
+ memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
+ sampler_8x8[index].dw0.disable_8x8_filter = 0;
+ sampler_8x8[index].dw0.global_noise_estimation = 255;
+ sampler_8x8[index].dw0.ief_bypass = 1;
+ sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
+ sampler_8x8[index].dw2.weak_edge_threshold = 1;
+ sampler_8x8[index].dw2.strong_edge_threshold = 8;
+ sampler_8x8[index].dw2.r5x_coefficient = 9;
+ sampler_8x8[index].dw2.r5cx_coefficient = 8;
+ sampler_8x8[index].dw2.r5c_coefficient = 3;
+ sampler_8x8[index].dw3.r3x_coefficient = 27;
+ sampler_8x8[index].dw3.r3c_coefficient = 5;
+ sampler_8x8[index].dw3.gain_factor = 40;
+ sampler_8x8[index].dw3.non_edge_weight = 1;
+ sampler_8x8[index].dw3.regular_weight = 2;
+ sampler_8x8[index].dw3.strong_edge_weight = 7;
+ sampler_8x8[index].dw3.ief4_smooth_enable = 0;
+
+ dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
+ I915_GEM_DOMAIN_RENDER,
+ 0,
+ 0,
+ sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
+ pp_context->sampler_state_table.bo_8x8);
+
+ /* sampler_8x8 V, index 12 */
+ index = 12;
+ memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
+ sampler_8x8[index].dw0.disable_8x8_filter = 0;
+ sampler_8x8[index].dw0.global_noise_estimation = 255;
+ sampler_8x8[index].dw0.ief_bypass = 1;
+ sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
+ sampler_8x8[index].dw2.weak_edge_threshold = 1;
+ sampler_8x8[index].dw2.strong_edge_threshold = 8;
+ sampler_8x8[index].dw2.r5x_coefficient = 9;
+ sampler_8x8[index].dw2.r5cx_coefficient = 8;
+ sampler_8x8[index].dw2.r5c_coefficient = 3;
+ sampler_8x8[index].dw3.r3x_coefficient = 27;
+ sampler_8x8[index].dw3.r3c_coefficient = 5;
+ sampler_8x8[index].dw3.gain_factor = 40;
+ sampler_8x8[index].dw3.non_edge_weight = 1;
+ sampler_8x8[index].dw3.regular_weight = 2;
+ sampler_8x8[index].dw3.strong_edge_weight = 7;
+ sampler_8x8[index].dw3.ief4_smooth_enable = 0;
+
+ dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
+ I915_GEM_DOMAIN_RENDER,
+ 0,
+ 0,
+ sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
+ pp_context->sampler_state_table.bo_8x8);
+
+ dri_bo_unmap(pp_context->sampler_state_table.bo);
+
+ /* private function & data */
+ pp_context->pp_x_steps = gen7_pp_avs_x_steps;
+ pp_context->pp_y_steps = gen7_pp_avs_y_steps;
+ pp_context->pp_set_block_parameter = gen7_pp_avs_set_block_parameter;
+
+ pp_avs_context->dest_x = dst_rect->x;
+ pp_avs_context->dest_y = dst_rect->y;
+ pp_avs_context->dest_w = ALIGN(dst_rect->width, 16); /* the step callbacks divide dest_w and dest_h by 16 */
+ pp_avs_context->dest_h = ALIGN(dst_rect->height, 16);
+ pp_avs_context->src_w = src_rect->width;
+ pp_avs_context->src_h = src_rect->height;
+
+ int dw = (pp_avs_context->src_w - 1) / 16 + 1; /* src_w divided by 16, rounded up */
+ dw = MAX(dw, pp_avs_context->dest_w); /* NOTE(review): the left operand counts 16-wide units, dest_w is the aligned width itself -- confirm the mixed units are intended */
+
+ pp_static_parameter->grf1.pointer_to_inline_parameter = 7;
+ pp_static_parameter->grf2.avs_wa_enable = 1; /* must be set for GEN7 */
+ pp_static_parameter->grf2.avs_wa_width = dw;
+ pp_static_parameter->grf2.avs_wa_one_div_256_width = (float) 1.0 / (256 * dw);
+ pp_static_parameter->grf2.avs_wa_five_div_256_width = (float) 5.0 / (256 * dw);
+
+ pp_static_parameter->grf3.sampler_load_horizontal_scaling_step_ratio = (float) pp_avs_context->src_w / pp_avs_context->dest_w; /* NOTE(review): dest_w and dest_h are 0 for an empty dst_rect, so the divisions here and below would divide by zero -- confirm callers reject that */
+ pp_static_parameter->grf4.sampler_load_vertical_scaling_step = (float) src_rect->height / src_height / pp_avs_context->dest_h; /* src_height is height[0] as reported for the source surface above */
+ pp_static_parameter->grf5.sampler_load_vertical_frame_origin = -(float)pp_avs_context->dest_y / pp_avs_context->dest_h;
+ pp_static_parameter->grf6.sampler_load_horizontal_frame_origin = -(float)pp_avs_context->dest_x / pp_avs_context->dest_w;
+
+ gen7_update_src_surface_uv_offset(ctx, pp_context, dst_surface); /* called with the destination surface despite the "src" in the helper name */
+
+ dst_surface->flags = src_surface->flags; /* the output carries the same flags as the input */
+
+ return VA_STATUS_SUCCESS;
+}
+
+/* Always returns 1, i.e. a single horizontal step; private_context is not read. Installed as pp_context->pp_x_steps by pp_nv12_dndi_initialize(). */
+static int
+pp_dndi_x_steps(void *private_context)
+{
+ return 1;
+}
+/* Returns dest_h / 4 read from the pp_dndi_context passed as private_context; installed as pp_context->pp_y_steps by pp_nv12_dndi_initialize(). */
+static int
+pp_dndi_y_steps(void *private_context)
+{
+ struct pp_dndi_context *pp_dndi_context = private_context;
+
+ return pp_dndi_context->dest_h / 4; /* pp_dndi_set_block_parameter() advances the vertical origin by 4 per step */
+}
+/* Writes the destination origin of block (x, y) into the grf5 inline parameters, stepping 16 horizontally and 4 vertically. Always returns 0. */
+static int
+pp_dndi_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
+{
+ struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
+
+ pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16;
+ pp_inline_parameter->grf5.destination_block_vertical_origin = y * 4; /* matches the dest_h / 4 step count in pp_dndi_y_steps() */
+
+ return 0;
+}
+/* Sets up an NV12 pass with both dn_enable and di_enable set: binds the source, STMM and destination surfaces, fills sampler slot 0, installs the pp_dndi_* callbacks and writes the static/inline parameters. Returns VA_STATUS_ERROR_FLAG_NOT_SUPPORTED when the source is flagged as a frame, otherwise VA_STATUS_SUCCESS. src_rect, dst_rect and filter_param are not read by the added code. */
+static
+VAStatus pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
+ const struct i965_surface *src_surface,
+ const VARectangle *src_rect,
+ struct i965_surface *dst_surface,
+ const VARectangle *dst_rect,
+ void *filter_param)
+{
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ struct pp_dndi_context *pp_dndi_context = (struct pp_dndi_context *)&pp_context->private_context;
+ struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
+ struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
+ struct object_surface *obj_surface;
+ struct i965_sampler_dndi *sampler_dndi;
+ int index;
+ int w, h;
+ int orig_w, orig_h;
+ int dndi_top_first = 1; /* always overwritten by the if/else below */
+
+ if (src_surface->flags == I965_SURFACE_FLAG_FRAME) /* input flagged as a whole frame is rejected by this path */
+ return VA_STATUS_ERROR_FLAG_NOT_SUPPORTED;
+
+ if (src_surface->flags == I965_SURFACE_FLAG_TOP_FIELD_FIRST)
+ dndi_top_first = 1;
+ else
+ dndi_top_first = 0;
+
+ /* surface */
+ obj_surface = SURFACE(src_surface->id);
+ orig_w = obj_surface->orig_width;
+ orig_h = obj_surface->orig_height;
+ w = obj_surface->width;
+ h = obj_surface->height;
+
+ if (pp_context->stmm.bo == NULL) { /* allocated once and then reused; NOTE(review): sized from this source surface and never grown -- confirm larger later surfaces cannot reach here */
+ pp_context->stmm.bo = dri_bo_alloc(i965->intel.bufmgr,
+ "STMM surface",
+ w * h,
+ 4096);
+ assert(pp_context->stmm.bo); /* allocation failure is only checked by this assert */
+ }
+
+ /* source UV surface index 2 */
+ i965_pp_set_surface_state(ctx, pp_context,
+ obj_surface->bo, w * h,
+ orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
+ 2, 0);
+
+ /* source YUV surface index 4 */
+ i965_pp_set_surface2_state(ctx, pp_context,
+ obj_surface->bo, 0,
+ orig_w, orig_h, w,
+ 0, h,
+ SURFACE_FORMAT_PLANAR_420_8, 1,
+ 4);
+
+ /* source STMM surface index 20 */
+ i965_pp_set_surface_state(ctx, pp_context,
+ pp_context->stmm.bo, 0,
+ orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
+ 20, 1);
+
+ /* destination surface */
+ obj_surface = SURFACE(dst_surface->id); /* from here on w, h, orig_w and orig_h describe the destination */
+ orig_w = obj_surface->orig_width;
+ orig_h = obj_surface->orig_height;
+ w = obj_surface->width;
+ h = obj_surface->height;
+
+ /* destination Y surface index 7 */
+ i965_pp_set_surface_state(ctx, pp_context,
+ obj_surface->bo, 0,
+ orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
+ 7, 1);
+
+ /* destination UV surface index 8 */
+ i965_pp_set_surface_state(ctx, pp_context,
+ obj_surface->bo, w * h,
+ orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
+ 8, 1);
+ /* sampler dndi */
+ dri_bo_map(pp_context->sampler_state_table.bo, True);
+ assert(pp_context->sampler_state_table.bo->virtual);
+ assert(sizeof(*sampler_dndi) == sizeof(int) * 8); /* layout check: each entry must be exactly 8 ints */
+ sampler_dndi = pp_context->sampler_state_table.bo->virtual;
+
+ /* sampler dndi index 0 */
+ index = 0;
+ sampler_dndi[index].dw0.denoise_asd_threshold = 0;
+ sampler_dndi[index].dw0.denoise_history_delta = 8; // 0-15, default is 8
+ sampler_dndi[index].dw0.denoise_maximum_history = 128; // 128-240
+ sampler_dndi[index].dw0.denoise_stad_threshold = 0;
+
+ sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
+ sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 4;
+ sampler_dndi[index].dw1.stmm_c2 = 1;
+ sampler_dndi[index].dw1.low_temporal_difference_threshold = 8;
+ sampler_dndi[index].dw1.temporal_difference_threshold = 16;
+
+ sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = 15; // 0-31
+ sampler_dndi[index].dw2.block_noise_estimate_edge_threshold = 7; // 0-15
+ sampler_dndi[index].dw2.denoise_edge_threshold = 7; // 0-15
+ sampler_dndi[index].dw2.good_neighbor_threshold = 4; // 0-63
+
+ sampler_dndi[index].dw3.maximum_stmm = 128;
+ sampler_dndi[index].dw3.multipler_for_vecm = 2;
+ sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
+ sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
+ sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
+
+ sampler_dndi[index].dw4.sdi_delta = 8;
+ sampler_dndi[index].dw4.sdi_threshold = 128;
+ sampler_dndi[index].dw4.stmm_output_shift = 7; // stmm_max - stmm_min = 2 ^ stmm_output_shift
+ sampler_dndi[index].dw4.stmm_shift_up = 0;
+ sampler_dndi[index].dw4.stmm_shift_down = 0;
+ sampler_dndi[index].dw4.minimum_stmm = 0;
+
+ sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 8;
+ sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 32;
+ sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 64;
+ sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 32;
+
+ sampler_dndi[index].dw6.dn_enable = 1;
+ sampler_dndi[index].dw6.di_enable = 1;
+ sampler_dndi[index].dw6.di_partial = 0;
+ sampler_dndi[index].dw6.dndi_top_first = dndi_top_first;
+ sampler_dndi[index].dw6.dndi_stream_id = 0;
+ sampler_dndi[index].dw6.dndi_first_frame = 1;
+ sampler_dndi[index].dw6.progressive_dn = 0;
+ sampler_dndi[index].dw6.fmd_tear_threshold = 63;
+ sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 32;
+ sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 32;
+
+ sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 0;
+ sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 0;
+ sampler_dndi[index].dw7.vdi_walker_enable = 0;
+ sampler_dndi[index].dw7.column_width_minus1 = 0;
+
+ dri_bo_unmap(pp_context->sampler_state_table.bo);
+
+ /* private function & data */
+ pp_context->pp_x_steps = pp_dndi_x_steps;
+ pp_context->pp_y_steps = pp_dndi_y_steps;
+ pp_context->pp_set_block_parameter = pp_dndi_set_block_parameter;
+
+ pp_static_parameter->grf1.statistics_surface_picth = w / 2; /* "picth" (sic) is the field name as spelled in the struct */
+ pp_static_parameter->grf1.r1_6.di.top_field_first = dndi_top_first;
+ pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m2 = 0;
+ pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m1 = 0;
+
+ pp_inline_parameter->grf5.block_count_x = w / 16; /* 1 x N; w is the destination surface width at this point */
+ pp_inline_parameter->grf5.number_blocks = w / 16;
+ pp_inline_parameter->grf5.block_vertical_mask = 0xff;
+ pp_inline_parameter->grf5.block_horizontal_mask = 0xffff;
- pp_static_parameter.grf4.r4_2.avs.nlas = 1;
- pp_static_parameter.grf1.r1_6.normalized_video_y_scaling_step = (float) src_rect->height / in_h / out_h;
+ pp_dndi_context->dest_w = w; /* destination surface width and height; dest_h is read back by pp_dndi_y_steps() */
+ pp_dndi_context->dest_h = h;
- pp_inline_parameter.grf5.normalized_video_x_scaling_step = (float) src_rect->width / in_w / out_w;
- pp_inline_parameter.grf5.block_count_x = 1; /* M x 1 */
- pp_inline_parameter.grf5.number_blocks = pp_avs_context->dest_h / 8;
- pp_inline_parameter.grf5.block_vertical_mask = 0xff;
- pp_inline_parameter.grf5.block_horizontal_mask = 0xffff;
- pp_inline_parameter.grf6.video_step_delta = 0.0;
+ dst_surface->flags = I965_SURFACE_FLAG_FRAME; /* the output is marked as a frame, whatever the input field order was */
+
+ return VA_STATUS_SUCCESS;
}
static int
-pp_dndi_x_steps(void *private_context)
+pp_dn_x_steps(void *private_context) /* always returns 1 (a single horizontal step); private_context is not read */
{
return 1;
}
static int
-pp_dndi_y_steps(void *private_context)
+pp_dn_y_steps(void *private_context) /* returns dest_h / 8 from the pp_dn_context passed as private_context */
{
- struct pp_dndi_context *pp_dndi_context = private_context;
+ struct pp_dn_context *pp_dn_context = private_context;
- return pp_dndi_context->dest_h / 4;
+ return pp_dn_context->dest_h / 8; /* pp_dn_set_block_parameter() advances the vertical origin by 8 per step */
}
static int
-pp_dndi_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
+pp_dn_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y) /* writes the destination origin of block (x, y) into grf5: 16 per x step, 8 per y step; always returns 0 */
{
- pp_inline_parameter.grf5.destination_block_horizontal_origin = x * 16;
- pp_inline_parameter.grf5.destination_block_vertical_origin = y * 4;
+ struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter; /* taken from the context passed in rather than a file-scope object, as the removed lines did */
+
+ pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16;
+ pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8; /* matches the dest_h / 8 step count in pp_dn_y_steps() */
return 0;
}
static
-void pp_nv12_dndi_initialize(VADriverContextP ctx,
- VASurfaceID in_surface_id, VASurfaceID out_surface_id,
- const VARectangle *src_rect, const VARectangle *dst_rect)
+VAStatus pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
+ const struct i965_surface *src_surface,
+ const VARectangle *src_rect,
+ struct i965_surface *dst_surface,
+ const VARectangle *dst_rect,
+ void *filter_param)
{
struct i965_driver_data *i965 = i965_driver_data(ctx);
- struct i965_post_processing_context *pp_context = i965->pp_context;
- struct pp_dndi_context *pp_dndi_context = (struct pp_dndi_context *)&pp_context->private_context;
+ struct pp_dn_context *pp_dn_context = (struct pp_dn_context *)&pp_context->private_context;
struct object_surface *obj_surface;
- struct i965_surface_state *ss;
- struct i965_surface_state2 *ss_dndi;
struct i965_sampler_dndi *sampler_dndi;
- dri_bo *bo;
+ struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
+ struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
+
int index;
int w, h;
int orig_w, orig_h;
- unsigned int tiling, swizzle;
+ int dn_strength = 15;
+ int dndi_top_first = 1;
+ int dn_progressive = 0;
+
+ if (src_surface->flags == I965_SURFACE_FLAG_FRAME) {
+ dndi_top_first = 1;
+ dn_progressive = 1;
+ } else if (src_surface->flags == I965_SURFACE_FLAG_TOP_FIELD_FIRST) {
+ dndi_top_first = 1;
+ dn_progressive = 0;
+ } else {
+ dndi_top_first = 0;
+ dn_progressive = 0;
+ }
/* surface */
- obj_surface = SURFACE(in_surface_id);
+ obj_surface = SURFACE(src_surface->id);
orig_w = obj_surface->orig_width;
orig_h = obj_surface->orig_height;
w = obj_surface->width;
h = obj_surface->height;
- dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
if (pp_context->stmm.bo == NULL) {
pp_context->stmm.bo = dri_bo_alloc(i965->intel.bufmgr,
@@ -1479,165 +2879,44 @@ void pp_nv12_dndi_initialize(VADriverContextP ctx,
}
/* source UV surface index 2 */
- index = 2;
- pp_context->surfaces[index].s_bo = obj_surface->bo;
- dri_bo_reference(pp_context->surfaces[index].s_bo);
- bo = dri_bo_alloc(i965->intel.bufmgr,
- "surface state",
- sizeof(struct i965_surface_state),
- 4096);
- assert(bo);
- pp_context->surfaces[index].ss_bo = bo;
- dri_bo_map(bo, True);
- assert(bo->virtual);
- ss = bo->virtual;
- memset(ss, 0, sizeof(*ss));
- ss->ss0.surface_type = I965_SURFACE_2D;
- ss->ss0.surface_format = I965_SURFACEFORMAT_R8G8_UNORM;
- ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset + w * h;
- ss->ss2.width = orig_w / 4 - 1;
- ss->ss2.height = orig_h / 2 - 1;
- ss->ss3.pitch = w - 1;
- pp_set_surface_tiling(ss, tiling);
- dri_bo_emit_reloc(bo,
- I915_GEM_DOMAIN_RENDER,
- 0,
- w * h,
- offsetof(struct i965_surface_state, ss1),
- pp_context->surfaces[index].s_bo);
- dri_bo_unmap(bo);
+ i965_pp_set_surface_state(ctx, pp_context,
+ obj_surface->bo, w * h,
+ orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
+ 2, 0);
/* source YUV surface index 4 */
- index = 4;
- pp_context->surfaces[index].s_bo = obj_surface->bo;
- dri_bo_reference(pp_context->surfaces[index].s_bo);
- bo = dri_bo_alloc(i965->intel.bufmgr,
- "YUV surface state for deinterlace ",
- sizeof(struct i965_surface_state2),
- 4096);
- assert(bo);
- pp_context->surfaces[index].ss_bo = bo;
- dri_bo_map(bo, True);
- assert(bo->virtual);
- ss_dndi = bo->virtual;
- memset(ss_dndi, 0, sizeof(*ss_dndi));
- ss_dndi->ss0.surface_base_address = pp_context->surfaces[index].s_bo->offset;
- ss_dndi->ss1.cbcr_pixel_offset_v_direction = 0;
- ss_dndi->ss1.width = w - 1;
- ss_dndi->ss1.height = h - 1;
- ss_dndi->ss1.cbcr_pixel_offset_v_direction = 1;
- ss_dndi->ss2.half_pitch_for_chroma = 0;
- ss_dndi->ss2.pitch = w - 1;
- ss_dndi->ss2.interleave_chroma = 1;
- ss_dndi->ss2.surface_format = SURFACE_FORMAT_PLANAR_420_8;
- ss_dndi->ss2.half_pitch_for_chroma = 0;
- ss_dndi->ss2.tiled_surface = 0;
- ss_dndi->ss3.x_offset_for_cb = 0;
- ss_dndi->ss3.y_offset_for_cb = h;
- pp_set_surface2_tiling(ss_dndi, tiling);
- dri_bo_emit_reloc(bo,
- I915_GEM_DOMAIN_RENDER,
- 0,
- 0,
- offsetof(struct i965_surface_state2, ss0),
- pp_context->surfaces[index].s_bo);
- dri_bo_unmap(bo);
+ i965_pp_set_surface2_state(ctx, pp_context,
+ obj_surface->bo, 0,
+ orig_w, orig_h, w,
+ 0, h,
+ SURFACE_FORMAT_PLANAR_420_8, 1,
+ 4);
/* source STMM surface index 20 */
- index = 20;
- pp_context->surfaces[index].s_bo = pp_context->stmm.bo;
- dri_bo_reference(pp_context->surfaces[index].s_bo);
- bo = dri_bo_alloc(i965->intel.bufmgr,
- "STMM surface state for deinterlace ",
- sizeof(struct i965_surface_state2),
- 4096);
- assert(bo);
- pp_context->surfaces[index].ss_bo = bo;
- dri_bo_map(bo, True);
- assert(bo->virtual);
- ss = bo->virtual;
- memset(ss, 0, sizeof(*ss));
- ss->ss0.surface_type = I965_SURFACE_2D;
- ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
- ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset;
- ss->ss2.width = w - 1;
- ss->ss2.height = h - 1;
- ss->ss3.pitch = w - 1;
- dri_bo_emit_reloc(bo,
- I915_GEM_DOMAIN_RENDER,
- I915_GEM_DOMAIN_RENDER,
- 0,
- offsetof(struct i965_surface_state, ss1),
- pp_context->surfaces[index].s_bo);
- dri_bo_unmap(bo);
+ i965_pp_set_surface_state(ctx, pp_context,
+ pp_context->stmm.bo, 0,
+ orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
+ 20, 1);
/* destination surface */
- obj_surface = SURFACE(out_surface_id);
+ obj_surface = SURFACE(dst_surface->id);
orig_w = obj_surface->orig_width;
orig_h = obj_surface->orig_height;
w = obj_surface->width;
h = obj_surface->height;
- dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
/* destination Y surface index 7 */
- index = 7;
- pp_context->surfaces[index].s_bo = obj_surface->bo;
- dri_bo_reference(pp_context->surfaces[index].s_bo);
- bo = dri_bo_alloc(i965->intel.bufmgr,
- "surface state",
- sizeof(struct i965_surface_state),
- 4096);
- assert(bo);
- pp_context->surfaces[index].ss_bo = bo;
- dri_bo_map(bo, True);
- assert(bo->virtual);
- ss = bo->virtual;
- memset(ss, 0, sizeof(*ss));
- ss->ss0.surface_type = I965_SURFACE_2D;
- ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
- ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset;
- ss->ss2.width = orig_w / 4 - 1;
- ss->ss2.height = orig_h - 1;
- ss->ss3.pitch = w - 1;
- pp_set_surface_tiling(ss, tiling);
- dri_bo_emit_reloc(bo,
- I915_GEM_DOMAIN_RENDER,
- I915_GEM_DOMAIN_RENDER,
- 0,
- offsetof(struct i965_surface_state, ss1),
- pp_context->surfaces[index].s_bo);
- dri_bo_unmap(bo);
+ i965_pp_set_surface_state(ctx, pp_context,
+ obj_surface->bo, 0,
+ orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
+ 7, 1);
/* destination UV surface index 8 */
- index = 8;
- pp_context->surfaces[index].s_bo = obj_surface->bo;
- dri_bo_reference(pp_context->surfaces[index].s_bo);
- bo = dri_bo_alloc(i965->intel.bufmgr,
- "surface state",
- sizeof(struct i965_surface_state),
- 4096);
- assert(bo);
- pp_context->surfaces[index].ss_bo = bo;
- dri_bo_map(bo, True);
- assert(bo->virtual);
- ss = bo->virtual;
- memset(ss, 0, sizeof(*ss));
- ss->ss0.surface_type = I965_SURFACE_2D;
- ss->ss0.surface_format = I965_SURFACEFORMAT_R8G8_UNORM;
- ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset + w * h;
- ss->ss2.width = orig_w / 4 - 1;
- ss->ss2.height = orig_h / 2 - 1;
- ss->ss3.pitch = w - 1;
- pp_set_surface_tiling(ss, tiling);
- dri_bo_emit_reloc(bo,
- I915_GEM_DOMAIN_RENDER,
- I915_GEM_DOMAIN_RENDER,
- w * h,
- offsetof(struct i965_surface_state, ss1),
- pp_context->surfaces[index].s_bo);
- dri_bo_unmap(bo);
-
- /* sampler dndi */
+ i965_pp_set_surface_state(ctx, pp_context,
+ obj_surface->bo, w * h,
+ orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
+ 8, 1);
+ /* sampler dn */
dri_bo_map(pp_context->sampler_state_table.bo, True);
assert(pp_context->sampler_state_table.bo->virtual);
assert(sizeof(*sampler_dndi) == sizeof(int) * 8);
@@ -1656,7 +2935,7 @@ void pp_nv12_dndi_initialize(VADriverContextP ctx,
sampler_dndi[index].dw1.low_temporal_difference_threshold = 8;
sampler_dndi[index].dw1.temporal_difference_threshold = 16;
- sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = 15; // 0-31
+ sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = dn_strength; // 0-31
sampler_dndi[index].dw2.block_noise_estimate_edge_threshold = 7; // 0-15
sampler_dndi[index].dw2.denoise_edge_threshold = 7; // 0-15
sampler_dndi[index].dw2.good_neighbor_threshold = 7; // 0-63
@@ -1680,57 +2959,514 @@ void pp_nv12_dndi_initialize(VADriverContextP ctx,
sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 0;
sampler_dndi[index].dw6.dn_enable = 1;
+ sampler_dndi[index].dw6.di_enable = 0;
+ sampler_dndi[index].dw6.di_partial = 0;
+ sampler_dndi[index].dw6.dndi_top_first = dndi_top_first;
+ sampler_dndi[index].dw6.dndi_stream_id = 1;
+ sampler_dndi[index].dw6.dndi_first_frame = 1;
+ sampler_dndi[index].dw6.progressive_dn = dn_progressive;
+ sampler_dndi[index].dw6.fmd_tear_threshold = 32;
+ sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 32;
+ sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 32;
+
+ sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 2;
+ sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 1;
+ sampler_dndi[index].dw7.vdi_walker_enable = 0;
+ sampler_dndi[index].dw7.column_width_minus1 = w / 16;
+
+ dri_bo_unmap(pp_context->sampler_state_table.bo);
+
+ /* private function & data */
+ pp_context->pp_x_steps = pp_dn_x_steps;
+ pp_context->pp_y_steps = pp_dn_y_steps;
+ pp_context->pp_set_block_parameter = pp_dn_set_block_parameter;
+
+ pp_static_parameter->grf1.statistics_surface_picth = w / 2;
+ pp_static_parameter->grf1.r1_6.di.top_field_first = 0;
+ pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m2 = 64;
+ pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m1 = 192;
+
+ pp_inline_parameter->grf5.block_count_x = w / 16; /* 1 x N */
+ pp_inline_parameter->grf5.number_blocks = w / 16;
+ pp_inline_parameter->grf5.block_vertical_mask = 0xff;
+ pp_inline_parameter->grf5.block_horizontal_mask = 0xffff;
+
+ pp_dn_context->dest_w = w;
+ pp_dn_context->dest_h = h;
+
+ return VA_STATUS_SUCCESS;
+}
+
+/* Horizontal step count for the gen7 DN/DI walker: dest_w divided into 16-wide columns. */
+static int
+gen7_pp_dndi_x_steps(void *private_context)
+{
+ const struct pp_dndi_context *dndi = private_context;
+ const int columns_per_step = 16;
+
+ return dndi->dest_w / columns_per_step;
+}
+
+/* Vertical step count for the gen7 DN/DI walker: dest_h divided into 4-row bands. */
+static int
+gen7_pp_dndi_y_steps(void *private_context)
+{
+ const struct pp_dndi_context *dndi = private_context;
+ const int rows_per_step = 4;
+
+ return dndi->dest_h / rows_per_step;
+}
+
+/* Per-block callback for gen7 DN/DI: records the destination origin of block (x, y); blocks are 16 wide and 4 high. Always returns 0. */
+static int
+gen7_pp_dndi_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
+{
+ struct gen7_pp_inline_parameter *inline_param = pp_context->pp_inline_parameter;
+ const int origin_x = x * 16;
+ const int origin_y = y * 4;
+
+ inline_param->grf7.destination_block_horizontal_origin = origin_x;
+ inline_param->grf7.destination_block_vertical_origin = origin_y;
+
+ return 0;
+}
+
+static VAStatus
+gen7_pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
+ const struct i965_surface *src_surface,
+ const VARectangle *src_rect,
+ struct i965_surface *dst_surface,
+ const VARectangle *dst_rect,
+ void *filter_param) /* src_rect, dst_rect and filter_param are not read by this function */
+{ /* Programs surface states, sampler state 0 and static parameters for the gen7 deinterlace pass (di_enable = 1, dn_enable = 0). */
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ struct pp_dndi_context *pp_dndi_context = (struct pp_dndi_context *)&pp_context->private_context;
+ struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
+ struct object_surface *obj_surface;
+ struct gen7_sampler_dndi *sampler_dndi;
+ int index;
+ int w, h;
+ int orig_w, orig_h;
+ int dndi_top_first = 1; /* initial value is always overwritten by the flag check below */
+
+ if (src_surface->flags == I965_SURFACE_FLAG_FRAME) /* a source flagged as a whole frame is rejected */
+ return VA_STATUS_ERROR_FLAG_NOT_SUPPORTED;
+
+ if (src_surface->flags == I965_SURFACE_FLAG_TOP_FIELD_FIRST)
+ dndi_top_first = 1;
+ else
+ dndi_top_first = 0;
+
+ /* surface */
+ obj_surface = SURFACE(src_surface->id);
+ orig_w = obj_surface->orig_width;
+ orig_h = obj_surface->orig_height;
+ w = obj_surface->width;
+ h = obj_surface->height;
+
+ if (pp_context->stmm.bo == NULL) { /* allocated once (w * h bytes) and reused; NOTE(review): not resized if a later surface is larger */
+ pp_context->stmm.bo = dri_bo_alloc(i965->intel.bufmgr,
+ "STMM surface",
+ w * h,
+ 4096);
+ assert(pp_context->stmm.bo);
+ }
+
+ /* source UV surface index 1 */
+ gen7_pp_set_surface_state(ctx, pp_context,
+ obj_surface->bo, w * h,
+ orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
+ 1, 0);
+
+ /* source YUV surface index 3 */
+ gen7_pp_set_surface2_state(ctx, pp_context,
+ obj_surface->bo, 0,
+ orig_w, orig_h, w,
+ 0, h,
+ SURFACE_FORMAT_PLANAR_420_8, 1,
+ 3);
+
+ /* source (temporal reference) YUV surface index 4; NOTE(review): binds the same bo as index 3 */
+ gen7_pp_set_surface2_state(ctx, pp_context,
+ obj_surface->bo, 0,
+ orig_w, orig_h, w,
+ 0, h,
+ SURFACE_FORMAT_PLANAR_420_8, 1,
+ 4);
+
+ /* STMM / History Statistics input surface, index 5 */
+ gen7_pp_set_surface_state(ctx, pp_context,
+ pp_context->stmm.bo, 0,
+ orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
+ 5, 1);
+
+ /* destination surface */
+ obj_surface = SURFACE(dst_surface->id); /* from here on w, h, orig_w, orig_h describe the destination */
+ orig_w = obj_surface->orig_width;
+ orig_h = obj_surface->orig_height;
+ w = obj_surface->width;
+ h = obj_surface->height;
+
+ /* destination(Previous frame) Y surface index 27 */
+ gen7_pp_set_surface_state(ctx, pp_context,
+ obj_surface->bo, 0,
+ orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
+ 27, 1);
+
+ /* destination(Previous frame) UV surface index 28 */
+ gen7_pp_set_surface_state(ctx, pp_context,
+ obj_surface->bo, w * h,
+ orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
+ 28, 1);
+
+ /* destination(Current frame) Y surface index 30 */
+ gen7_pp_set_surface_state(ctx, pp_context,
+ obj_surface->bo, 0,
+ orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
+ 30, 1);
+
+ /* destination(Current frame) UV surface index 31 */
+ gen7_pp_set_surface_state(ctx, pp_context,
+ obj_surface->bo, w * h,
+ orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
+ 31, 1);
+
+ /* STMM output surface, index 33 */
+ gen7_pp_set_surface_state(ctx, pp_context,
+ pp_context->stmm.bo, 0,
+ orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
+ 33, 1);
+
+
+ /* sampler dndi */
+ dri_bo_map(pp_context->sampler_state_table.bo, True);
+ assert(pp_context->sampler_state_table.bo->virtual);
+ assert(sizeof(*sampler_dndi) == sizeof(int) * 8); /* each sampler entry must be exactly eight dwords (dw0..dw7) */
+ sampler_dndi = pp_context->sampler_state_table.bo->virtual;
+
+ /* sample dndi index 0 */
+ index = 0;
+ sampler_dndi[index].dw0.denoise_asd_threshold = 0;
+ sampler_dndi[index].dw0.dnmh_delt = 8;
+ sampler_dndi[index].dw0.vdi_walker_y_stride = 0;
+ sampler_dndi[index].dw0.vdi_walker_frame_sharing_enable = 0;
+ sampler_dndi[index].dw0.denoise_maximum_history = 128; // 128-240
+ sampler_dndi[index].dw0.denoise_stad_threshold = 0;
+
+ sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
+ sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 0;
+ sampler_dndi[index].dw1.stmm_c2 = 0;
+ sampler_dndi[index].dw1.low_temporal_difference_threshold = 8;
+ sampler_dndi[index].dw1.temporal_difference_threshold = 16;
+
+ sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = 15; // 0-31
+ sampler_dndi[index].dw2.bne_edge_th = 1;
+ sampler_dndi[index].dw2.smooth_mv_th = 0;
+ sampler_dndi[index].dw2.sad_tight_th = 5;
+ sampler_dndi[index].dw2.cat_slope_minus1 = 9;
+ sampler_dndi[index].dw2.good_neighbor_th = 4;
+
+ sampler_dndi[index].dw3.maximum_stmm = 128;
+ sampler_dndi[index].dw3.multipler_for_vecm = 2;
+ sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
+ sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
+ sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
+
+ sampler_dndi[index].dw4.sdi_delta = 8;
+ sampler_dndi[index].dw4.sdi_threshold = 128;
+ sampler_dndi[index].dw4.stmm_output_shift = 7; // stmm_max - stmm_min = 2 ^ stmm_output_shift
+ sampler_dndi[index].dw4.stmm_shift_up = 0;
+ sampler_dndi[index].dw4.stmm_shift_down = 0;
+ sampler_dndi[index].dw4.minimum_stmm = 0;
+
+ sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 0;
+ sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 0;
+ sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 0;
+ sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 0;
+
+ sampler_dndi[index].dw6.dn_enable = 0; /* deinterlace only: denoise is switched off in this pass */
sampler_dndi[index].dw6.di_enable = 1;
sampler_dndi[index].dw6.di_partial = 0;
- sampler_dndi[index].dw6.dndi_top_first = 1;
+ sampler_dndi[index].dw6.dndi_top_first = dndi_top_first;
sampler_dndi[index].dw6.dndi_stream_id = 1;
sampler_dndi[index].dw6.dndi_first_frame = 1;
sampler_dndi[index].dw6.progressive_dn = 0;
+ sampler_dndi[index].dw6.mcdi_enable = 0;
sampler_dndi[index].dw6.fmd_tear_threshold = 32;
+ sampler_dndi[index].dw6.cat_th1 = 0;
sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 32;
sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 32;
- sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 2;
- sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 1;
- sampler_dndi[index].dw7.vdi_walker_enable = 0;
- sampler_dndi[index].dw7.column_width_minus1 = w / 16;
+ sampler_dndi[index].dw7.sad_tha = 5;
+ sampler_dndi[index].dw7.sad_thb = 10;
+ sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 0;
+ sampler_dndi[index].dw7.mc_pixel_consistency_th = 25;
+ sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 0;
+ sampler_dndi[index].dw7.vdi_walker_enable = 0;
+ sampler_dndi[index].dw7.neighborpixel_th = 10;
+ sampler_dndi[index].dw7.column_width_minus1 = w / 16; /* NOTE(review): stores w / 16 although the field is named "minus1" - confirm */
+
+ dri_bo_unmap(pp_context->sampler_state_table.bo);
+
+ /* private function & data */
+ pp_context->pp_x_steps = gen7_pp_dndi_x_steps;
+ pp_context->pp_y_steps = gen7_pp_dndi_y_steps;
+ pp_context->pp_set_block_parameter = gen7_pp_dndi_set_block_parameter;
+
+ pp_static_parameter->grf1.di_statistics_surface_pitch_div2 = w / 2;
+ pp_static_parameter->grf1.di_statistics_surface_height_div4 = h / 4;
+ pp_static_parameter->grf1.di_top_field_first = 0; /* NOTE(review): hard-coded 0 although dndi_top_first is computed above - confirm */
+ pp_static_parameter->grf1.pointer_to_inline_parameter = 7;
+
+ pp_static_parameter->grf2.di_destination_packed_y_component_offset = 0;
+ pp_static_parameter->grf2.di_destination_packed_u_component_offset = 1;
+ pp_static_parameter->grf2.di_destination_packed_v_component_offset = 3;
+
+ pp_static_parameter->grf4.di_hoffset_svf_from_dvf = 0;
+ pp_static_parameter->grf4.di_voffset_svf_from_dvf = 0;
+
+ pp_dndi_context->dest_w = w; /* destination size consumed by the x/y step callbacks */
+ pp_dndi_context->dest_h = h;
+
+ dst_surface->flags = I965_SURFACE_FLAG_FRAME; /* the output is marked as a whole frame */
+
+ return VA_STATUS_SUCCESS;
+}
+
+/* Horizontal step count for the gen7 denoise walker: one step per 16 columns of dest_w. */
+static int
+gen7_pp_dn_x_steps(void *private_context)
+{
+ const struct pp_dn_context *dn = private_context;
+ int steps = dn->dest_w;
+
+ steps /= 16;
+ return steps;
+}
+
+/* Vertical step count for the gen7 denoise walker: one step per 4 rows of dest_h. */
+static int
+gen7_pp_dn_y_steps(void *private_context)
+{
+ const struct pp_dn_context *dn = private_context;
+ int steps = dn->dest_h;
+
+ steps /= 4;
+ return steps;
+}
+
+/*
+ * Per-block callback for the gen7 denoise pass: records the destination
+ * origin of block (x, y), where blocks are 16 pixels wide and 4 rows high.
+ *
+ * The inline parameter buffer is accessed through the gen7 layout
+ * (struct gen7_pp_inline_parameter, grf7), matching
+ * gen7_pp_dndi_set_block_parameter and the
+ * grf1.pointer_to_inline_parameter = 7 programmed by
+ * gen7_pp_nv12_dn_initialize, instead of the pre-gen7 grf5 layout.
+ *
+ * Always returns 0.
+ */
+static int
+gen7_pp_dn_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
+{
+ struct gen7_pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
+
+ pp_inline_parameter->grf7.destination_block_horizontal_origin = x * 16;
+ pp_inline_parameter->grf7.destination_block_vertical_origin = y * 4;
+
+ return 0;
+}
+
+static VAStatus
+gen7_pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
+ const struct i965_surface *src_surface,
+ const VARectangle *src_rect,
+ struct i965_surface *dst_surface,
+ const VARectangle *dst_rect,
+ void *filter_param) /* src_rect, dst_rect and filter_param are not read by this function */
+{ /* Programs surface states, sampler state 0 and static parameters for the gen7 denoise pass (dn_enable = 1, di_enable = 0). */
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ struct pp_dn_context *pp_dn_context = (struct pp_dn_context *)&pp_context->private_context;
+ struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
+ struct object_surface *obj_surface;
+ struct gen7_sampler_dndi *sampler_dn;
+
+ int index;
+ int w, h;
+ int orig_w, orig_h;
+ int dn_strength = 15; /* fixed value written to the block-noise-estimate noise threshold below */
+ int dndi_top_first = 1;
+ int dn_progressive = 0;
+
+ if (src_surface->flags == I965_SURFACE_FLAG_FRAME) { /* whole frame: progressive denoise */
+ dndi_top_first = 1;
+ dn_progressive = 1;
+ } else if (src_surface->flags == I965_SURFACE_FLAG_TOP_FIELD_FIRST) {
+ dndi_top_first = 1;
+ dn_progressive = 0;
+ } else { /* any other flag value is handled as bottom-field-first */
+ dndi_top_first = 0;
+ dn_progressive = 0;
+ }
+
+ /* surface */
+ obj_surface = SURFACE(src_surface->id);
+ orig_w = obj_surface->orig_width;
+ orig_h = obj_surface->orig_height;
+ w = obj_surface->width;
+ h = obj_surface->height;
+
+ if (pp_context->stmm.bo == NULL) { /* allocated once (w * h bytes) and reused; NOTE(review): not resized if a later surface is larger */
+ pp_context->stmm.bo = dri_bo_alloc(i965->intel.bufmgr,
+ "STMM surface",
+ w * h,
+ 4096);
+ assert(pp_context->stmm.bo);
+ }
+
+ /* source UV surface index 1 */
+ gen7_pp_set_surface_state(ctx, pp_context,
+ obj_surface->bo, w * h,
+ orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
+ 1, 0);
+
+ /* source YUV surface index 3 */
+ gen7_pp_set_surface2_state(ctx, pp_context,
+ obj_surface->bo, 0,
+ orig_w, orig_h, w,
+ 0, h,
+ SURFACE_FORMAT_PLANAR_420_8, 1,
+ 3);
+
+ /* source (temporal reference) YUV surface index 4; NOTE(review): binds the same bo as index 3 */
+ gen7_pp_set_surface2_state(ctx, pp_context,
+ obj_surface->bo, 0,
+ orig_w, orig_h, w,
+ 0, h,
+ SURFACE_FORMAT_PLANAR_420_8, 1,
+ 4);
+
+ /* STMM / History Statistics input surface, index 5 */
+ gen7_pp_set_surface_state(ctx, pp_context,
+ pp_context->stmm.bo, 0,
+ orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
+ 5, 1);
+
+ /* destination surface */
+ obj_surface = SURFACE(dst_surface->id); /* from here on w, h, orig_w, orig_h describe the destination */
+ orig_w = obj_surface->orig_width;
+ orig_h = obj_surface->orig_height;
+ w = obj_surface->width;
+ h = obj_surface->height;
+
+ /* destination Y surface index 24 */
+ gen7_pp_set_surface_state(ctx, pp_context,
+ obj_surface->bo, 0,
+ orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
+ 24, 1);
+
+ /* destination UV surface index 25 */
+ gen7_pp_set_surface_state(ctx, pp_context,
+ obj_surface->bo, w * h,
+ orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
+ 25, 1);
+
+ /* sampler dn */
+ dri_bo_map(pp_context->sampler_state_table.bo, True);
+ assert(pp_context->sampler_state_table.bo->virtual);
+ assert(sizeof(*sampler_dn) == sizeof(int) * 8); /* each sampler entry must be exactly eight dwords (dw0..dw7) */
+ sampler_dn = pp_context->sampler_state_table.bo->virtual;
+
+ /* sample dn index 0 */
+ index = 0;
+ sampler_dn[index].dw0.denoise_asd_threshold = 0;
+ sampler_dn[index].dw0.dnmh_delt = 8;
+ sampler_dn[index].dw0.vdi_walker_y_stride = 0;
+ sampler_dn[index].dw0.vdi_walker_frame_sharing_enable = 0;
+ sampler_dn[index].dw0.denoise_maximum_history = 128; // 128-240
+ sampler_dn[index].dw0.denoise_stad_threshold = 0;
+
+ sampler_dn[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
+ sampler_dn[index].dw1.denoise_moving_pixel_threshold = 0;
+ sampler_dn[index].dw1.stmm_c2 = 0;
+ sampler_dn[index].dw1.low_temporal_difference_threshold = 8;
+ sampler_dn[index].dw1.temporal_difference_threshold = 16;
+
+ sampler_dn[index].dw2.block_noise_estimate_noise_threshold = dn_strength; // 0-31
+ sampler_dn[index].dw2.bne_edge_th = 1;
+ sampler_dn[index].dw2.smooth_mv_th = 0;
+ sampler_dn[index].dw2.sad_tight_th = 5;
+ sampler_dn[index].dw2.cat_slope_minus1 = 9;
+ sampler_dn[index].dw2.good_neighbor_th = 4;
+
+ sampler_dn[index].dw3.maximum_stmm = 128;
+ sampler_dn[index].dw3.multipler_for_vecm = 2;
+ sampler_dn[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
+ sampler_dn[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
+ sampler_dn[index].dw3.stmm_blending_constant_select = 0;
+
+ sampler_dn[index].dw4.sdi_delta = 8;
+ sampler_dn[index].dw4.sdi_threshold = 128;
+ sampler_dn[index].dw4.stmm_output_shift = 7; // stmm_max - stmm_min = 2 ^ stmm_output_shift
+ sampler_dn[index].dw4.stmm_shift_up = 0;
+ sampler_dn[index].dw4.stmm_shift_down = 0;
+ sampler_dn[index].dw4.minimum_stmm = 0;
+
+ sampler_dn[index].dw5.fmd_temporal_difference_threshold = 0;
+ sampler_dn[index].dw5.sdi_fallback_mode_2_constant = 0;
+ sampler_dn[index].dw5.sdi_fallback_mode_1_t2_constant = 0;
+ sampler_dn[index].dw5.sdi_fallback_mode_1_t1_constant = 0;
+
+ sampler_dn[index].dw6.dn_enable = 1; /* denoise only: deinterlace is switched off in this pass */
+ sampler_dn[index].dw6.di_enable = 0;
+ sampler_dn[index].dw6.di_partial = 0;
+ sampler_dn[index].dw6.dndi_top_first = dndi_top_first;
+ sampler_dn[index].dw6.dndi_stream_id = 1;
+ sampler_dn[index].dw6.dndi_first_frame = 1;
+ sampler_dn[index].dw6.progressive_dn = dn_progressive;
+ sampler_dn[index].dw6.mcdi_enable = 0;
+ sampler_dn[index].dw6.fmd_tear_threshold = 32;
+ sampler_dn[index].dw6.cat_th1 = 0;
+ sampler_dn[index].dw6.fmd2_vertical_difference_threshold = 32;
+ sampler_dn[index].dw6.fmd1_vertical_difference_threshold = 32;
+
+ sampler_dn[index].dw7.sad_tha = 5;
+ sampler_dn[index].dw7.sad_thb = 10;
+ sampler_dn[index].dw7.fmd_for_1st_field_of_current_frame = 2;
+ sampler_dn[index].dw7.mc_pixel_consistency_th = 25;
+ sampler_dn[index].dw7.fmd_for_2nd_field_of_previous_frame = 1;
+ sampler_dn[index].dw7.vdi_walker_enable = 0;
+ sampler_dn[index].dw7.neighborpixel_th = 10;
+ sampler_dn[index].dw7.column_width_minus1 = w / 16; /* NOTE(review): stores w / 16 although the field is named "minus1" - confirm */
dri_bo_unmap(pp_context->sampler_state_table.bo);
/* private function & data */
- pp_context->pp_x_steps = pp_dndi_x_steps;
- pp_context->pp_y_steps = pp_dndi_y_steps;
- pp_context->pp_set_block_parameter = pp_dndi_set_block_parameter;
+ pp_context->pp_x_steps = gen7_pp_dn_x_steps;
+ pp_context->pp_y_steps = gen7_pp_dn_y_steps;
+ pp_context->pp_set_block_parameter = gen7_pp_dn_set_block_parameter;
- pp_static_parameter.grf1.statistics_surface_picth = w / 2;
- pp_static_parameter.grf1.r1_6.di.top_field_first = 0;
- pp_static_parameter.grf4.r4_2.di.motion_history_coefficient_m2 = 64;
- pp_static_parameter.grf4.r4_2.di.motion_history_coefficient_m1 = 192;
+ pp_static_parameter->grf1.di_statistics_surface_pitch_div2 = w / 2;
+ pp_static_parameter->grf1.di_statistics_surface_height_div4 = h / 4;
+ pp_static_parameter->grf1.di_top_field_first = 0; /* NOTE(review): hard-coded 0 although dndi_top_first is computed above - confirm */
+ pp_static_parameter->grf1.pointer_to_inline_parameter = 7;
- pp_inline_parameter.grf5.block_count_x = w / 16; /* 1 x N */
- pp_inline_parameter.grf5.number_blocks = w / 16;
- pp_inline_parameter.grf5.block_vertical_mask = 0xff;
- pp_inline_parameter.grf5.block_horizontal_mask = 0xffff;
+ pp_static_parameter->grf2.di_destination_packed_y_component_offset = 0;
+ pp_static_parameter->grf2.di_destination_packed_u_component_offset = 1;
+ pp_static_parameter->grf2.di_destination_packed_v_component_offset = 3;
- pp_dndi_context->dest_w = w;
- pp_dndi_context->dest_h = h;
+ pp_static_parameter->grf4.di_hoffset_svf_from_dvf = 0;
+ pp_static_parameter->grf4.di_voffset_svf_from_dvf = 0;
+
+ pp_dn_context->dest_w = w; /* destination size consumed by the x/y step callbacks */
+ pp_dn_context->dest_h = h;
+
+ dst_surface->flags = src_surface->flags; /* the output keeps the source's frame/field flag */
+
+ return VA_STATUS_SUCCESS;
}
-static void
+static VAStatus
ironlake_pp_initialize(
VADriverContextP ctx,
- VASurfaceID in_surface_id,
- VASurfaceID out_surface_id,
+ struct i965_post_processing_context *pp_context,
+ const struct i965_surface *src_surface,
const VARectangle *src_rect,
+ struct i965_surface *dst_surface,
const VARectangle *dst_rect,
- int pp_index
+ int pp_index,
+ void *filter_param
)
{
+ VAStatus va_status;
struct i965_driver_data *i965 = i965_driver_data(ctx);
- struct i965_post_processing_context *pp_context = i965->pp_context;
struct pp_module *pp_module;
dri_bo *bo;
- int i;
+ int static_param_size, inline_param_size;
+
+ dri_bo_unreference(pp_context->surface_state_binding_table.bo);
+ bo = dri_bo_alloc(i965->intel.bufmgr,
+ "surface state & binding table",
+ (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_PP_SURFACES,
+ 4096);
+ assert(bo);
+ pp_context->surface_state_binding_table.bo = bo;
dri_bo_unreference(pp_context->curbe.bo);
bo = dri_bo_alloc(i965->intel.bufmgr,
@@ -1740,14 +3476,6 @@ ironlake_pp_initialize(
assert(bo);
pp_context->curbe.bo = bo;
- dri_bo_unreference(pp_context->binding_table.bo);
- bo = dri_bo_alloc(i965->intel.bufmgr,
- "binding table",
- sizeof(unsigned int),
- 4096);
- assert(bo);
- pp_context->binding_table.bo = bo;
-
dri_bo_unreference(pp_context->idrt.bo);
bo = dri_bo_alloc(i965->intel.bufmgr,
"interface discriptor",
@@ -1791,56 +3519,85 @@ ironlake_pp_initialize(
4096);
assert(bo);
pp_context->vfe_state.bo = bo;
-
- for (i = 0; i < MAX_PP_SURFACES; i++) {
- dri_bo_unreference(pp_context->surfaces[i].ss_bo);
- pp_context->surfaces[i].ss_bo = NULL;
-
- dri_bo_unreference(pp_context->surfaces[i].s_bo);
- pp_context->surfaces[i].s_bo = NULL;
- }
- memset(&pp_static_parameter, 0, sizeof(pp_static_parameter));
- memset(&pp_inline_parameter, 0, sizeof(pp_inline_parameter));
+ static_param_size = sizeof(struct pp_static_parameter);
+ inline_param_size = sizeof(struct pp_inline_parameter);
+
+ memset(pp_context->pp_static_parameter, 0, static_param_size);
+ memset(pp_context->pp_inline_parameter, 0, inline_param_size);
+
assert(pp_index >= PP_NULL && pp_index < NUM_PP_MODULES);
pp_context->current_pp = pp_index;
pp_module = &pp_context->pp_modules[pp_index];
if (pp_module->initialize)
- pp_module->initialize(ctx, in_surface_id, out_surface_id,
- src_rect, dst_rect);
+ va_status = pp_module->initialize(ctx, pp_context,
+ src_surface,
+ src_rect,
+ dst_surface,
+ dst_rect,
+ filter_param);
+ else
+ va_status = VA_STATUS_ERROR_UNIMPLEMENTED;
+
+ return va_status;
}
-static void
+static VAStatus
ironlake_post_processing(
VADriverContextP ctx,
- VASurfaceID in_surface_id,
- VASurfaceID out_surface_id,
+ struct i965_post_processing_context *pp_context,
+ const struct i965_surface *src_surface,
const VARectangle *src_rect,
+ struct i965_surface *dst_surface,
const VARectangle *dst_rect,
- int pp_index
+ int pp_index,
+ void *filter_param
)
{
- ironlake_pp_initialize(ctx, in_surface_id, out_surface_id, src_rect, dst_rect, pp_index);
- ironlake_pp_states_setup(ctx);
- ironlake_pp_pipeline_setup(ctx);
+ VAStatus va_status; /* status of the initialize step; returned unchanged to the caller */
+
+ va_status = ironlake_pp_initialize(ctx, pp_context,
+ src_surface,
+ src_rect,
+ dst_surface,
+ dst_rect,
+ pp_index,
+ filter_param);
+
+ if (va_status == VA_STATUS_SUCCESS) { /* state and pipeline setup run only when initialization succeeded */
+ ironlake_pp_states_setup(ctx, pp_context);
+ ironlake_pp_pipeline_setup(ctx, pp_context);
+ }
+
+ return va_status;
}
-static void
+static VAStatus
gen6_pp_initialize(
VADriverContextP ctx,
- VASurfaceID in_surface_id,
- VASurfaceID out_surface_id,
+ struct i965_post_processing_context *pp_context,
+ const struct i965_surface *src_surface,
const VARectangle *src_rect,
+ struct i965_surface *dst_surface,
const VARectangle *dst_rect,
- int pp_index
+ int pp_index,
+ void * filter_param
)
{
+ VAStatus va_status;
struct i965_driver_data *i965 = i965_driver_data(ctx);
- struct i965_post_processing_context *pp_context = i965->pp_context;
struct pp_module *pp_module;
dri_bo *bo;
- int i;
+ int static_param_size, inline_param_size;
+
+ dri_bo_unreference(pp_context->surface_state_binding_table.bo);
+ bo = dri_bo_alloc(i965->intel.bufmgr,
+ "surface state & binding table",
+ (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_PP_SURFACES,
+ 4096);
+ assert(bo);
+ pp_context->surface_state_binding_table.bo = bo;
dri_bo_unreference(pp_context->curbe.bo);
bo = dri_bo_alloc(i965->intel.bufmgr,
@@ -1850,14 +3607,6 @@ gen6_pp_initialize(
assert(bo);
pp_context->curbe.bo = bo;
- dri_bo_unreference(pp_context->binding_table.bo);
- bo = dri_bo_alloc(i965->intel.bufmgr,
- "binding table",
- sizeof(unsigned int),
- 4096);
- assert(bo);
- pp_context->binding_table.bo = bo;
-
dri_bo_unreference(pp_context->idrt.bo);
bo = dri_bo_alloc(i965->intel.bufmgr,
"interface discriptor",
@@ -1902,57 +3651,41 @@ gen6_pp_initialize(
assert(bo);
pp_context->vfe_state.bo = bo;
- for (i = 0; i < MAX_PP_SURFACES; i++) {
- dri_bo_unreference(pp_context->surfaces[i].ss_bo);
- pp_context->surfaces[i].ss_bo = NULL;
-
- dri_bo_unreference(pp_context->surfaces[i].s_bo);
- pp_context->surfaces[i].s_bo = NULL;
+ if (IS_GEN7(i965->intel.device_id)) {
+ static_param_size = sizeof(struct gen7_pp_static_parameter);
+ inline_param_size = sizeof(struct gen7_pp_inline_parameter);
+ } else {
+ static_param_size = sizeof(struct pp_static_parameter);
+ inline_param_size = sizeof(struct pp_inline_parameter);
}
- memset(&pp_static_parameter, 0, sizeof(pp_static_parameter));
- memset(&pp_inline_parameter, 0, sizeof(pp_inline_parameter));
+ memset(pp_context->pp_static_parameter, 0, static_param_size);
+ memset(pp_context->pp_inline_parameter, 0, inline_param_size);
+
assert(pp_index >= PP_NULL && pp_index < NUM_PP_MODULES);
pp_context->current_pp = pp_index;
pp_module = &pp_context->pp_modules[pp_index];
if (pp_module->initialize)
- pp_module->initialize(ctx, in_surface_id, out_surface_id,
- src_rect, dst_rect);
-}
-
-static void
-gen6_pp_binding_table(struct i965_post_processing_context *pp_context)
-{
- unsigned int *binding_table;
- dri_bo *bo = pp_context->binding_table.bo;
- int i;
-
- dri_bo_map(bo, 1);
- assert(bo->virtual);
- binding_table = bo->virtual;
- memset(binding_table, 0, bo->size);
-
- for (i = 0; i < MAX_PP_SURFACES; i++) {
- if (pp_context->surfaces[i].ss_bo) {
- assert(pp_context->surfaces[i].s_bo);
-
- binding_table[i] = pp_context->surfaces[i].ss_bo->offset;
- dri_bo_emit_reloc(bo,
- I915_GEM_DOMAIN_INSTRUCTION, 0,
- 0,
- i * sizeof(*binding_table),
- pp_context->surfaces[i].ss_bo);
- }
-
- }
+ va_status = pp_module->initialize(ctx, pp_context,
+ src_surface,
+ src_rect,
+ dst_surface,
+ dst_rect,
+ filter_param);
+ else
+ va_status = VA_STATUS_ERROR_UNIMPLEMENTED;
+
+ calculate_boundary_block_mask(pp_context, dst_rect);
- dri_bo_unmap(bo);
+ return va_status;
}
static void
-gen6_pp_interface_descriptor_table(struct i965_post_processing_context *pp_context)
+gen6_pp_interface_descriptor_table(VADriverContextP ctx,
+ struct i965_post_processing_context *pp_context)
{
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
struct gen6_interface_descriptor_data *desc;
dri_bo *bo;
int pp_index = pp_context->current_pp;
@@ -1970,10 +3703,13 @@ gen6_pp_interface_descriptor_table(struct i965_post_processing_context *pp_conte
desc->desc2.sampler_state_pointer =
pp_context->sampler_state_table.bo->offset >> 5;
desc->desc3.binding_table_entry_count = 0;
- desc->desc3.binding_table_pointer =
- pp_context->binding_table.bo->offset >> 5; /*reloc */
+ desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET >> 5);
desc->desc4.constant_urb_entry_read_offset = 0;
- desc->desc4.constant_urb_entry_read_length = 4; /* grf 1-4 */
+
+ if (IS_GEN7(i965->intel.device_id))
+ desc->desc4.constant_urb_entry_read_length = 6; /* grf 1-6 */
+ else
+ desc->desc4.constant_urb_entry_read_length = 4; /* grf 1-4 */
dri_bo_emit_reloc(bo,
I915_GEM_DOMAIN_INSTRUCTION, 0,
@@ -1987,45 +3723,46 @@ gen6_pp_interface_descriptor_table(struct i965_post_processing_context *pp_conte
offsetof(struct gen6_interface_descriptor_data, desc2),
pp_context->sampler_state_table.bo);
- dri_bo_emit_reloc(bo,
- I915_GEM_DOMAIN_INSTRUCTION, 0,
- desc->desc3.binding_table_entry_count,
- offsetof(struct gen6_interface_descriptor_data, desc3),
- pp_context->binding_table.bo);
-
dri_bo_unmap(bo);
pp_context->idrt.num_interface_descriptors++;
}
static void
-gen6_pp_upload_constants(struct i965_post_processing_context *pp_context)
+gen6_pp_upload_constants(VADriverContextP ctx,
+ struct i965_post_processing_context *pp_context)
{
+ struct i965_driver_data *i965 = i965_driver_data(ctx); /* needed only for the device-id check below */
unsigned char *constant_buffer;
+ int param_size; /* number of bytes of static parameters copied into the CURBE bo */
+
+ assert(sizeof(struct pp_static_parameter) == 128); /* guard the expected size of the pre-gen7 layout */
+ assert(sizeof(struct gen7_pp_static_parameter) == 192); /* guard the expected size of the gen7 layout */
+
+ if (IS_GEN7(i965->intel.device_id)) /* gen7 uploads the larger gen7 static-parameter block */
+ param_size = sizeof(struct gen7_pp_static_parameter);
+ else
+ param_size = sizeof(struct pp_static_parameter);
- assert(sizeof(pp_static_parameter) == 128);
dri_bo_map(pp_context->curbe.bo, 1);
assert(pp_context->curbe.bo->virtual);
constant_buffer = pp_context->curbe.bo->virtual;
- memcpy(constant_buffer, &pp_static_parameter, sizeof(pp_static_parameter));
+ memcpy(constant_buffer, pp_context->pp_static_parameter, param_size); /* source is the per-context parameter block */
dri_bo_unmap(pp_context->curbe.bo);
}
static void
-gen6_pp_states_setup(VADriverContextP ctx)
+gen6_pp_states_setup(VADriverContextP ctx,
+ struct i965_post_processing_context *pp_context)
{
- struct i965_driver_data *i965 = i965_driver_data(ctx);
- struct i965_post_processing_context *pp_context = i965->pp_context;
-
- gen6_pp_binding_table(pp_context);
- gen6_pp_interface_descriptor_table(pp_context);
- gen6_pp_upload_constants(pp_context);
+ gen6_pp_interface_descriptor_table(ctx, pp_context); /* fill the interface descriptor first */
+ gen6_pp_upload_constants(ctx, pp_context); /* then copy the static parameters into the CURBE bo */
}
static void
-gen6_pp_pipeline_select(VADriverContextP ctx)
+gen6_pp_pipeline_select(VADriverContextP ctx,
+ struct i965_post_processing_context *pp_context)
{
- struct i965_driver_data *i965 = i965_driver_data(ctx);
- struct intel_batchbuffer *batch = i965->batch;
+ struct intel_batchbuffer *batch = pp_context->batch;
BEGIN_BATCH(batch, 1);
OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
@@ -2033,15 +3770,15 @@ gen6_pp_pipeline_select(VADriverContextP ctx)
}
static void
-gen6_pp_state_base_address(VADriverContextP ctx)
+gen6_pp_state_base_address(VADriverContextP ctx,
+ struct i965_post_processing_context *pp_context)
{
- struct i965_driver_data *i965 = i965_driver_data(ctx);
- struct intel_batchbuffer *batch = i965->batch;
+ struct intel_batchbuffer *batch = pp_context->batch;
BEGIN_BATCH(batch, 10);
OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
- OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
+ OUT_RELOC(batch, pp_context->surface_state_binding_table.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
@@ -2053,10 +3790,10 @@ gen6_pp_state_base_address(VADriverContextP ctx)
}
static void
-gen6_pp_vfe_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
+gen6_pp_vfe_state(VADriverContextP ctx,
+ struct i965_post_processing_context *pp_context)
{
- struct i965_driver_data *i965 = i965_driver_data(ctx);
- struct intel_batchbuffer *batch = i965->batch;
+ struct intel_batchbuffer *batch = pp_context->batch;
BEGIN_BATCH(batch, 8);
OUT_BATCH(batch, CMD_MEDIA_VFE_STATE | (8 - 2));
@@ -2066,8 +3803,8 @@ gen6_pp_vfe_state(VADriverContextP ctx, struct i965_post_processing_context *pp_
pp_context->urb.num_vfe_entries << 8);
OUT_BATCH(batch, 0);
OUT_BATCH(batch,
- (pp_context->urb.size_vfe_entry * 2) << 16 | /* in 256 bits unit */
- (pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 2 - 1)); /* in 256 bits unit */
+ (pp_context->urb.size_vfe_entry * 2) << 16 | /* URB Entry Allocation Size, in 256 bits unit */
+ (pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 2)); /* CURBE Allocation Size, in 256 bits unit */
OUT_BATCH(batch, 0);
OUT_BATCH(batch, 0);
OUT_BATCH(batch, 0);
@@ -2075,18 +3812,18 @@ gen6_pp_vfe_state(VADriverContextP ctx, struct i965_post_processing_context *pp_
}
static void
-gen6_pp_curbe_load(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
+gen6_pp_curbe_load(VADriverContextP ctx,
+ struct i965_post_processing_context *pp_context)
{
- struct i965_driver_data *i965 = i965_driver_data(ctx);
- struct intel_batchbuffer *batch = i965->batch;
+ struct intel_batchbuffer *batch = pp_context->batch;
- assert(pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 512 <= pp_context->curbe.bo->size);
+ assert(pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 2 * 32 <= pp_context->curbe.bo->size);
BEGIN_BATCH(batch, 4);
OUT_BATCH(batch, CMD_MEDIA_CURBE_LOAD | (4 - 2));
OUT_BATCH(batch, 0);
OUT_BATCH(batch,
- pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 512);
+ pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 2 * 32);
OUT_RELOC(batch,
pp_context->curbe.bo,
I915_GEM_DOMAIN_INSTRUCTION, 0,
@@ -2095,10 +3832,10 @@ gen6_pp_curbe_load(VADriverContextP ctx, struct i965_post_processing_context *pp
}
static void
-gen6_interface_descriptor_load(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
+gen6_interface_descriptor_load(VADriverContextP ctx,
+ struct i965_post_processing_context *pp_context)
{
- struct i965_driver_data *i965 = i965_driver_data(ctx);
- struct intel_batchbuffer *batch = i965->batch;
+ struct intel_batchbuffer *batch = pp_context->batch;
BEGIN_BATCH(batch, 4);
OUT_BATCH(batch, CMD_MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2));
@@ -2112,87 +3849,215 @@ gen6_interface_descriptor_load(VADriverContextP ctx, struct i965_post_processing
ADVANCE_BATCH(batch);
}
+/*
+ * Program the edge masks of the inline parameter for block (x, y) of an
+ * x_steps by y_steps walk.  The general-case masks are chosen first; the
+ * two degenerate layouts (a single column or a single row of blocks) then
+ * override the ones that differ, and everything is stored in one go.
+ */
+static void update_block_mask_parameter(struct i965_post_processing_context *pp_context, int x, int y, int x_steps, int y_steps)
+{
+    struct pp_inline_parameter *inline_param = pp_context->pp_inline_parameter;
+    const unsigned int left_mask = pp_context->block_horizontal_mask_left;
+    const unsigned int right_mask = pp_context->block_horizontal_mask_right;
+    const unsigned int bottom_mask = pp_context->block_vertical_mask_bottom;
+
+    /* General case: the first block is always on the left edge; the second
+     * block reloads its horizontal mask from grf6.block_horizontal_mask_middle. */
+    unsigned int first_h = left_mask;
+    unsigned int middle_h = 0xffff;
+    unsigned int last_h = right_mask;
+    unsigned int rows_v = 0xff;
+    unsigned int last_rows_v = bottom_mask;
+
+    if (x_steps == 1) {
+        /* 1 x N: a single column of blocks */
+        if (y == y_steps - 1)
+            rows_v = bottom_mask;
+        else
+            last_rows_v = 0xff;
+    }
+
+    if (y_steps == 1) {
+        /* M x 1: a single row of blocks */
+        if (x == 0) {
+            /* every block in this group is on the left edge */
+            middle_h = left_mask;
+            last_h = left_mask;
+        } else if (x == x_steps - 1) {
+            first_h = right_mask;
+            middle_h = right_mask;
+        } else {
+            first_h = 0xffff;
+            middle_h = 0xffff;
+            last_h = 0xffff;
+        }
+    }
+
+    inline_param->grf5.block_vertical_mask = rows_v;
+    inline_param->grf6.block_vertical_mask_bottom = last_rows_v;
+    inline_param->grf5.block_horizontal_mask = first_h;
+    inline_param->grf6.block_horizontal_mask_middle = middle_h;
+    inline_param->grf6.block_horizontal_mask_right = last_h;
+}
+
static void
-gen6_pp_object_walker(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
+gen6_pp_object_walker(VADriverContextP ctx,
+ struct i965_post_processing_context *pp_context)
{
struct i965_driver_data *i965 = i965_driver_data(ctx);
- struct intel_batchbuffer *batch = i965->batch;
+ struct intel_batchbuffer *batch = pp_context->batch;
int x, x_steps, y, y_steps;
+ int param_size, command_length_in_dws;
+ dri_bo *command_buffer;
+ unsigned int *command_ptr;
+
+ if (IS_GEN7(i965->intel.device_id))
+ param_size = sizeof(struct gen7_pp_inline_parameter);
+ else
+ param_size = sizeof(struct pp_inline_parameter);
x_steps = pp_context->pp_x_steps(&pp_context->private_context);
y_steps = pp_context->pp_y_steps(&pp_context->private_context);
+ command_length_in_dws = 6 + (param_size >> 2);
+ command_buffer = dri_bo_alloc(i965->intel.bufmgr,
+ "command objects buffer",
+ command_length_in_dws * 4 * x_steps * y_steps + 8,
+ 4096);
+
+ dri_bo_map(command_buffer, 1);
+ command_ptr = command_buffer->virtual;
for (y = 0; y < y_steps; y++) {
for (x = 0; x < x_steps; x++) {
if (!pp_context->pp_set_block_parameter(pp_context, x, y)) {
- BEGIN_BATCH(batch, 22);
- OUT_BATCH(batch, CMD_MEDIA_OBJECT | 20);
- OUT_BATCH(batch, 0);
- OUT_BATCH(batch, 0); /* no indirect data */
- OUT_BATCH(batch, 0);
- OUT_BATCH(batch, 0); /* scoreboard */
- OUT_BATCH(batch, 0);
-
- /* inline data grf 5-6 */
- assert(sizeof(pp_inline_parameter) == 64);
- intel_batchbuffer_data(batch, &pp_inline_parameter, sizeof(pp_inline_parameter));
-
- ADVANCE_BATCH(batch);
+ // some common block parameter update goes here, apply to all pp functions
+ if (IS_GEN6(i965->intel.device_id))
+ update_block_mask_parameter (pp_context, x, y, x_steps, y_steps);
+
+ *command_ptr++ = (CMD_MEDIA_OBJECT | (command_length_in_dws - 2));
+ *command_ptr++ = 0;
+ *command_ptr++ = 0;
+ *command_ptr++ = 0;
+ *command_ptr++ = 0;
+ *command_ptr++ = 0;
+ memcpy(command_ptr, pp_context->pp_inline_parameter, param_size);
+ command_ptr += (param_size >> 2);
}
}
}
-}
+ if (command_length_in_dws * x_steps * y_steps % 2 == 0)
+ *command_ptr++ = 0;
+
+ *command_ptr = MI_BATCH_BUFFER_END;
+
+ dri_bo_unmap(command_buffer);
+
+ BEGIN_BATCH(batch, 2);
+ OUT_BATCH(batch, MI_BATCH_BUFFER_START | (2 << 6));
+ OUT_RELOC(batch, command_buffer,
+ I915_GEM_DOMAIN_COMMAND, 0,
+ 0);
+ ADVANCE_BATCH(batch);
+
+ dri_bo_unreference(command_buffer);
+
+ /* Have to execute the batch buffer here because MI_BATCH_BUFFER_END
+ * will cause control to pass back to ring buffer
+ */
+ intel_batchbuffer_end_atomic(batch);
+ intel_batchbuffer_flush(batch);
+ intel_batchbuffer_start_atomic(batch, 0x1000);
+}
static void
-gen6_pp_pipeline_setup(VADriverContextP ctx)
+gen6_pp_pipeline_setup(VADriverContextP ctx,
+ struct i965_post_processing_context *pp_context)
{
- struct i965_driver_data *i965 = i965_driver_data(ctx);
- struct intel_batchbuffer *batch = i965->batch;
- struct i965_post_processing_context *pp_context = i965->pp_context;
+ struct intel_batchbuffer *batch = pp_context->batch;
intel_batchbuffer_start_atomic(batch, 0x1000);
intel_batchbuffer_emit_mi_flush(batch);
- gen6_pp_pipeline_select(ctx);
+ gen6_pp_pipeline_select(ctx, pp_context);
+ gen6_pp_state_base_address(ctx, pp_context);
+ gen6_pp_vfe_state(ctx, pp_context);
gen6_pp_curbe_load(ctx, pp_context);
gen6_interface_descriptor_load(ctx, pp_context);
- gen6_pp_state_base_address(ctx);
gen6_pp_vfe_state(ctx, pp_context);
gen6_pp_object_walker(ctx, pp_context);
intel_batchbuffer_end_atomic(batch);
}
-static void
+static VAStatus
gen6_post_processing(
VADriverContextP ctx,
- VASurfaceID in_surface_id,
- VASurfaceID out_surface_id,
+ struct i965_post_processing_context *pp_context,
+ const struct i965_surface *src_surface,
+ const VARectangle *src_rect,
+ struct i965_surface *dst_surface,
+ const VARectangle *dst_rect,
+ int pp_index,
+ void *filter_param
+)
+{
+ VAStatus va_status;
+
+ va_status = gen6_pp_initialize(ctx, pp_context,
+ src_surface,
+ src_rect,
+ dst_surface,
+ dst_rect,
+ pp_index,
+ filter_param);
+
+ if (va_status == VA_STATUS_SUCCESS) {
+ gen6_pp_states_setup(ctx, pp_context);
+ gen6_pp_pipeline_setup(ctx, pp_context);
+ }
+
+ return va_status;
+}
+
+static VAStatus
+gen75_post_processing(
+ VADriverContextP ctx,
+ struct i965_post_processing_context *pp_context,
+ const struct i965_surface *src_surface,
const VARectangle *src_rect,
+ struct i965_surface *dst_surface,
const VARectangle *dst_rect,
- int pp_index
+ int pp_index,
+ void *filter_param
)
{
- gen6_pp_initialize(ctx, in_surface_id, out_surface_id, src_rect, dst_rect, pp_index);
- gen6_pp_states_setup(ctx);
- gen6_pp_pipeline_setup(ctx);
+ VAStatus va_status;
+ struct intel_vebox_context * vebox_ctx = pp_context->pp_vebox_context;
+
+ assert(pp_index == PP_NV12_DNDI);
+
+ vebox_ctx->filters_mask = VPP_DNDI_DI;
+ vebox_ctx->surface_input = src_surface->id;
+ vebox_ctx->surface_output = dst_surface->id;
+
+ va_status = gen75_vebox_process_picture(ctx, vebox_ctx);
+
+ return va_status;
}
-static void
+static VAStatus
i965_post_processing_internal(
VADriverContextP ctx,
- VASurfaceID in_surface_id,
- VASurfaceID out_surface_id,
+ struct i965_post_processing_context *pp_context,
+ const struct i965_surface *src_surface,
const VARectangle *src_rect,
+ struct i965_surface *dst_surface,
const VARectangle *dst_rect,
- int pp_index
+ int pp_index,
+ void *filter_param
)
{
+ VAStatus va_status;
struct i965_driver_data *i965 = i965_driver_data(ctx);
- if (IS_GEN6(i965->intel.device_id) ||
- IS_GEN7(i965->intel.device_id))
- gen6_post_processing(ctx, in_surface_id, out_surface_id, src_rect, dst_rect, pp_index);
- else
- ironlake_post_processing(ctx, in_surface_id, out_surface_id, src_rect, dst_rect, pp_index);
+ if(IS_HASWELL(i965->intel.device_id) &&
+ pp_index == PP_NV12_DNDI){
+ va_status = gen75_post_processing(ctx, pp_context, src_surface, src_rect, dst_surface, dst_rect, pp_index, filter_param);
+ }else if (IS_GEN6(i965->intel.device_id) ||
+ IS_GEN7(i965->intel.device_id)){
+ va_status = gen6_post_processing(ctx, pp_context, src_surface, src_rect, dst_surface, dst_rect, pp_index, filter_param);
+ }else{
+ va_status = ironlake_post_processing(ctx, pp_context, src_surface, src_rect, dst_surface, dst_rect, pp_index, filter_param);
+ }
+
+ return va_status;
}
VAStatus
@@ -2206,6 +4071,103 @@ i965_CreateSurfaces(VADriverContextP ctx,
int format,
int num_surfaces,
VASurfaceID *surfaces);
+
+/*
+ * Split a packed 32-bit ARGB value (alpha in bits 31:24, red in 23:16,
+ * green in 15:8, blue in 7:0) into Y, U, V and alpha bytes.  The colour
+ * conversion uses integer coefficients scaled by 1000, then adds 16 to Y
+ * and 128 to U and V.
+ */
+static void
+rgb_to_yuv(unsigned int argb,
+           unsigned char *y,
+           unsigned char *u,
+           unsigned char *v,
+           unsigned char *a)
+{
+    const int blue = argb & 0xff;
+    const int green = (argb >> 8) & 0xff;
+    const int red = (argb >> 16) & 0xff;
+    const int luma = (257 * red + 504 * green + 98 * blue) / 1000;
+    const int cr = (439 * red - 368 * green - 71 * blue) / 1000;
+    const int cb = (-148 * red - 291 * green + 439 * blue) / 1000;
+
+    /* outputs are written in the order y, v, u, a */
+    *y = luma + 16;
+    *v = cr + 128;
+    *u = cb + 128;
+    *a = (argb >> 24) & 0xff;
+}
+
+/*
+ * Fill an NV12 surface with a constant colour by emitting two
+ * XY_COLOR_BLT commands into pp_context->batch.
+ *
+ * ctx        - driver context, used to reach the i965 driver data
+ * pp_context - post-processing context whose batch buffer receives the commands
+ * surface    - id of the surface to clear
+ * color      - packed ARGB value; it is converted with rgb_to_yuv()
+ *
+ * Nothing is emitted when the surface cannot be looked up, is not NV12,
+ * or when the alpha byte of color is 0.
+ */
+static void
+i965_vpp_clear_surface(VADriverContextP ctx,
+ struct i965_post_processing_context *pp_context,
+ VASurfaceID surface,
+ unsigned int color)
+{
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ struct intel_batchbuffer *batch = pp_context->batch;
+ struct object_surface *obj_surface = SURFACE(surface);
+ unsigned int blt_cmd, br13;
+ unsigned int tiling = 0, swizzle = 0;
+ int pitch;
+ unsigned char y, u, v, a = 0;
+
+ /* Currently only support NV12 surface */
+ if (!obj_surface || obj_surface->fourcc != VA_FOURCC('N', 'V', '1', '2'))
+ return;
+
+ rgb_to_yuv(color, &y, &u, &v, &a);
+
+ /* an alpha of 0 means there is nothing to draw */
+ if (a == 0)
+ return;
+
+ dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
+ blt_cmd = XY_COLOR_BLT_CMD;
+ pitch = obj_surface->width;
+
+ /* NOTE(review): the pitch is divided by 4 for tiled destinations,
+ * presumably because the blitter takes it in dwords then - confirm
+ * against the blitter documentation */
+ if (tiling != I915_TILING_NONE) {
+ blt_cmd |= XY_COLOR_BLT_DST_TILED;
+ pitch >>= 2;
+ }
+
+ /* first blit: 8-bit fill (0xf0 is presumably the raster operation - TODO confirm) */
+ br13 = 0xf0 << 16;
+ br13 |= BR13_8;
+ br13 |= pitch;
+
+ /* both blits together are 12 dwords, i.e. 48 bytes of batch space;
+ * GEN6/GEN7 go through the *_blt variants of the batch helpers */
+ if (IS_GEN6(i965->intel.device_id) ||
+ IS_GEN7(i965->intel.device_id)) {
+ intel_batchbuffer_start_atomic_blt(batch, 48);
+ BEGIN_BLT_BATCH(batch, 12);
+ } else {
+ intel_batchbuffer_start_atomic(batch, 48);
+ BEGIN_BATCH(batch, 12);
+ }
+
+ /* luma: rectangle from (0, 0) to (width, height) at offset 0 of the bo, filled with y */
+ OUT_BATCH(batch, blt_cmd);
+ OUT_BATCH(batch, br13);
+ OUT_BATCH(batch,
+ 0 << 16 |
+ 0);
+ OUT_BATCH(batch,
+ obj_surface->height << 16 |
+ obj_surface->width);
+ OUT_RELOC(batch, obj_surface->bo,
+ I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+ 0);
+ OUT_BATCH(batch, y);
+
+ /* second blit: 16-bit fill, so one pixel covers an interleaved chroma pair */
+ br13 = 0xf0 << 16;
+ br13 |= BR13_565;
+ br13 |= pitch;
+
+ /* chroma: (width / 2) x (height / 2) rectangle starting at byte offset
+ * width * y_cb_offset of the bo, filled with v << 8 | u */
+ OUT_BATCH(batch, blt_cmd);
+ OUT_BATCH(batch, br13);
+ OUT_BATCH(batch,
+ 0 << 16 |
+ 0);
+ OUT_BATCH(batch,
+ obj_surface->height / 2 << 16 |
+ obj_surface->width / 2);
+ OUT_RELOC(batch, obj_surface->bo,
+ I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+ obj_surface->width * obj_surface->y_cb_offset);
+ OUT_BATCH(batch, v << 8 | u);
+
+ ADVANCE_BATCH(batch);
+ intel_batchbuffer_end_atomic(batch);
+}
+
VASurfaceID
i965_post_processing(
VADriverContextP ctx,
@@ -2225,6 +4187,8 @@ i965_post_processing(
if (i965->render_state.interleaved_uv) {
struct object_surface *obj_surface;
VAStatus status;
+ struct i965_surface src_surface;
+ struct i965_surface dst_surface;
if (flags & I965_PP_FLAG_DEINTERLACING) {
obj_surface = SURFACE(in_surface_id);
@@ -2236,11 +4200,26 @@ i965_post_processing(
&out_surface_id);
assert(status == VA_STATUS_SUCCESS);
obj_surface = SURFACE(out_surface_id);
- i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
- i965_post_processing_internal(ctx,
- in_surface_id, out_surface_id,
- src_rect, dst_rect,
- PP_NV12_DNDI);
+ i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
+
+ i965_vpp_clear_surface(ctx, i965->pp_context, out_surface_id, 0);
+
+ src_surface.id = in_surface_id;
+ src_surface.type = I965_SURFACE_TYPE_SURFACE;
+ src_surface.flags = (flags & I965_PP_FLAG_TOP_FIELD) ?
+ I965_SURFACE_FLAG_TOP_FIELD_FIRST : I965_SURFACE_FLAG_BOTTOM_FIELD_FIRST;
+ dst_surface.id = out_surface_id;
+ dst_surface.type = I965_SURFACE_TYPE_SURFACE;
+ dst_surface.flags = I965_SURFACE_FLAG_FRAME;
+
+ i965_post_processing_internal(ctx, i965->pp_context,
+ &src_surface,
+ src_rect,
+ &dst_surface,
+ dst_rect,
+ PP_NV12_DNDI,
+ NULL);
+ printf("Deinterlace is executed here\n");
}
if (flags & I965_PP_FLAG_AVS) {
@@ -2259,10 +4238,22 @@ i965_post_processing(
assert(status == VA_STATUS_SUCCESS);
obj_surface = SURFACE(out_surface_id);
i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
- i965_post_processing_internal(ctx,
- in_surface_id, out_surface_id,
- src_rect, dst_rect,
- PP_NV12_AVS);
+ i965_vpp_clear_surface(ctx, i965->pp_context, out_surface_id, 0);
+
+ src_surface.id = in_surface_id;
+ src_surface.type = I965_SURFACE_TYPE_SURFACE;
+ src_surface.flags = I965_SURFACE_FLAG_FRAME;
+ dst_surface.id = out_surface_id;
+ dst_surface.type = I965_SURFACE_TYPE_SURFACE;
+ dst_surface.flags = I965_SURFACE_FLAG_FRAME;
+
+ i965_post_processing_internal(ctx, i965->pp_context,
+ &src_surface,
+ src_rect,
+ &dst_surface,
+ dst_rect,
+ PP_NV12_AVS,
+ NULL);
if (in_surface_id != surface)
i965_DestroySurfaces(ctx, &in_surface_id, 1);
@@ -2275,108 +4266,329 @@ i965_post_processing(
return out_surface_id;
}
-Bool
-i965_post_processing_terminate(VADriverContextP ctx)
+static VAStatus
+i965_image_pl3_processing(VADriverContextP ctx,
+ const struct i965_surface *src_surface,
+ const VARectangle *src_rect,
+ struct i965_surface *dst_surface,
+ const VARectangle *dst_rect)
{
struct i965_driver_data *i965 = i965_driver_data(ctx);
struct i965_post_processing_context *pp_context = i965->pp_context;
- int i;
+ int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
+
+ if (fourcc == VA_FOURCC('N', 'V', '1', '2')) {
+ i965_post_processing_internal(ctx, i965->pp_context,
+ src_surface,
+ src_rect,
+ dst_surface,
+ dst_rect,
+ PP_PL3_LOAD_SAVE_N12,
+ NULL);
+ } else if (fourcc == VA_FOURCC('I', 'M', 'C', '1') ||
+ fourcc == VA_FOURCC('I', 'M', 'C', '3') ||
+ fourcc == VA_FOURCC('Y', 'V', '1', '2') ||
+ fourcc == VA_FOURCC('I', '4', '2', '0') ) {
+ i965_post_processing_internal(ctx, i965->pp_context,
+ src_surface,
+ src_rect,
+ dst_surface,
+ dst_rect,
+ PP_PL3_LOAD_SAVE_PL3,
+ NULL);
+ } else if (fourcc == VA_FOURCC('Y', 'U', 'Y', '2') ||
+ fourcc == VA_FOURCC('U', 'Y', 'V', 'Y')) {
+ i965_post_processing_internal(ctx, i965->pp_context,
+ src_surface,
+ src_rect,
+ dst_surface,
+ dst_rect,
+ PP_PL3_LOAD_SAVE_PA,
+ NULL);
+
+ }
+ else {
+ assert(0);
+ }
+
+ intel_batchbuffer_flush(pp_context->batch);
+
+ return VA_STATUS_SUCCESS;
+}
+
+/*
+ * Process an image whose source is NV12 (i965_image_processing() routes
+ * NV12 sources here), choosing the load/save kernel from the fourcc of
+ * dst_surface:
+ *   NV12                    -> PP_NV12_LOAD_SAVE_N12
+ *   IMC1, IMC3, YV12, I420  -> PP_NV12_LOAD_SAVE_PL3
+ *   YUY2, UYVY              -> PP_NV12_LOAD_SAVE_PA
+ *
+ * The batch buffer of the driver's post-processing context is flushed
+ * before returning.
+ *
+ * Returns the status reported by i965_post_processing_internal(), or
+ * VA_STATUS_ERROR_UNIMPLEMENTED when the destination fourcc matches none
+ * of the cases above (previously both situations were reported as
+ * VA_STATUS_SUCCESS).
+ */
+static VAStatus
+i965_image_pl2_processing(VADriverContextP ctx,
+                          const struct i965_surface *src_surface,
+                          const VARectangle *src_rect,
+                          struct i965_surface *dst_surface,
+                          const VARectangle *dst_rect)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct i965_post_processing_context *pp_context = i965->pp_context;
+    int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
+    VAStatus status = VA_STATUS_ERROR_UNIMPLEMENTED;
+
+    if (fourcc == VA_FOURCC('N', 'V', '1', '2')) {
+        status = i965_post_processing_internal(ctx, i965->pp_context,
+                                               src_surface,
+                                               src_rect,
+                                               dst_surface,
+                                               dst_rect,
+                                               PP_NV12_LOAD_SAVE_N12,
+                                               NULL);
+    } else if (fourcc == VA_FOURCC('I', 'M', 'C', '1') ||
+               fourcc == VA_FOURCC('I', 'M', 'C', '3') ||
+               fourcc == VA_FOURCC('Y', 'V', '1', '2') ||
+               fourcc == VA_FOURCC('I', '4', '2', '0') ) {
+        status = i965_post_processing_internal(ctx, i965->pp_context,
+                                               src_surface,
+                                               src_rect,
+                                               dst_surface,
+                                               dst_rect,
+                                               PP_NV12_LOAD_SAVE_PL3,
+                                               NULL);
+    } else if (fourcc == VA_FOURCC('Y', 'U', 'Y', '2') ||
+               fourcc == VA_FOURCC('U', 'Y', 'V', 'Y')) {
+        status = i965_post_processing_internal(ctx, i965->pp_context,
+                                               src_surface,
+                                               src_rect,
+                                               dst_surface,
+                                               dst_rect,
+                                               PP_NV12_LOAD_SAVE_PA,
+                                               NULL);
+    }
+
+    /* flushed unconditionally, as before */
+    intel_batchbuffer_flush(pp_context->batch);
+
+    return status;
+}
+
+/*
+ * Process an image whose source is a packed format (i965_image_processing()
+ * routes YUY2 and UYVY sources here).  Only an NV12 destination is
+ * handled, using the PP_PA_LOAD_SAVE_NV12 kernel.
+ *
+ * Returns VA_STATUS_ERROR_UNKNOWN, without touching the batch buffer, when
+ * the destination fourcc is not NV12.  Otherwise the batch buffer is
+ * flushed and the status reported by i965_post_processing_internal() is
+ * returned (previously it was dropped and success was always reported).
+ */
+static VAStatus
+i965_image_pl1_processing(VADriverContextP ctx,
+                          const struct i965_surface *src_surface,
+                          const VARectangle *src_rect,
+                          struct i965_surface *dst_surface,
+                          const VARectangle *dst_rect)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct i965_post_processing_context *pp_context = i965->pp_context;
+    int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
+    VAStatus status;
+
+    if (fourcc != VA_FOURCC('N', 'V', '1', '2'))
+        return VA_STATUS_ERROR_UNKNOWN;
+
+    status = i965_post_processing_internal(ctx, i965->pp_context,
+                                           src_surface,
+                                           src_rect,
+                                           dst_surface,
+                                           dst_rect,
+                                           PP_PA_LOAD_SAVE_NV12,
+                                           NULL);
+
+    intel_batchbuffer_flush(pp_context->batch);
+
+    return status;
+}
+
+VAStatus
+i965_image_processing(VADriverContextP ctx,
+ const struct i965_surface *src_surface,
+ const VARectangle *src_rect,
+ struct i965_surface *dst_surface,
+ const VARectangle *dst_rect)
+{
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ VAStatus status = VA_STATUS_ERROR_UNIMPLEMENTED;
if (HAS_PP(i965)) {
- if (pp_context) {
- dri_bo_unreference(pp_context->curbe.bo);
- pp_context->curbe.bo = NULL;
+ int fourcc = pp_get_surface_fourcc(ctx, src_surface);
+
+ switch (fourcc) {
+ case VA_FOURCC('Y', 'V', '1', '2'):
+ case VA_FOURCC('I', '4', '2', '0'):
+ case VA_FOURCC('I', 'M', 'C', '1'):
+ case VA_FOURCC('I', 'M', 'C', '3'):
+ status = i965_image_pl3_processing(ctx,
+ src_surface,
+ src_rect,
+ dst_surface,
+ dst_rect);
+ break;
+
+ case VA_FOURCC('N', 'V', '1', '2'):
+ status = i965_image_pl2_processing(ctx,
+ src_surface,
+ src_rect,
+ dst_surface,
+ dst_rect);
+ break;
+ case VA_FOURCC('Y', 'U', 'Y', '2'):
+ case VA_FOURCC('U', 'Y', 'V', 'Y'):
+ status = i965_image_pl1_processing(ctx,
+ src_surface,
+ src_rect,
+ dst_surface,
+ dst_rect);
+ break;
+
+ default:
+ status = VA_STATUS_ERROR_UNIMPLEMENTED;
+ break;
+ }
+ }
- for (i = 0; i < MAX_PP_SURFACES; i++) {
- dri_bo_unreference(pp_context->surfaces[i].ss_bo);
- pp_context->surfaces[i].ss_bo = NULL;
+ return status;
+}
- dri_bo_unreference(pp_context->surfaces[i].s_bo);
- pp_context->surfaces[i].s_bo = NULL;
- }
- dri_bo_unreference(pp_context->sampler_state_table.bo);
- pp_context->sampler_state_table.bo = NULL;
+static void
+i965_post_processing_context_finalize(struct i965_post_processing_context *pp_context)
+{
+ int i;
- dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
- pp_context->sampler_state_table.bo_8x8 = NULL;
+ dri_bo_unreference(pp_context->surface_state_binding_table.bo);
+ pp_context->surface_state_binding_table.bo = NULL;
- dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
- pp_context->sampler_state_table.bo_8x8_uv = NULL;
+ dri_bo_unreference(pp_context->curbe.bo);
+ pp_context->curbe.bo = NULL;
- dri_bo_unreference(pp_context->binding_table.bo);
- pp_context->binding_table.bo = NULL;
+ dri_bo_unreference(pp_context->sampler_state_table.bo);
+ pp_context->sampler_state_table.bo = NULL;
- dri_bo_unreference(pp_context->idrt.bo);
- pp_context->idrt.bo = NULL;
- pp_context->idrt.num_interface_descriptors = 0;
+ dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
+ pp_context->sampler_state_table.bo_8x8 = NULL;
- dri_bo_unreference(pp_context->vfe_state.bo);
- pp_context->vfe_state.bo = NULL;
+ dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
+ pp_context->sampler_state_table.bo_8x8_uv = NULL;
- dri_bo_unreference(pp_context->stmm.bo);
- pp_context->stmm.bo = NULL;
+ dri_bo_unreference(pp_context->idrt.bo);
+ pp_context->idrt.bo = NULL;
+ pp_context->idrt.num_interface_descriptors = 0;
- for (i = 0; i < NUM_PP_MODULES; i++) {
- struct pp_module *pp_module = &pp_context->pp_modules[i];
+ dri_bo_unreference(pp_context->vfe_state.bo);
+ pp_context->vfe_state.bo = NULL;
- dri_bo_unreference(pp_module->kernel.bo);
- pp_module->kernel.bo = NULL;
- }
+ dri_bo_unreference(pp_context->stmm.bo);
+ pp_context->stmm.bo = NULL;
- free(pp_context);
- }
+ for (i = 0; i < NUM_PP_MODULES; i++) {
+ struct pp_module *pp_module = &pp_context->pp_modules[i];
+
+ dri_bo_unreference(pp_module->kernel.bo);
+ pp_module->kernel.bo = NULL;
+ }
+
+ free(pp_context->pp_static_parameter);
+ free(pp_context->pp_inline_parameter);
+ pp_context->pp_static_parameter = NULL;
+ pp_context->pp_inline_parameter = NULL;
+}
+
+Bool
+i965_post_processing_terminate(VADriverContextP ctx)
+{
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ struct i965_post_processing_context *pp_context = i965->pp_context;
- i965->pp_context = NULL;
+ if (pp_context) {
+ if(IS_HASWELL(i965->intel.device_id)){
+ gen75_vebox_context_destroy(ctx, pp_context->pp_vebox_context);
+ }
+
+ i965_post_processing_context_finalize(pp_context);
+ free(pp_context);
}
+ i965->pp_context = NULL;
+
return True;
}
+/*
+ * Initialise a post-processing context.
+ *
+ * ctx        - driver context, used to reach the i965 driver data
+ * pp_context - context to fill in; the caller in this file passes a
+ *              zero-initialised (calloc'ed) structure
+ * batch      - batch buffer the context will emit its commands into; the
+ *              pointer is stored as-is
+ *
+ * Sets up the URB partitioning, copies the per-generation kernel table,
+ * uploads every kernel that has a binary into its own buffer object, and
+ * allocates the static and inline parameter blocks (GEN7 uses its own
+ * layouts).  Allocation failures are caught with assert(), matching the
+ * existing check on dri_bo_alloc().
+ */
+static void
+i965_post_processing_context_init(VADriverContextP ctx,
+                                  struct i965_post_processing_context *pp_context,
+                                  struct intel_batchbuffer *batch)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    int i;
+
+    pp_context->urb.size = URB_SIZE((&i965->intel));
+    pp_context->urb.num_vfe_entries = 32;
+    pp_context->urb.size_vfe_entry = 1;     /* in 512 bits unit */
+    pp_context->urb.num_cs_entries = 1;
+
+    if (IS_GEN7(i965->intel.device_id))
+        pp_context->urb.size_cs_entry = 4;  /* in 512 bits unit */
+    else
+        pp_context->urb.size_cs_entry = 2;
+
+    /* VFE entries come first in the URB, the CS entries follow them */
+    pp_context->urb.vfe_start = 0;
+    pp_context->urb.cs_start = pp_context->urb.vfe_start +
+        pp_context->urb.num_vfe_entries * pp_context->urb.size_vfe_entry;
+    assert(pp_context->urb.cs_start +
+           pp_context->urb.num_cs_entries * pp_context->urb.size_cs_entry <= URB_SIZE((&i965->intel)));
+
+    /* every per-generation table must have one entry per PP_* module */
+    assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen5));
+    assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen6));
+    assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen7));
+    assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen75));
+
+    /* Haswell is tested before the generic GEN7 check */
+    if (IS_HASWELL(i965->intel.device_id))
+        memcpy(pp_context->pp_modules, pp_modules_gen75, sizeof(pp_context->pp_modules));
+    else if (IS_GEN7(i965->intel.device_id))
+        memcpy(pp_context->pp_modules, pp_modules_gen7, sizeof(pp_context->pp_modules));
+    else if (IS_GEN6(i965->intel.device_id))
+        memcpy(pp_context->pp_modules, pp_modules_gen6, sizeof(pp_context->pp_modules));
+    else if (IS_IRONLAKE(i965->intel.device_id))
+        memcpy(pp_context->pp_modules, pp_modules_gen5, sizeof(pp_context->pp_modules));
+
+    for (i = 0; i < NUM_PP_MODULES; i++) {
+        struct pp_module *pp_module = &pp_context->pp_modules[i];
+        dri_bo_unreference(pp_module->kernel.bo);
+        if (pp_module->kernel.bin && pp_module->kernel.size) {
+            pp_module->kernel.bo = dri_bo_alloc(i965->intel.bufmgr,
+                                                pp_module->kernel.name,
+                                                pp_module->kernel.size,
+                                                4096);
+            assert(pp_module->kernel.bo);
+            dri_bo_subdata(pp_module->kernel.bo, 0, pp_module->kernel.size, pp_module->kernel.bin);
+        } else {
+            /* module without a kernel binary: no buffer object */
+            pp_module->kernel.bo = NULL;
+        }
+    }
+
+    /* static & inline parameters */
+    if (IS_GEN7(i965->intel.device_id)) {
+        pp_context->pp_static_parameter = calloc(sizeof(struct gen7_pp_static_parameter), 1);
+        pp_context->pp_inline_parameter = calloc(sizeof(struct gen7_pp_inline_parameter), 1);
+    } else {
+        pp_context->pp_static_parameter = calloc(sizeof(struct pp_static_parameter), 1);
+        pp_context->pp_inline_parameter = calloc(sizeof(struct pp_inline_parameter), 1);
+    }
+
+    /* both blocks are dereferenced later without a NULL check */
+    assert(pp_context->pp_static_parameter);
+    assert(pp_context->pp_inline_parameter);
+
+    pp_context->batch = batch;
+}
+
Bool
i965_post_processing_init(VADriverContextP ctx)
{
struct i965_driver_data *i965 = i965_driver_data(ctx);
struct i965_post_processing_context *pp_context = i965->pp_context;
- int i;
if (HAS_PP(i965)) {
if (pp_context == NULL) {
pp_context = calloc(1, sizeof(*pp_context));
+ i965_post_processing_context_init(ctx, pp_context, i965->batch);
i965->pp_context = pp_context;
- pp_context->urb.size = URB_SIZE((&i965->intel));
- pp_context->urb.num_vfe_entries = 32;
- pp_context->urb.size_vfe_entry = 1; /* in 512 bits unit */
- pp_context->urb.num_cs_entries = 1;
- pp_context->urb.size_cs_entry = 2; /* in 512 bits unit */
- pp_context->urb.vfe_start = 0;
- pp_context->urb.cs_start = pp_context->urb.vfe_start +
- pp_context->urb.num_vfe_entries * pp_context->urb.size_vfe_entry;
- assert(pp_context->urb.cs_start +
- pp_context->urb.num_cs_entries * pp_context->urb.size_cs_entry <= URB_SIZE((&i965->intel)));
-
- assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen5));
- assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen6));
-
- if (IS_GEN6(i965->intel.device_id) ||
- IS_GEN7(i965->intel.device_id))
- memcpy(pp_context->pp_modules, pp_modules_gen6, sizeof(pp_context->pp_modules));
- else if (IS_IRONLAKE(i965->intel.device_id))
- memcpy(pp_context->pp_modules, pp_modules_gen5, sizeof(pp_context->pp_modules));
-
- for (i = 0; i < NUM_PP_MODULES; i++) {
- struct pp_module *pp_module = &pp_context->pp_modules[i];
- dri_bo_unreference(pp_module->kernel.bo);
- pp_module->kernel.bo = dri_bo_alloc(i965->intel.bufmgr,
- pp_module->kernel.name,
- pp_module->kernel.size,
- 4096);
- assert(pp_module->kernel.bo);
- dri_bo_subdata(pp_module->kernel.bo, 0, pp_module->kernel.size, pp_module->kernel.bin);
+ if(IS_HASWELL(i965->intel.device_id)){
+ pp_context->pp_vebox_context = gen75_vebox_context_init(ctx);
}
}
}
return True;
}
+
+
diff --git a/src/i965_post_processing.h b/src/i965_post_processing.h
old mode 100644
new mode 100755
index 5f4e949..e55d1f6
--- a/src/i965_post_processing.h
+++ b/src/i965_post_processing.h
@@ -29,26 +29,35 @@
#ifndef __I965_POST_PROCESSING_H__
#define __I965_POST_PROCESSING_H__
-#define MAX_PP_SURFACES 32
+#define MAX_PP_SURFACES 48
-#define I965_PP_FLAG_TOP_FIELD 1
-#define I965_PP_FLAG_BOTTOM_FIELD 2
-#define I965_PP_FLAG_DEINTERLACING 4 /* XXX: don't support MCDI yet */
-#define I965_PP_FLAG_AVS 8
+#define I965_PP_FLAG_TOP_FIELD 1
+#define I965_PP_FLAG_BOTTOM_FIELD 2
+
+#define I965_PP_FLAG_AVS 4
+#define I965_PP_FLAG_DEINTERLACING 8
enum
{
PP_NULL = 0,
- PP_NV12_LOAD_SAVE,
+ PP_NV12_LOAD_SAVE_N12,
+ PP_NV12_LOAD_SAVE_PL3,
+ PP_PL3_LOAD_SAVE_N12,
+ PP_PL3_LOAD_SAVE_PL3,
PP_NV12_SCALING,
PP_NV12_AVS,
PP_NV12_DNDI,
+ PP_NV12_DN,
+ PP_NV12_LOAD_SAVE_PA,
+ PP_PL3_LOAD_SAVE_PA,
+ PP_PA_LOAD_SAVE_NV12,
+ NUM_PP_MODULES,
};
-#define NUM_PP_MODULES 5
-
struct pp_load_save_context
{
+ int dest_x;
+ int dest_y;
int dest_w;
int dest_h;
};
@@ -81,14 +90,25 @@ struct pp_dndi_context
int dest_h;
};
+struct pp_dn_context
+{
+ int dest_w;
+ int dest_h;
+};
+
+struct i965_post_processing_context;
+
struct pp_module
{
struct i965_kernel kernel;
/* others */
- void (*initialize)(VADriverContextP ctx,
- VASurfaceID in_surface_id, VASurfaceID out_surface_id,
- const VARectangle *src_rect, const VARectangle *dst_rect);
+ VAStatus (*initialize)(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
+ const struct i965_surface *src_surface,
+ const VARectangle *src_rect,
+ struct i965_surface *dst_surface,
+ const VARectangle *dst_rect,
+ void *filter_param);
};
struct pp_static_parameter
@@ -278,6 +298,9 @@ struct pp_inline_parameter
unsigned int block_count_x:8;
/* r5.6 */
+ /* we only support M*1 or 1*N block partitioning now.
+ * -- it means the asm code only needs to update this mask from grf6 for the last block
+ */
unsigned int block_horizontal_mask:16;
unsigned int block_vertical_mask:8;
unsigned int number_blocks:8;
@@ -290,30 +313,126 @@ struct pp_inline_parameter
/* AVS r6.0 */
float video_step_delta;
- /* r6.1-r6.7 */
+ /* r6.1 */ // sizeof(int) == 4?
+ unsigned int block_horizontal_mask_right:16;
+ unsigned int block_vertical_mask_bottom:8;
+ unsigned int pad1:8;
+
+ /* r6.2 */
+ unsigned int block_horizontal_mask_middle:16;
+ unsigned int pad2:16;
+
+ /* r6.3-r6.7 */
+ unsigned int padx[5];
+ } grf6;
+};
+
+struct gen7_pp_static_parameter
+{
+ struct {
+ /* r1.0-r1.5 */
+ unsigned int padx[6];
+ /* r1.6 */
+ unsigned int di_statistics_surface_pitch_div2:16;
+ unsigned int di_statistics_surface_height_div4:16;
+ /* r1.7 */
+ unsigned int di_top_field_first:8;
+ unsigned int pad0:16;
+ unsigned int pointer_to_inline_parameter:8; /* value: 7 */
+ } grf1;
+
+ struct {
+ /* r2.0 */
+ unsigned int pad3;
+
+ /* r2.1 */
+ unsigned int pad2:16;
+ unsigned int save_avs_rgb_swap:1; /* 0: RGB, 1: BGR */
+ unsigned int avs_wa_enable:1; /* must be enabled for GEN7 */
+ unsigned int pad1:1;
+ unsigned int avs_wa_width:13;
+
+ /* 2.2 */
+ float avs_wa_one_div_256_width;
+
+ /* 2.3 */
+ float avs_wa_five_div_256_width;
+
+ /* 2.4 - 2.6 */
+ unsigned int padx[3];
+
+ /* r2.7 */
+ unsigned int di_destination_packed_y_component_offset:8;
+ unsigned int di_destination_packed_u_component_offset:8;
+ unsigned int di_destination_packed_v_component_offset:8;
+ unsigned int pad0:8;
+ } grf2;
+
+ struct {
+ float sampler_load_horizontal_scaling_step_ratio;
+ unsigned int padx[7];
+ } grf3;
+
+ struct {
+ float sampler_load_vertical_scaling_step;
+ unsigned int pad0;
+ unsigned int di_hoffset_svf_from_dvf:16;
+ unsigned int di_voffset_svf_from_dvf:16;
+ unsigned int padx[5];
+ } grf4;
+
+ struct {
+ float sampler_load_vertical_frame_origin;
+ unsigned int padx[7];
+ } grf5;
+
+ struct {
+ float sampler_load_horizontal_frame_origin;
unsigned int padx[7];
} grf6;
};
+/*
+ * Per-block inline data for the GEN7 post-processing kernels.
+ * gen6_pp_object_walker() uses sizeof() of this structure as the inline
+ * data size on GEN7 and copies the block after each MEDIA_OBJECT header.
+ * The layout is two groups, grf7 and grf8, of eight 32-bit fields each;
+ * the rN.M comments give the register/dword position of each field.
+ */
+struct gen7_pp_inline_parameter
+{
+ struct {
+ /* r7.0 */
+ unsigned int destination_block_horizontal_origin:16;
+ unsigned int destination_block_vertical_origin:16;
+ /* r7.1: 0xffffffff */
+ unsigned int constant_0;
+ /* r7.2 */
+ unsigned int pad0;
+ /* r7.3 */
+ unsigned int pad1;
+ /* r7.4 */
+ float sampler_load_main_video_x_scaling_step;
+ /* r7.5 */
+ unsigned int pad2;
+ /* r7.6: must be zero */
+ unsigned int avs_vertical_block_number;
+ /* r7.7: 0 */
+ unsigned int group_id_number;
+ } grf7;
+
+ struct {
+ /* r8.0-r8.7: padding only */
+ unsigned int padx[8];
+ } grf8;
+};
+
struct i965_post_processing_context
{
int current_pp;
struct pp_module pp_modules[NUM_PP_MODULES];
- struct pp_static_parameter pp_static_parameter;
- struct pp_inline_parameter pp_inline_parameter;
+ void *pp_static_parameter;
+ void *pp_inline_parameter;
struct {
dri_bo *bo;
- } curbe;
-
- struct {
- dri_bo *ss_bo;
- dri_bo *s_bo;
- } surfaces[MAX_PP_SURFACES];
+ } surface_state_binding_table;
struct {
dri_bo *bo;
- } binding_table;
+ } curbe;
struct {
dri_bo *bo;
@@ -352,11 +471,19 @@ struct i965_post_processing_context
struct pp_scaling_context pp_scaling_context;
struct pp_avs_context pp_avs_context;
struct pp_dndi_context pp_dndi_context;
+ struct pp_dn_context pp_dn_context;
} private_context;
int (*pp_x_steps)(void *private_context);
int (*pp_y_steps)(void *private_context);
int (*pp_set_block_parameter)(struct i965_post_processing_context *pp_context, int x, int y);
+ struct intel_batchbuffer *batch;
+ unsigned int block_horizontal_mask_left:16;
+ unsigned int block_horizontal_mask_right:16;
+ unsigned int block_vertical_mask_bottom:8;
+
+ /* video process based on hsw vebox */
+ struct intel_vebox_context *pp_vebox_context;
};
VASurfaceID
@@ -369,6 +496,13 @@ i965_post_processing(
int *has_done_scaling
);
+VAStatus
+i965_image_processing(VADriverContextP ctx,
+ const struct i965_surface *src_surface,
+ const VARectangle *src_rect,
+ struct i965_surface *dst_surface,
+ const VARectangle *dst_rect);
+
Bool
i965_post_processing_terminate(VADriverContextP ctx);
Bool
diff --git a/src/i965_render.c b/src/i965_render.c
index f6b6dde..4adfba6 100644
--- a/src/i965_render.c
+++ b/src/i965_render.c
@@ -36,8 +36,6 @@
#include <string.h>
#include <assert.h>
-#include <va/va_dricommon.h>
-
#include "intel_batchbuffer.h"
#include "intel_driver.h"
#include "i965_defines.h"
@@ -133,6 +131,14 @@ static const uint32_t ps_subpic_kernel_static_gen7[][4] = {
#include "shaders/render/exa_wm_write.g7b"
};
+/*
+ * Programs for Haswell.
+ * Pixel-shader binary assembled from four pre-compiled stages; only the
+ * planar-sampling stage comes from a Haswell-specific file (".haswell"),
+ * the other three are the plain .g7b binaries.
+ */
+static const uint32_t ps_kernel_static_gen7_haswell[][4] = {
+#include "shaders/render/exa_wm_src_affine.g7b"
+#include "shaders/render/exa_wm_src_sample_planar.g7b.haswell"
+#include "shaders/render/exa_wm_yuv_rgb.g7b"
+#include "shaders/render/exa_wm_write.g7b"
+};
+
#define SURFACE_STATE_PADDED_SIZE_I965 ALIGN(sizeof(struct i965_surface_state), 32)
#define SURFACE_STATE_PADDED_SIZE_GEN7 ALIGN(sizeof(struct gen7_surface_state), 32)
#define SURFACE_STATE_PADDED_SIZE MAX(SURFACE_STATE_PADDED_SIZE_I965, SURFACE_STATE_PADDED_SIZE_GEN7)
@@ -257,6 +263,31 @@ static struct i965_kernel render_kernels_gen7[] = {
}
};
+static struct i965_kernel render_kernels_gen7_haswell[] = { /* Haswell kernel table: three entries (SF, PS, PS_SUBPIC); only the PS entry points at a Haswell-specific binary */
+ {
+ "SF",
+ SF_KERNEL,
+ sf_kernel_static_gen7, /* shared with the generic gen7 table */
+ sizeof(sf_kernel_static_gen7),
+ NULL
+ },
+ {
+ "PS",
+ PS_KERNEL,
+ ps_kernel_static_gen7_haswell, /* Haswell-specific pixel shader */
+ sizeof(ps_kernel_static_gen7_haswell),
+ NULL
+ },
+
+ {
+ "PS_SUBPIC",
+ PS_SUBPIC_KERNEL,
+ ps_subpic_kernel_static_gen7, /* shared with the generic gen7 table */
+ sizeof(ps_subpic_kernel_static_gen7),
+ NULL
+ }
+};
+
#define URB_VS_ENTRIES 8
#define URB_VS_ENTRY_SIZE 1
@@ -697,6 +728,16 @@ gen7_render_set_surface_tiling(struct gen7_surface_state *ss, uint32_t tiling)
}
}
+/* Set "Shader Channel Select": fill the four ss7 channel-select fields of a gen7 surface state with the matching HSW_SCS_* constant (R->RED, G->GREEN, B->BLUE, A->ALPHA); non-static because it is also declared in i965_render.h */
+void
+gen7_render_set_surface_scs(struct gen7_surface_state *ss)
+{
+ ss->ss7.shader_chanel_select_r = HSW_SCS_RED;
+ ss->ss7.shader_chanel_select_g = HSW_SCS_GREEN;
+ ss->ss7.shader_chanel_select_b = HSW_SCS_BLUE;
+ ss->ss7.shader_chanel_select_a = HSW_SCS_ALPHA;
+}
+
static void
gen7_render_set_surface_state(
struct gen7_surface_state *ss,
@@ -767,6 +808,8 @@ i965_render_src_surface_state(
region, offset,
w, h,
pitch, format, flags);
+ if (IS_HASWELL(i965->intel.device_id))
+ gen7_render_set_surface_scs(ss);
dri_bo_emit_reloc(ss_bo,
I915_GEM_DOMAIN_SAMPLER, 0,
offset,
@@ -848,16 +891,11 @@ i965_subpic_render_src_surfaces_state(VADriverContextP ctx,
{
struct i965_driver_data *i965 = i965_driver_data(ctx);
struct object_surface *obj_surface = SURFACE(surface);
- int w, h;
- dri_bo *region;
dri_bo *subpic_region;
struct object_subpic *obj_subpic = SUBPIC(obj_surface->subpic);
struct object_image *obj_image = IMAGE(obj_subpic->image);
assert(obj_surface);
assert(obj_surface->bo);
- w = obj_surface->width;
- h = obj_surface->height;
- region = obj_surface->bo;
subpic_region = obj_image->bo;
/*subpicture surface*/
i965_render_src_surface_state(ctx, 1, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format, 0);
@@ -890,6 +928,8 @@ i965_render_dest_surface_state(VADriverContextP ctx, int index)
dest_region->bo, 0,
dest_region->width, dest_region->height,
dest_region->pitch, format, 0);
+ if (IS_HASWELL(i965->intel.device_id))
+ gen7_render_set_surface_scs(ss);
dri_bo_emit_reloc(ss_bo,
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
0,
@@ -911,18 +951,56 @@ i965_render_dest_surface_state(VADriverContextP ctx, int index)
dri_bo_unmap(ss_bo);
}
+static void /* Build three (u, v, x, y) vertices from a texture rectangle and a destination rectangle, honouring the rotation attribute, and upload them to the render vertex buffer */
+i965_fill_vertex_buffer(
+ VADriverContextP ctx,
+ float tex_coords[4], /* [(u1,v1);(u2,v2)] */
+ float vid_coords[4] /* [(x1,y1);(x2,y2)] */
+)
+{
+ struct i965_driver_data * const i965 = i965_driver_data(ctx);
+ float vb[12]; /* 3 vertices x 4 floats: tex u, tex v, vid x, vid y */
+
+ enum { X1, Y1, X2, Y2 }; /* indices into tex_coords[] and vid_coords[] */
+
+ static const unsigned int g_rotation_indices[][6] = { /* per rotation: tex_coords indices as (u,v) pairs for the bottom-right, bottom-left and top-left vertices */
+ [VA_ROTATION_NONE] = { X2, Y2, X1, Y2, X1, Y1 },
+ [VA_ROTATION_90] = { X2, Y1, X2, Y2, X1, Y2 },
+ [VA_ROTATION_180] = { X1, Y1, X2, Y1, X2, Y2 },
+ [VA_ROTATION_270] = { X1, Y2, X1, Y1, X2, Y1 },
+ };
+
+ const unsigned int * const rotation_indices =
+ g_rotation_indices[i965->rotation_attrib->value]; /* NOTE(review): no range check here; assumes value is one of the four VA_ROTATION_* designators above - confirm it is validated where the attribute is set */
+
+ vb[0] = tex_coords[rotation_indices[0]]; /* bottom-right corner */
+ vb[1] = tex_coords[rotation_indices[1]];
+ vb[2] = vid_coords[X2]; /* destination corners are never rotated, only the texture coordinates are permuted */
+ vb[3] = vid_coords[Y2];
+
+ vb[4] = tex_coords[rotation_indices[2]]; /* bottom-left corner */
+ vb[5] = tex_coords[rotation_indices[3]];
+ vb[6] = vid_coords[X1];
+ vb[7] = vid_coords[Y2];
+
+ vb[8] = tex_coords[rotation_indices[4]]; /* top-left corner */
+ vb[9] = tex_coords[rotation_indices[5]];
+ vb[10] = vid_coords[X1];
+ vb[11] = vid_coords[Y1];
+
+ dri_bo_subdata(i965->render_state.vb.vertex_buffer, 0, sizeof(vb), vb); /* copy all 12 floats to offset 0 of the vertex buffer object */
+}
+
static void
i965_subpic_render_upload_vertex(VADriverContextP ctx,
VASurfaceID surface,
const VARectangle *output_rect)
{
struct i965_driver_data *i965 = i965_driver_data(ctx);
- struct i965_render_state *render_state = &i965->render_state;
struct object_surface *obj_surface = SURFACE(surface);
struct object_subpic *obj_subpic = SUBPIC(obj_surface->subpic);
+ float tex_coords[4], vid_coords[4];
VARectangle dst_rect;
- float *vb, tx1, tx2, ty1, ty2, x1, x2, y1, y2;
- int i = 0;
if (obj_subpic->flags & VA_SUBPICTURE_DESTINATION_IS_SCREEN_COORD)
dst_rect = obj_subpic->dst_rect;
@@ -935,35 +1013,17 @@ i965_subpic_render_upload_vertex(VADriverContextP ctx,
dst_rect.height = sy * obj_subpic->dst_rect.height;
}
- dri_bo_map(render_state->vb.vertex_buffer, 1);
- assert(render_state->vb.vertex_buffer->virtual);
- vb = render_state->vb.vertex_buffer->virtual;
-
- tx1 = (float)obj_subpic->src_rect.x / obj_subpic->width;
- ty1 = (float)obj_subpic->src_rect.y / obj_subpic->height;
- tx2 = (float)(obj_subpic->src_rect.x + obj_subpic->src_rect.width) / obj_subpic->width;
- ty2 = (float)(obj_subpic->src_rect.y + obj_subpic->src_rect.height) / obj_subpic->height;
+ tex_coords[0] = (float)obj_subpic->src_rect.x / obj_subpic->width;
+ tex_coords[1] = (float)obj_subpic->src_rect.y / obj_subpic->height;
+ tex_coords[2] = (float)(obj_subpic->src_rect.x + obj_subpic->src_rect.width) / obj_subpic->width;
+ tex_coords[3] = (float)(obj_subpic->src_rect.y + obj_subpic->src_rect.height) / obj_subpic->height;
- x1 = (float)dst_rect.x;
- y1 = (float)dst_rect.y;
- x2 = (float)(dst_rect.x + dst_rect.width);
- y2 = (float)(dst_rect.y + dst_rect.height);
+ vid_coords[0] = dst_rect.x;
+ vid_coords[1] = dst_rect.y;
+ vid_coords[2] = (float)(dst_rect.x + dst_rect.width);
+ vid_coords[3] = (float)(dst_rect.y + dst_rect.height);
- vb[i++] = tx2;
- vb[i++] = ty2;
- vb[i++] = x2;
- vb[i++] = y2;
-
- vb[i++] = tx1;
- vb[i++] = ty2;
- vb[i++] = x1;
- vb[i++] = y2;
-
- vb[i++] = tx1;
- vb[i++] = ty1;
- vb[i++] = x1;
- vb[i++] = y1;
- dri_bo_unmap(render_state->vb.vertex_buffer);
+ i965_fill_vertex_buffer(ctx, tex_coords, vid_coords);
}
static void
@@ -978,46 +1038,26 @@ i965_render_upload_vertex(
struct i965_render_state *render_state = &i965->render_state;
struct intel_region *dest_region = render_state->draw_region;
struct object_surface *obj_surface;
- float *vb;
-
- float u1, v1, u2, v2;
- int i, width, height;
- int box_x1 = dest_region->x + dst_rect->x;
- int box_y1 = dest_region->y + dst_rect->y;
- int box_x2 = box_x1 + dst_rect->width;
- int box_y2 = box_y1 + dst_rect->height;
+ float tex_coords[4], vid_coords[4];
+ int width, height;
obj_surface = SURFACE(surface);
assert(surface);
- width = obj_surface->orig_width;
- height = obj_surface->orig_height;
-
- u1 = (float)src_rect->x / width;
- v1 = (float)src_rect->y / height;
- u2 = (float)(src_rect->x + src_rect->width) / width;
- v2 = (float)(src_rect->y + src_rect->height) / height;
- dri_bo_map(render_state->vb.vertex_buffer, 1);
- assert(render_state->vb.vertex_buffer->virtual);
- vb = render_state->vb.vertex_buffer->virtual;
+ width = obj_surface->orig_width;
+ height = obj_surface->orig_height;
- i = 0;
- vb[i++] = u2;
- vb[i++] = v2;
- vb[i++] = (float)box_x2;
- vb[i++] = (float)box_y2;
-
- vb[i++] = u1;
- vb[i++] = v2;
- vb[i++] = (float)box_x1;
- vb[i++] = (float)box_y2;
+ tex_coords[0] = (float)src_rect->x / width;
+ tex_coords[1] = (float)src_rect->y / height;
+ tex_coords[2] = (float)(src_rect->x + src_rect->width) / width;
+ tex_coords[3] = (float)(src_rect->y + src_rect->height) / height;
- vb[i++] = u1;
- vb[i++] = v1;
- vb[i++] = (float)box_x1;
- vb[i++] = (float)box_y1;
+ vid_coords[0] = dest_region->x + dst_rect->x;
+ vid_coords[1] = dest_region->y + dst_rect->y;
+ vid_coords[2] = vid_coords[0] + dst_rect->width;
+ vid_coords[3] = vid_coords[1] + dst_rect->height;
- dri_bo_unmap(render_state->vb.vertex_buffer);
+ i965_fill_vertex_buffer(ctx, tex_coords, vid_coords);
}
static void
@@ -2427,6 +2467,10 @@ gen7_emit_urb(VADriverContextP ctx)
{
struct i965_driver_data *i965 = i965_driver_data(ctx);
struct intel_batchbuffer *batch = i965->batch;
+ unsigned int num_urb_entries = 32;
+
+ if (IS_HASWELL(i965->intel.device_id))
+ num_urb_entries = 64;
BEGIN_BATCH(batch, 2);
OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS | (2 - 2));
@@ -2436,7 +2480,7 @@ gen7_emit_urb(VADriverContextP ctx)
BEGIN_BATCH(batch, 2);
OUT_BATCH(batch, GEN7_3DSTATE_URB_VS | (2 - 2));
OUT_BATCH(batch,
- (32 << GEN7_URB_ENTRY_NUMBER_SHIFT) | /* at least 32 */
+ (num_urb_entries << GEN7_URB_ENTRY_NUMBER_SHIFT) |
(2 - 1) << GEN7_URB_ENTRY_SIZE_SHIFT |
(1 << GEN7_URB_STARTING_ADDRESS_SHIFT));
ADVANCE_BATCH(batch);
@@ -2731,6 +2775,13 @@ gen7_emit_wm_state(VADriverContextP ctx, int kernel)
struct i965_driver_data *i965 = i965_driver_data(ctx);
struct intel_batchbuffer *batch = i965->batch;
struct i965_render_state *render_state = &i965->render_state;
+ unsigned int max_threads_shift = GEN7_PS_MAX_THREADS_SHIFT_IVB;
+ unsigned int num_samples = 0;
+
+ if (IS_HASWELL(i965->intel.device_id)) {
+ max_threads_shift = GEN7_PS_MAX_THREADS_SHIFT_HSW;
+ num_samples = 1 << GEN7_PS_SAMPLE_MASK_SHIFT_HSW;
+ }
BEGIN_BATCH(batch, 3);
OUT_BATCH(batch, GEN6_3DSTATE_WM | (3 - 2));
@@ -2764,7 +2815,7 @@ gen7_emit_wm_state(VADriverContextP ctx, int kernel)
(5 << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
OUT_BATCH(batch, 0); /* scratch space base offset */
OUT_BATCH(batch,
- ((86 - 1) << GEN7_PS_MAX_THREADS_SHIFT) |
+ ((86 - 1) << max_threads_shift) | num_samples |
GEN7_PS_PUSH_CONSTANT_ENABLE |
GEN7_PS_ATTRIBUTE_ENABLE |
GEN7_PS_16_DISPATCH_ENABLE);
@@ -3012,7 +3063,9 @@ i965_render_init(VADriverContextP ctx)
sizeof(render_kernels_gen6[0])));
if (IS_GEN7(i965->intel.device_id))
- memcpy(render_state->render_kernels, render_kernels_gen7, sizeof(render_state->render_kernels));
+ memcpy(render_state->render_kernels,
+ (IS_HASWELL(i965->intel.device_id) ? render_kernels_gen7_haswell : render_kernels_gen7),
+ sizeof(render_state->render_kernels));
else if (IS_GEN6(i965->intel.device_id))
memcpy(render_state->render_kernels, render_kernels_gen6, sizeof(render_state->render_kernels));
else if (IS_IRONLAKE(i965->intel.device_id))
diff --git a/src/i965_render.h b/src/i965_render.h
index 96a1512..c2fc2bf 100644
--- a/src/i965_render.h
+++ b/src/i965_render.h
@@ -98,4 +98,9 @@ intel_render_put_subpicture(
const VARectangle *dst_rect
);
+struct gen7_surface_state;
+
+void
+gen7_render_set_surface_scs(struct gen7_surface_state *ss);
+
#endif /* _I965_RENDER_H_ */
diff --git a/src/i965_structs.h b/src/i965_structs.h
index 12a8d14..77f2570 100644
--- a/src/i965_structs.h
+++ b/src/i965_structs.h
@@ -799,7 +799,8 @@ struct i965_sampler_8x8_coefficient
} dw3;
struct {
- int pad0:16;
+ int table_1x_filter_c0:8;
+ int table_1x_filter_c1:8;
int table_1x_filter_c2:8;
int table_1x_filter_c3:8;
} dw4;
@@ -807,11 +808,13 @@ struct i965_sampler_8x8_coefficient
struct {
int table_1x_filter_c4:8;
int table_1x_filter_c5:8;
- int pad0:16;
+ int table_1x_filter_c6:8;
+ int table_1x_filter_c7:8;
} dw5;
struct {
- int pad0:16;
+ int table_1y_filter_c0:8;
+ int table_1y_filter_c1:8;
int table_1y_filter_c2:8;
int table_1y_filter_c3:8;
} dw6;
@@ -819,7 +822,8 @@ struct i965_sampler_8x8_coefficient
struct {
int table_1y_filter_c4:8;
int table_1y_filter_c5:8;
- int pad0:16;
+ int table_1y_filter_c6:8;
+ int table_1y_filter_c7:8;
} dw7;
};
@@ -1195,7 +1199,11 @@ struct gen7_surface_state
struct {
unsigned int resource_min_lod:12;
- unsigned int pad0:16;
+ unsigned int pad0:4;
+ unsigned int shader_chanel_select_a:3;
+ unsigned int shader_chanel_select_b:3;
+ unsigned int shader_chanel_select_g:3;
+ unsigned int shader_chanel_select_r:3;
unsigned int alpha_clear_color:1;
unsigned int blue_clear_color:1;
unsigned int green_clear_color:1;
@@ -1304,4 +1312,134 @@ struct gen7_surface_state2
} ss7;
};
+struct gen7_sampler_8x8 /* four bit-field groups dw0..dw3; the declared widths of each group add up to 32 bits */
+{
+ struct {
+ unsigned int global_noise_estimation:8;
+ unsigned int pad0:8;
+ unsigned int chroma_key_index:2;
+ unsigned int chroma_key_enable:1;
+ unsigned int pad1:10;
+ unsigned int ief_bypass:1;
+ unsigned int pad2:1;
+ unsigned int disable_8x8_filter:1;
+ } dw0; /* 8+8+2+1+10+1+1+1 = 32 bits */
+
+ struct {
+ unsigned int pad0:5;
+ unsigned int sampler_8x8_state_pointer:27; /* upper 27 bits of dw1; presumably a 32-byte-aligned pointer with the low 5 bits dropped - TODO confirm against the hardware documentation */
+ } dw1; /* 5+27 = 32 bits */
+
+ struct {
+ unsigned int weak_edge_threshold:6;
+ unsigned int pad0:2;
+ unsigned int strong_edge_threshold:6;
+ unsigned int pad1:2;
+ unsigned int r5x_coefficient:5;
+ unsigned int r5cx_coefficient:5;
+ unsigned int r5c_coefficient:5;
+ unsigned int pad2:1;
+ } dw2; /* 6+2+6+2+5+5+5+1 = 32 bits */
+
+ struct {
+ unsigned int r3x_coefficient:5;
+ unsigned int pad0:1;
+ unsigned int r3c_coefficient:5;
+ unsigned int pad1:3;
+ unsigned int gain_factor:6;
+ unsigned int non_edge_weight:3;
+ unsigned int pad2:1;
+ unsigned int regular_weight:3;
+ unsigned int pad3:1;
+ unsigned int strong_edge_weight:3;
+ unsigned int ief4_smooth_enable:1;
+ } dw3; /* 5+1+5+3+6+3+1+3+1+3+1 = 32 bits */
+};
+
+struct gen7_sampler_dndi /* eight bit-field groups dw0..dw7; the declared widths of each group add up to 32 bits */
+{
+ struct {
+ unsigned int denoise_asd_threshold:8;
+ unsigned int dnmh_delt:4;
+ unsigned int vdi_walker_y_stride:2;
+ unsigned int vdi_walker_frame_sharing_enable:1;
+ unsigned int pad0:1;
+ unsigned int denoise_maximum_history:8;
+ unsigned int denoise_stad_threshold:8;
+ } dw0; /* 8+4+2+1+1+8+8 = 32 bits */
+
+ struct {
+ unsigned int denoise_threshold_for_sum_of_complexity_measure:8;
+ unsigned int denoise_moving_pixel_threshold:5;
+ unsigned int stmm_c2:3;
+ unsigned int low_temporal_difference_threshold:6;
+ unsigned int pad0:2;
+ unsigned int temporal_difference_threshold:6;
+ unsigned int pad1:2;
+ } dw1; /* 8+5+3+6+2+6+2 = 32 bits */
+
+ struct {
+ unsigned int block_noise_estimate_noise_threshold:8;
+ unsigned int bne_edge_th:4;
+ unsigned int pad0:2;
+ unsigned int smooth_mv_th:2;
+ unsigned int sad_tight_th:4;
+ unsigned int cat_slope_minus1:4;
+ unsigned int good_neighbor_th:6;
+ unsigned int pad1:2;
+ } dw2; /* 8+4+2+2+4+4+6+2 = 32 bits */
+
+ struct {
+ unsigned int maximum_stmm:8;
+ unsigned int multipler_for_vecm:6; /* field name spelled as in the original ("multipler") */
+ unsigned int pad0:2;
+ unsigned int blending_constant_across_time_for_small_values_of_stmm:8;
+ unsigned int blending_constant_across_time_for_large_values_of_stmm:7;
+ unsigned int stmm_blending_constant_select:1;
+ } dw3; /* 8+6+2+8+7+1 = 32 bits */
+
+ struct {
+ unsigned int sdi_delta:8;
+ unsigned int sdi_threshold:8;
+ unsigned int stmm_output_shift:4;
+ unsigned int stmm_shift_up:2;
+ unsigned int stmm_shift_down:2;
+ unsigned int minimum_stmm:8;
+ } dw4; /* 8+8+4+2+2+8 = 32 bits */
+
+ struct {
+ unsigned int fmd_temporal_difference_threshold:8;
+ unsigned int sdi_fallback_mode_2_constant:8;
+ unsigned int sdi_fallback_mode_1_t2_constant:8;
+ unsigned int sdi_fallback_mode_1_t1_constant:8;
+ } dw5; /* 8+8+8+8 = 32 bits */
+
+ struct {
+ unsigned int dn_enable:1;
+ unsigned int di_enable:1;
+ unsigned int di_partial:1;
+ unsigned int dndi_top_first:1;
+ unsigned int dndi_stream_id:1;
+ unsigned int dndi_first_frame:1;
+ unsigned int progressive_dn:1;
+ unsigned int mcdi_enable:1;
+ unsigned int fmd_tear_threshold:6;
+ unsigned int cat_th1:2;
+ unsigned int fmd2_vertical_difference_threshold:8;
+ unsigned int fmd1_vertical_difference_threshold:8;
+ } dw6; /* eight 1-bit flags, then 6+2+8+8 = 32 bits */
+
+ struct {
+ unsigned int sad_tha:4;
+ unsigned int sad_thb:4;
+ unsigned int fmd_for_1st_field_of_current_frame:2;
+ unsigned int mc_pixel_consistency_th:6;
+ unsigned int fmd_for_2nd_field_of_previous_frame:2;
+ unsigned int vdi_walker_enable:1;
+ unsigned int neighborpixel_th:4;
+ unsigned int column_width_minus1:9;
+ } dw7; /* 4+4+2+6+2+1+4+9 = 32 bits */
+};
+
+
#endif /* _I965_STRUCTS_H_ */
diff --git a/src/intel_batchbuffer.c b/src/intel_batchbuffer.c
index 0b52281..94d968c 100644
--- a/src/intel_batchbuffer.c
+++ b/src/intel_batchbuffer.c
@@ -31,15 +31,18 @@
#include "intel_batchbuffer.h"
+#define MAX_BATCH_SIZE 0x400000
+
static void
-intel_batchbuffer_reset(struct intel_batchbuffer *batch)
+intel_batchbuffer_reset(struct intel_batchbuffer *batch, int buffer_size)
{
struct intel_driver_data *intel = batch->intel;
- int batch_size = BATCH_SIZE;
+ int batch_size = buffer_size;
assert(batch->flag == I915_EXEC_RENDER ||
batch->flag == I915_EXEC_BLT ||
- batch->flag == I915_EXEC_BSD);
+ batch->flag == I915_EXEC_BSD ||
+ batch->flag == I915_EXEC_VEBOX);
dri_bo_unreference(batch->buffer);
batch->buffer = dri_bo_alloc(intel->bufmgr,
@@ -63,17 +66,27 @@ intel_batchbuffer_space(struct intel_batchbuffer *batch)
struct intel_batchbuffer *
-intel_batchbuffer_new(struct intel_driver_data *intel, int flag)
+intel_batchbuffer_new(struct intel_driver_data *intel, int flag, int buffer_size) /* allocates a batch for ring `flag`; buffer_size is clamped to [BATCH_SIZE, MAX_BATCH_SIZE] before the first reset. NOTE(review): the calloc() result below is used without a NULL check */
{
struct intel_batchbuffer *batch = calloc(1, sizeof(*batch));
assert(flag == I915_EXEC_RENDER ||
flag == I915_EXEC_BSD ||
- flag == I915_EXEC_BLT);
+ flag == I915_EXEC_BLT ||
+ flag == I915_EXEC_VEBOX);
+
+ if (!buffer_size || buffer_size < BATCH_SIZE) { /* 0 (or anything below the default) selects the default BATCH_SIZE */
+ buffer_size = BATCH_SIZE;
+ }
+
+ /* the buffer size can't exceed 4M (MAX_BATCH_SIZE is 0x400000) */
+ if (buffer_size > MAX_BATCH_SIZE) {
+ buffer_size = MAX_BATCH_SIZE;
+ }
batch->intel = intel;
batch->flag = flag;
batch->run = drm_intel_bo_mrb_exec;
- intel_batchbuffer_reset(batch);
+ intel_batchbuffer_reset(batch, buffer_size); /* reset receives the clamped size */
return batch;
}
@@ -108,7 +121,7 @@ intel_batchbuffer_flush(struct intel_batchbuffer *batch)
dri_bo_unmap(batch->buffer);
used = batch->ptr - batch->map;
batch->run(batch->buffer, used, 0, 0, 0, batch->flag);
- intel_batchbuffer_reset(batch);
+ intel_batchbuffer_reset(batch, batch->size);
}
void
@@ -188,6 +201,13 @@ intel_batchbuffer_emit_mi_flush(struct intel_batchbuffer *batch)
OUT_BLT_BATCH(batch, 0);
OUT_BLT_BATCH(batch, 0);
ADVANCE_BLT_BATCH(batch);
+ }else if (batch->flag == I915_EXEC_VEBOX) {
+ BEGIN_VEB_BATCH(batch, 4);
+ OUT_VEB_BATCH(batch, MI_FLUSH_DW);
+ OUT_VEB_BATCH(batch, 0);
+ OUT_VEB_BATCH(batch, 0);
+ OUT_VEB_BATCH(batch, 0);
+ ADVANCE_VEB_BATCH(batch);
} else {
assert(batch->flag == I915_EXEC_BSD);
BEGIN_BCS_BATCH(batch, 4);
@@ -202,8 +222,8 @@ intel_batchbuffer_emit_mi_flush(struct intel_batchbuffer *batch)
if (batch->flag == I915_EXEC_RENDER) {
BEGIN_BATCH(batch, 1);
OUT_BATCH(batch, MI_FLUSH | MI_FLUSH_STATE_INSTRUCTION_CACHE_INVALIDATE);
- ADVANCE_BATCH(batch);
- } else {
+ ADVANCE_BATCH(batch);
+ } else {
assert(batch->flag == I915_EXEC_BSD);
BEGIN_BCS_BATCH(batch, 1);
OUT_BCS_BATCH(batch, MI_FLUSH | MI_FLUSH_STATE_INSTRUCTION_CACHE_INVALIDATE);
@@ -230,7 +250,8 @@ intel_batchbuffer_check_batchbuffer_flag(struct intel_batchbuffer *batch, int fl
{
if (flag != I915_EXEC_RENDER &&
flag != I915_EXEC_BLT &&
- flag != I915_EXEC_BSD)
+ flag != I915_EXEC_BSD &&
+ flag != I915_EXEC_VEBOX)
return;
if (batch->flag == flag)
@@ -276,8 +297,39 @@ intel_batchbuffer_start_atomic_bcs(struct intel_batchbuffer *batch, unsigned int
}
void
+intel_batchbuffer_start_atomic_veb(struct intel_batchbuffer *batch, unsigned int size) /* VEBOX-ring variant of start_atomic: forwards batch and size to the shared helper with I915_EXEC_VEBOX */
+{
+ intel_batchbuffer_start_atomic_helper(batch, I915_EXEC_VEBOX, size);
+}
+
+
+void
intel_batchbuffer_end_atomic(struct intel_batchbuffer *batch)
{
assert(batch->atomic);
batch->atomic = 0;
}
+
+int
+intel_batchbuffer_used_size(struct intel_batchbuffer *batch) /* returns the distance between the write pointer and the start of the mapping */
+{
+ return batch->ptr - batch->map; /* same expression intel_batchbuffer_align() uses for its `used` value */
+}
+
+void
+intel_batchbuffer_align(struct intel_batchbuffer *batch, unsigned int alignedment) /* pads the batch with zero dwords until the used size is a multiple of `alignedment` */
+{
+ int used = batch->ptr - batch->map;
+ int pad_size;
+
+ assert((alignedment & 3) == 0); /* alignment must be a whole number of dwords */
+ pad_size = ALIGN(used, alignedment) - used; /* NOTE(review): ALIGN is defined elsewhere; presumably it needs a non-zero power-of-two alignment - confirm */
+ assert((pad_size & 3) == 0);
+ assert(intel_batchbuffer_space(batch) >= pad_size); /* only asserted: no flush is attempted here if the padding does not fit */
+
+ while (pad_size >= 4) {
+ intel_batchbuffer_emit_dword(batch, 0); /* one zero dword per 4 bytes of padding */
+ pad_size -= 4;
+ }
+}
+
diff --git a/src/intel_batchbuffer.h b/src/intel_batchbuffer.h
index 092da5a..70ceddb 100644
--- a/src/intel_batchbuffer.h
+++ b/src/intel_batchbuffer.h
@@ -26,11 +26,12 @@ struct intel_batchbuffer
int DR4, unsigned int ring_flag);
};
-struct intel_batchbuffer *intel_batchbuffer_new(struct intel_driver_data *intel, int flag);
+struct intel_batchbuffer *intel_batchbuffer_new(struct intel_driver_data *intel, int flag, int buffer_size);
void intel_batchbuffer_free(struct intel_batchbuffer *batch);
void intel_batchbuffer_start_atomic(struct intel_batchbuffer *batch, unsigned int size);
void intel_batchbuffer_start_atomic_bcs(struct intel_batchbuffer *batch, unsigned int size);
void intel_batchbuffer_start_atomic_blt(struct intel_batchbuffer *batch, unsigned int size);
+void intel_batchbuffer_start_atomic_veb(struct intel_batchbuffer *batch, unsigned int size);
void intel_batchbuffer_end_atomic(struct intel_batchbuffer *batch);
void intel_batchbuffer_emit_dword(struct intel_batchbuffer *batch, unsigned int x);
void intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch, dri_bo *bo,
@@ -44,6 +45,8 @@ void intel_batchbuffer_begin_batch(struct intel_batchbuffer *batch, int total);
void intel_batchbuffer_advance_batch(struct intel_batchbuffer *batch);
void intel_batchbuffer_check_batchbuffer_flag(struct intel_batchbuffer *batch, int flag);
int intel_batchbuffer_check_free_space(struct intel_batchbuffer *batch, int size);
+int intel_batchbuffer_used_size(struct intel_batchbuffer *batch);
+void intel_batchbuffer_align(struct intel_batchbuffer *batch, unsigned int alignedment);
#define __BEGIN_BATCH(batch, n, f) do { \
assert(f == batch->flag); \
@@ -70,11 +73,12 @@ int intel_batchbuffer_check_free_space(struct intel_batchbuffer *batch, int size
#define BEGIN_BATCH(batch, n) __BEGIN_BATCH(batch, n, I915_EXEC_RENDER)
#define BEGIN_BLT_BATCH(batch, n) __BEGIN_BATCH(batch, n, I915_EXEC_BLT)
#define BEGIN_BCS_BATCH(batch, n) __BEGIN_BATCH(batch, n, I915_EXEC_BSD)
-
+#define BEGIN_VEB_BATCH(batch, n) __BEGIN_BATCH(batch, n, I915_EXEC_VEBOX)
#define OUT_BATCH(batch, d) __OUT_BATCH(batch, d)
#define OUT_BLT_BATCH(batch, d) __OUT_BATCH(batch, d)
#define OUT_BCS_BATCH(batch, d) __OUT_BATCH(batch, d)
+#define OUT_VEB_BATCH(batch, d) __OUT_BATCH(batch, d)
#define OUT_RELOC(batch, bo, read_domains, write_domain, delta) \
__OUT_RELOC(batch, bo, read_domains, write_domain, delta)
@@ -86,5 +90,6 @@ int intel_batchbuffer_check_free_space(struct intel_batchbuffer *batch, int size
#define ADVANCE_BATCH(batch) __ADVANCE_BATCH(batch)
#define ADVANCE_BLT_BATCH(batch) __ADVANCE_BATCH(batch)
#define ADVANCE_BCS_BATCH(batch) __ADVANCE_BATCH(batch)
+#define ADVANCE_VEB_BATCH(batch) __ADVANCE_BATCH(batch)
#endif /* _INTEL_BATCHBUFFER_H_ */
diff --git a/src/intel_driver.c b/src/intel_driver.c
index 4e6df81..c150dc5 100644
--- a/src/intel_driver.c
+++ b/src/intel_driver.c
@@ -27,10 +27,7 @@
*
*/
-#include <assert.h>
-
-#include <va/va_dricommon.h>
-
+#include "sysdeps.h"
#include "intel_batchbuffer.h"
#include "intel_memman.h"
#include "intel_driver.h"
@@ -46,19 +43,42 @@ intel_driver_get_param(struct intel_driver_data *intel, int param, int *value)
return drmCommandWriteRead(intel->fd, DRM_I915_GETPARAM, &gp, sizeof(gp)) == 0;
}
+static void intel_driver_get_revid(struct intel_driver_data *intel, int *value) /* stores the GPU's PCI revision ID in *value, or 2 when it cannot be read; `intel` is currently unused */
+{
+#define PCI_REVID 8 /* byte offset of the Revision ID register in PCI configuration space */
+ FILE *fp;
+ unsigned char config_data[16]; /* unsigned so that revision IDs >= 0x80 are not sign-extended into a negative int */
+
+ fp = fopen("/sys/devices/pci0000:00/0000:00:02.0/config", "r"); /* NOTE(review): hard-coded to PCI device 0000:00:02.0 */
+
+ if (fp) {
+ if (fread(config_data, 1, 16, fp) > PCI_REVID) /* a short read that stops before offset 8 must not be trusted: the byte would be uninitialised */
+ *value = config_data[PCI_REVID];
+ else
+ *value = 2; /* assume it is at least B-stepping */
+ fclose(fp);
+ } else {
+ *value = 2; /* assume it is at least B-stepping */
+ }
+
+ return;
+}
+
Bool
intel_driver_init(VADriverContextP ctx)
{
struct intel_driver_data *intel = intel_driver_data(ctx);
- struct dri_state *dri_state = (struct dri_state *)ctx->dri_state;
+ struct drm_state * const drm_state = (struct drm_state *)ctx->drm_state;
int has_exec2, has_bsd, has_blt;
- assert(dri_state);
- assert(dri_state->driConnectedFlag == VA_DRI2 ||
- dri_state->driConnectedFlag == VA_DRI1);
+ assert(drm_state);
+ assert(VA_CHECK_DRM_AUTH_TYPE(ctx, VA_DRM_AUTH_DRI1) ||
+ VA_CHECK_DRM_AUTH_TYPE(ctx, VA_DRM_AUTH_DRI2) ||
+ VA_CHECK_DRM_AUTH_TYPE(ctx, VA_DRM_AUTH_CUSTOM));
- intel->fd = dri_state->fd;
- intel->dri2Enabled = (dri_state->driConnectedFlag == VA_DRI2);
+ intel->fd = drm_state->fd;
+ intel->dri2Enabled = (VA_CHECK_DRM_AUTH_TYPE(ctx, VA_DRM_AUTH_DRI2) ||
+ VA_CHECK_DRM_AUTH_TYPE(ctx, VA_DRM_AUTH_CUSTOM));
if (!intel->dri2Enabled) {
return False;
@@ -74,7 +94,8 @@ intel_driver_init(VADriverContextP ctx)
intel->has_bsd = has_bsd;
if (intel_driver_get_param(intel, I915_PARAM_HAS_BLT, &has_blt))
intel->has_blt = has_blt;
-
+
+ intel_driver_get_revid(intel, &intel->revision);
intel_memman_init(intel);
return True;
}
diff --git a/src/intel_driver.h b/src/intel_driver.h
index 339ff3f..8d83469 100644
--- a/src/intel_driver.h
+++ b/src/intel_driver.h
@@ -10,6 +10,7 @@
#include <intel_bufmgr.h>
#include <va/va_backend.h>
+#include "va_backend_compat.h"
#include "intel_compiler.h"
@@ -37,7 +38,9 @@
#define XY_COLOR_BLT_DST_TILED (1 << 11)
/* BR13 */
+#define BR13_8 (0x0 << 24)
#define BR13_565 (0x1 << 24)
+#define BR13_1555 (0x2 << 24)
#define BR13_8888 (0x3 << 24)
#define CMD_PIPE_CONTROL (CMD_3D | (3 << 27) | (2 << 24) | (0 << 16))
@@ -104,6 +107,7 @@ struct intel_driver_data
{
int fd;
int device_id;
+ int revision;
int dri2Enabled;
@@ -168,6 +172,46 @@ struct intel_region
#define PCI_CHIP_IVYBRIDGE_S_GT1 0x015a /* Server */
#define PCI_CHIP_IVYBRIDGE_S_GT2 0x016a
+#define PCI_CHIP_HASWELL_GT1 0x0402 /* Desktop */
+#define PCI_CHIP_HASWELL_GT2 0x0412
+#define PCI_CHIP_HASWELL_GT2_PLUS 0x0422
+#define PCI_CHIP_HASWELL_M_GT1 0x0406 /* Mobile */
+#define PCI_CHIP_HASWELL_M_GT2 0x0416
+#define PCI_CHIP_HASWELL_M_GT2_PLUS 0x0426
+#define PCI_CHIP_HASWELL_S_GT1 0x040a /* Server */
+#define PCI_CHIP_HASWELL_S_GT2 0x041a
+#define PCI_CHIP_HASWELL_S_GT2_PLUS 0x042a
+
+#define PCI_CHIP_HASWELL_SDV_GT1 0x0c02 /* Desktop */
+#define PCI_CHIP_HASWELL_SDV_GT2 0x0c12
+#define PCI_CHIP_HASWELL_SDV_GT2_PLUS 0x0c22
+#define PCI_CHIP_HASWELL_SDV_M_GT1 0x0c06 /* Mobile */
+#define PCI_CHIP_HASWELL_SDV_M_GT2 0x0c16
+#define PCI_CHIP_HASWELL_SDV_M_GT2_PLUS 0x0c26
+#define PCI_CHIP_HASWELL_SDV_S_GT1 0x0c0a /* Server */
+#define PCI_CHIP_HASWELL_SDV_S_GT2 0x0c1a
+#define PCI_CHIP_HASWELL_SDV_S_GT2_PLUS 0x0c2a
+
+#define PCI_CHIP_HASWELL_ULT_GT1 0x0A02 /* Desktop */
+#define PCI_CHIP_HASWELL_ULT_GT2 0x0A12
+#define PCI_CHIP_HASWELL_ULT_GT2_PLUS 0x0A22
+#define PCI_CHIP_HASWELL_ULT_M_GT1 0x0A06 /* Mobile */
+#define PCI_CHIP_HASWELL_ULT_M_GT2 0x0A16
+#define PCI_CHIP_HASWELL_ULT_M_GT2_PLUS 0x0A26
+#define PCI_CHIP_HASWELL_ULT_S_GT1 0x0A0A /* Server */
+#define PCI_CHIP_HASWELL_ULT_S_GT2 0x0A1A
+#define PCI_CHIP_HASWELL_ULT_S_GT2_PLUS 0x0A2A
+
+#define PCI_CHIP_HASWELL_CRW_GT1 0x0D02 /* Desktop; CRW IDs follow the same x02/x12/x22 tier pattern as the other Haswell families (previous values were shifted up by 0x10) */
+#define PCI_CHIP_HASWELL_CRW_GT2 0x0D12
+#define PCI_CHIP_HASWELL_CRW_GT2_PLUS 0x0D22
+#define PCI_CHIP_HASWELL_CRW_M_GT1 0x0D06 /* Mobile */
+#define PCI_CHIP_HASWELL_CRW_M_GT2 0x0D16
+#define PCI_CHIP_HASWELL_CRW_M_GT2_PLUS 0x0D26
+#define PCI_CHIP_HASWELL_CRW_S_GT1 0x0D0A /* Server */
+#define PCI_CHIP_HASWELL_CRW_S_GT2 0x0D1A
+#define PCI_CHIP_HASWELL_CRW_S_GT2_PLUS 0x0D2A
+
#define IS_G45(devid) (devid == PCI_CHIP_IGD_E_G || \
devid == PCI_CHIP_Q45_G || \
devid == PCI_CHIP_G45_G || \
@@ -182,6 +226,51 @@ struct intel_region
#define IS_IRONLAKE_M(devid) (devid == PCI_CHIP_IRONLAKE_M_G)
#define IS_IRONLAKE(devid) (IS_IRONLAKE_D(devid) || IS_IRONLAKE_M(devid))
+#define IS_HASWELL_ULT(devid) (devid == PCI_CHIP_HASWELL_ULT_GT1 || \
+ devid == PCI_CHIP_HASWELL_ULT_GT2 || \
+ devid == PCI_CHIP_HASWELL_ULT_GT2_PLUS || \
+ devid == PCI_CHIP_HASWELL_ULT_M_GT1 || \
+ devid == PCI_CHIP_HASWELL_ULT_M_GT2 || \
+ devid == PCI_CHIP_HASWELL_ULT_M_GT2_PLUS || \
+ devid == PCI_CHIP_HASWELL_ULT_S_GT1 || \
+ devid == PCI_CHIP_HASWELL_ULT_S_GT2 || \
+ devid == PCI_CHIP_HASWELL_ULT_S_GT2_PLUS)
+
+#define IS_HSW_GT1(devid) (devid == PCI_CHIP_HASWELL_GT1 || \
+ devid == PCI_CHIP_HASWELL_M_GT1 || \
+ devid == PCI_CHIP_HASWELL_S_GT1 || \
+ devid == PCI_CHIP_HASWELL_SDV_GT1 || \
+ devid == PCI_CHIP_HASWELL_SDV_M_GT1 || \
+ devid == PCI_CHIP_HASWELL_SDV_S_GT1 || \
+ devid == PCI_CHIP_HASWELL_CRW_GT1 || \
+ devid == PCI_CHIP_HASWELL_CRW_M_GT1 || \
+ devid == PCI_CHIP_HASWELL_CRW_S_GT1)
+
+#define IS_HSW_GT2(devid) (devid == PCI_CHIP_HASWELL_GT2 || \
+ devid == PCI_CHIP_HASWELL_M_GT2 || \
+ devid == PCI_CHIP_HASWELL_S_GT2 || \
+ devid == PCI_CHIP_HASWELL_SDV_GT2 || \
+ devid == PCI_CHIP_HASWELL_SDV_M_GT2 || \
+ devid == PCI_CHIP_HASWELL_SDV_S_GT2 || \
+ devid == PCI_CHIP_HASWELL_CRW_GT2 || \
+ devid == PCI_CHIP_HASWELL_CRW_M_GT2 || \
+ devid == PCI_CHIP_HASWELL_CRW_S_GT2)
+
+#define IS_HSW_GT2_PLUS(devid) (devid == PCI_CHIP_HASWELL_GT2_PLUS || \
+ devid == PCI_CHIP_HASWELL_M_GT2_PLUS || \
+ devid == PCI_CHIP_HASWELL_S_GT2_PLUS || \
+ devid == PCI_CHIP_HASWELL_SDV_GT2_PLUS || \
+ devid == PCI_CHIP_HASWELL_SDV_M_GT2_PLUS || \
+ devid == PCI_CHIP_HASWELL_SDV_S_GT2_PLUS || \
+ devid == PCI_CHIP_HASWELL_CRW_GT2_PLUS || \
+ devid == PCI_CHIP_HASWELL_CRW_M_GT2_PLUS || \
+ devid == PCI_CHIP_HASWELL_CRW_S_GT2_PLUS)
+
+#define IS_HASWELL(devid) (IS_HSW_GT1(devid) || \
+ IS_HSW_GT2(devid) || \
+ IS_HSW_GT2_PLUS(devid) || \
+ IS_HASWELL_ULT(devid)) /* true for any ID in the GT1, GT2, GT2+ or ULT groups; devid is expanded once per comparison, so pass a side-effect-free expression */
+
#define IS_GEN6(devid) (devid == PCI_CHIP_SANDYBRIDGE_GT1 || \
devid == PCI_CHIP_SANDYBRIDGE_GT2 || \
devid == PCI_CHIP_SANDYBRIDGE_GT2_PLUS ||\
@@ -195,6 +284,11 @@ struct intel_region
devid == PCI_CHIP_IVYBRIDGE_M_GT1 || \
devid == PCI_CHIP_IVYBRIDGE_M_GT2 || \
devid == PCI_CHIP_IVYBRIDGE_S_GT1 || \
- devid == PCI_CHIP_IVYBRIDGE_S_GT2)
+ devid == PCI_CHIP_IVYBRIDGE_S_GT2 || \
+ IS_HASWELL(devid))
+
+#ifndef I915_EXEC_VEBOX
+#define I915_EXEC_VEBOX 4
+#endif
#endif /* _INTEL_DRIVER_H_ */
diff --git a/src/shaders/h264/mc/avc_mc.g4b.gen5 b/src/shaders/h264/mc/avc_mc.g4b.gen5
index cdee6ac..7048e1f 100644
--- a/src/shaders/h264/mc/avc_mc.g4b.gen5
+++ b/src/shaders/h264/mc/avc_mc.g4b.gen5
@@ -657,7 +657,7 @@
{ 0x00800001, 0x21300232, 0x00a904cc, 0x00000000 },
{ 0x01600031, 0x27800021, 0x508d07c0, 0x00000200 },
{ 0x00000001, 0x34000020, 0x000007e0, 0x00000000 },
- { 0x00000000, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x00600001, 0x27c00021, 0x008d0000, 0x00000000 },
{ 0x00200009, 0x27fa3e29, 0x00450064, 0x00040004 },
{ 0x00000401, 0x27c80061, 0x00000000, 0x000f000f },
@@ -687,9 +687,9 @@
{ 0x00000001, 0x27800021, 0x00000780, 0x00000000 },
{ 0x00000001, 0x27a00021, 0x000007a0, 0x00000000 },
{ 0x01600031, 0x20001c20, 0x708d0000, 0x82000010 },
- { 0x00000000, 0x00000000, 0x00000000, 0x00000000 },
- { 0x00000000, 0x00000000, 0x00000000, 0x00000000 },
- { 0x00000000, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x00600001, 0x20400021, 0x008d0000, 0x00000000 },
{ 0x00000005, 0x20203e2d, 0x00000061, 0x001f001f },
{ 0x00200009, 0x20643e2d, 0x00450064, 0x00040004 },
@@ -1817,7 +1817,7 @@
{ 0x00000001, 0x21400021, 0x00000140, 0x00000000 },
{ 0x00000001, 0x21600021, 0x00000160, 0x00000000 },
{ 0x01600031, 0x20001c20, 0x708d0000, 0x82000010 },
- { 0x00000000, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x00600001, 0x20400021, 0x008d0000, 0x00000000 },
{ 0x00000005, 0x20203e2d, 0x00000061, 0x001f001f },
{ 0x00200009, 0x20643e2d, 0x00450064, 0x00040004 },
@@ -2393,7 +2393,7 @@
{ 0x00000001, 0x21400021, 0x00000140, 0x00000000 },
{ 0x00000001, 0x21600021, 0x00000160, 0x00000000 },
{ 0x01600031, 0x20001c20, 0x708d0000, 0x82000010 },
- { 0x00000000, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x01000005, 0x20602d29, 0x02000042, 0x00070007 },
{ 0x01000005, 0x20422d29, 0x00000042, 0xfff8fff8 },
{ 0x00000001, 0x202001e9, 0x00000000, 0x100c100c },
@@ -2563,9 +2563,9 @@
{ 0x00000040, 0x22040c00, 0x00000204, 0x00400040 },
{ 0x00110220, 0x34001c00, 0x00001400, 0xffffffda },
{ 0x01600031, 0x20001c20, 0x708d0000, 0x82000010 },
- { 0x00000000, 0x00000000, 0x00000000, 0x00000000 },
- { 0x00000000, 0x00000000, 0x00000000, 0x00000000 },
- { 0x00000000, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x01000005, 0x20602d29, 0x02000042, 0x00070007 },
{ 0x01000005, 0x20422d29, 0x00000042, 0xfff8fff8 },
{ 0x00600001, 0x27e00021, 0x008d0000, 0x00000000 },
@@ -2935,9 +2935,9 @@
{ 0x01600031, 0x20001c20, 0x708d0000, 0x82000010 },
{ 0x01600031, 0x20001c20, 0x708d0000, 0x82000010 },
{ 0x01600031, 0x20001c20, 0x708d0000, 0x82000010 },
- { 0x00000000, 0x00000000, 0x00000000, 0x00000000 },
- { 0x00000000, 0x00000000, 0x00000000, 0x00000000 },
- { 0x00000000, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x00600001, 0x27e00021, 0x008d0000, 0x00000000 },
{ 0x00a02001, 0x20400169, 0x00000000, 0x00000000 },
{ 0x00000001, 0x206601ed, 0x00000000, 0x00030003 },
@@ -3024,8 +3024,8 @@
{ 0x00010220, 0x34001c00, 0x00001400, 0xfffffffc },
{ 0x07600031, 0x20001c20, 0x30000000, 0x02000001 },
{ 0x01600031, 0x20001c24, 0x708d0000, 0x82000000 },
- { 0x00000000, 0x00000000, 0x00000000, 0x00000000 },
- { 0x00000000, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x00600001, 0x27e00021, 0x008d0000, 0x00000000 },
{ 0x00600009, 0x25c03dad, 0x00050034, 0x00040004 },
{ 0x00000001, 0x220801ec, 0x00000000, 0x02400240 },
@@ -3355,9 +3355,9 @@
{ 0x00008025, 0x20000000, 0x00000000, 0x00000000 },
{ 0x00008025, 0x20000000, 0x00000000, 0x00000000 },
{ 0x00000001, 0x34000020, 0x000007c0, 0x00000000 },
- { 0x00000000, 0x00000000, 0x00000000, 0x00000000 },
- { 0x00000000, 0x00000000, 0x00000000, 0x00000000 },
- { 0x00000000, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x00600001, 0x27e00021, 0x008d0000, 0x00000000 },
{ 0x00a02001, 0x20400169, 0x00000000, 0x00000000 },
{ 0x00000001, 0x206601ed, 0x00000000, 0x00030003 },
@@ -3441,7 +3441,7 @@
{ 0x00000220, 0x34001c00, 0x00001400, 0xffffffe8 },
{ 0x07600031, 0x20001c20, 0x30000000, 0x02000001 },
{ 0x01600031, 0x20001c24, 0x708d0000, 0x82000012 },
- { 0x00000000, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x00600001, 0x27e00021, 0x008d0000, 0x00000000 },
{ 0x00600009, 0x25c03dad, 0x00050034, 0x00040004 },
{ 0x00000001, 0x220801ec, 0x00000000, 0x02400240 },
@@ -3657,7 +3657,7 @@
{ 0x00008025, 0x20000000, 0x00000000, 0x00000000 },
{ 0x00008025, 0x20000000, 0x00000000, 0x00000000 },
{ 0x00000001, 0x34000020, 0x000007c0, 0x00000000 },
- { 0x00000000, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x00600001, 0x27e00021, 0x008d0000, 0x00000000 },
{ 0x00a02001, 0x20400169, 0x00000000, 0x00000000 },
{ 0x00000001, 0x206601ed, 0x00000000, 0x00030003 },
@@ -3744,8 +3744,8 @@
{ 0x00010220, 0x34001c00, 0x00001400, 0xfffffffc },
{ 0x07600031, 0x20001c20, 0x30000000, 0x02000001 },
{ 0x01600031, 0x20001c24, 0x708d0000, 0x82000000 },
- { 0x00000000, 0x00000000, 0x00000000, 0x00000000 },
- { 0x00000000, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x00600001, 0x27e00021, 0x008d0000, 0x00000000 },
{ 0x00600009, 0x25c03dad, 0x00050034, 0x00040004 },
{ 0x00000001, 0x220801ec, 0x00000000, 0x02400240 },
@@ -3984,8 +3984,8 @@
{ 0x00000c01, 0x26100169, 0x00000000, 0x12121212 },
{ 0x00400801, 0x26000171, 0x00000000, 0xffffffff },
{ 0x00600031, 0x20001c20, 0x308d0600, 0x82008002 },
- { 0x00000000, 0x00000000, 0x00000000, 0x00000000 },
- { 0x00000000, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x00600001, 0x27e00021, 0x008d0000, 0x00000000 },
{ 0x00a02001, 0x20400169, 0x00000000, 0x00000000 },
{ 0x00000001, 0x206601ed, 0x00000000, 0x00030003 },
@@ -4069,7 +4069,7 @@
{ 0x00000220, 0x34001c00, 0x00001400, 0xffffffe8 },
{ 0x07600031, 0x20001c20, 0x30000000, 0x02000001 },
{ 0x01600031, 0x20001c24, 0x708d0000, 0x82000012 },
- { 0x00000000, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x00600001, 0x27e00021, 0x008d0000, 0x00000000 },
{ 0x00600009, 0x25c03dad, 0x00050034, 0x00040004 },
{ 0x00000001, 0x220801ec, 0x00000000, 0x02400240 },
@@ -4268,8 +4268,8 @@
{ 0x00000c01, 0x26100169, 0x00000000, 0x12121212 },
{ 0x00400801, 0x26000171, 0x00000000, 0xffffffff },
{ 0x00600031, 0x20001c20, 0x308d0600, 0x82008002 },
- { 0x00000000, 0x00000000, 0x00000000, 0x00000000 },
- { 0x00000000, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x00600001, 0x27e00021, 0x008d0000, 0x00000000 },
{ 0x00a02001, 0x20400169, 0x00000000, 0x00000000 },
{ 0x00000001, 0x206601ed, 0x00000000, 0x00010001 },
@@ -4357,7 +4357,7 @@
{ 0x00010220, 0x34001c00, 0x00001400, 0xfffffffc },
{ 0x07600031, 0x20001c20, 0x30000000, 0x02000001 },
{ 0x01600031, 0x20001c24, 0x708d0000, 0x82000000 },
- { 0x00000000, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x00600001, 0x27e00021, 0x008d0000, 0x00000000 },
{ 0x00400441, 0x45c03dad, 0x00000034, 0x00100010 },
{ 0x00400841, 0x45c23dad, 0x00000036, 0x00200020 },
@@ -4796,8 +4796,8 @@
{ 0x00008025, 0x20000000, 0x00000000, 0x00000000 },
{ 0x00008025, 0x20000000, 0x00000000, 0x00000000 },
{ 0x00000001, 0x34000020, 0x000007c0, 0x00000000 },
- { 0x00000000, 0x00000000, 0x00000000, 0x00000000 },
- { 0x00000000, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x00600001, 0x27e00021, 0x008d0000, 0x00000000 },
{ 0x00a02001, 0x20400169, 0x00000000, 0x00000000 },
{ 0x00000001, 0x206601ed, 0x00000000, 0x00010001 },
@@ -4881,7 +4881,7 @@
{ 0x00000220, 0x34001c00, 0x00001400, 0xffffffe8 },
{ 0x07600031, 0x20001c20, 0x30000000, 0x02000001 },
{ 0x01600031, 0x20001c24, 0x708d0000, 0x82000012 },
- { 0x00000000, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x00600001, 0x27e00021, 0x008d0000, 0x00000000 },
{ 0x00400441, 0x45c03dad, 0x00000034, 0x00100010 },
{ 0x00400841, 0x45c23dad, 0x00000036, 0x00200020 },
diff --git a/src/shaders/post_processing/Common/PA_Load_8x8.asm b/src/shaders/post_processing/Common/PA_Load_8x8.asm
deleted file mode 100644
index 3569bd1..0000000
--- a/src/shaders/post_processing/Common/PA_Load_8x8.asm
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * All Video Processing kernels
- * Copyright © <2010>, Intel Corporation.
- *
- * This program is licensed under the terms and conditions of the
- * Eclipse Public License (EPL), version 1.0. The full text of the EPL is at
- * http://www.opensource.org/licenses/eclipse-1.0.php.
- *
- */
-
-// Module name: PA_Load_8x8.asm
-//----------------------------------------------------------------
-
-#define PA_LOAD_8x8
-#include "PA_Load.inc"
-
-// Load 16x8 packed data block
-// Packed data block should be loaded as 32x8 pixel block
- add (2) rMSGSRC.0<1>:d wORIX<2;2,1>:w wSRC_H_ORI_OFFSET<2;2,1>:w // Source Block origin
- shl (1) rMSGSRC.0<1>:d acc0:w 1:w // H. block origin need to be doubled
- mov (1) rMSGSRC.2<1>:ud nDPR_BLOCK_SIZE_YUV:ud // Block width and height (32x8)
- mov (8) mMSGHDRY<1>:ud rMSGSRC<8;8,1>:ud
- send (8) udSRC_YUV(0)<1> mMSGHDRY udDUMMY_NULL nDATAPORT_READ nDPMR_MSGDSC+nDPR_MSG_SIZE_YUV+nBI_CURRENT_SRC_YUV:ud
-
-// Unpack to "planar" YUV422 format in word-aligned bytes
- add (4) pCF_Y_OFFSET<1>:uw ubSRC_CF_OFFSET<4;4,1>:ub nSRC_YUV_REG*nGRFWIB:w // Initial Y,U,V offset in YUV422 block
- $for(0; <nY_NUM_OF_ROWS; 1) {
- mov (16) uwDEST_Y(0, %1*16)<1> r[pCF_Y_OFFSET, %1*nGRFWIB]REGION(16,2)
- mov (8) uwDEST_U(0, %1*8)<1> r[pCF_U_OFFSET, %1*nGRFWIB]REGION(8,4)
- mov (8) uwDEST_V(0, %1*8)<1> r[pCF_V_OFFSET, %1*nGRFWIB]REGION(8,4)
- }
-
-// End of PA_Load_8x8
diff --git a/src/shaders/post_processing/Core_Kernels/PL2_AVS_IEF_Unpack_16x8.asm b/src/shaders/post_processing/Core_Kernels/PL2_AVS_IEF_Unpack_16x8.asm
deleted file mode 100644
index 6e67557..0000000
--- a/src/shaders/post_processing/Core_Kernels/PL2_AVS_IEF_Unpack_16x8.asm
+++ /dev/null
@@ -1,271 +0,0 @@
-/*
- * All Video Processing kernels
- * Copyright © <2010>, Intel Corporation.
- *
- * This program is licensed under the terms and conditions of the
- * Eclipse Public License (EPL), version 1.0. The full text of the EPL is at
- * http://www.opensource.org/licenses/eclipse-1.0.php.
- *
- */
-
-//---------- PL2_AVS_IEF_Unpack_16x8.asm ----------
-
-#ifdef AVS_OUTPUT_16_BIT //Output is packed in AVYU format
-// Move first 8x8 words of Y to dest GRF (as packed)
- mov (4) uwDEST_Y(0,1)<4> uwAVS_RESPONSE(0,0)<4;4,1>
- mov (4) uwDEST_Y(1,1)<4> uwAVS_RESPONSE(0,8)<4;4,1>
- mov (4) uwDEST_Y(4,1)<4> uwAVS_RESPONSE(0,4)<4;4,1>
- mov (4) uwDEST_Y(5,1)<4> uwAVS_RESPONSE(0,12)<4;4,1>
- mov (4) uwDEST_Y(8,1)<4> uwAVS_RESPONSE(1,0)<4;4,1>
- mov (4) uwDEST_Y(9,1)<4> uwAVS_RESPONSE(1,8)<4;4,1>
- mov (4) uwDEST_Y(12,1)<4> uwAVS_RESPONSE(1,4)<4;4,1>
- mov (4) uwDEST_Y(13,1)<4> uwAVS_RESPONSE(1,12)<4;4,1>
- mov (4) uwDEST_Y(16,1)<4> uwAVS_RESPONSE(2,0)<4;4,1>
- mov (4) uwDEST_Y(17,1)<4> uwAVS_RESPONSE(2,8)<4;4,1>
- mov (4) uwDEST_Y(20,1)<4> uwAVS_RESPONSE(2,4)<4;4,1>
- mov (4) uwDEST_Y(21,1)<4> uwAVS_RESPONSE(2,12)<4;4,1>
- mov (4) uwDEST_Y(24,1)<4> uwAVS_RESPONSE(3,0)<4;4,1>
- mov (4) uwDEST_Y(25,1)<4> uwAVS_RESPONSE(3,8)<4;4,1>
- mov (4) uwDEST_Y(28,1)<4> uwAVS_RESPONSE(3,4)<4;4,1>
- mov (4) uwDEST_Y(29,1)<4> uwAVS_RESPONSE(3,12)<4;4,1>
-
-// Move first 8x8 words of U to dest GRF (as packed)
- mov (4) uwDEST_Y(0,0)<4> uwAVS_RESPONSE(4,0)<4;4,1>
- mov (4) uwDEST_Y(1,0)<4> uwAVS_RESPONSE(4,8)<4;4,1>
- mov (4) uwDEST_Y(4,0)<4> uwAVS_RESPONSE(4,4)<4;4,1>
- mov (4) uwDEST_Y(5,0)<4> uwAVS_RESPONSE(4,12)<4;4,1>
- mov (4) uwDEST_Y(8,0)<4> uwAVS_RESPONSE(5,0)<4;4,1>
- mov (4) uwDEST_Y(9,0)<4> uwAVS_RESPONSE(5,8)<4;4,1>
- mov (4) uwDEST_Y(12,0)<4> uwAVS_RESPONSE(5,4)<4;4,1>
- mov (4) uwDEST_Y(13,0)<4> uwAVS_RESPONSE(5,12)<4;4,1>
- mov (4) uwDEST_Y(16,0)<4> uwAVS_RESPONSE(8,0)<4;4,1>
- mov (4) uwDEST_Y(17,0)<4> uwAVS_RESPONSE(8,8)<4;4,1>
- mov (4) uwDEST_Y(20,0)<4> uwAVS_RESPONSE(8,4)<4;4,1>
- mov (4) uwDEST_Y(21,0)<4> uwAVS_RESPONSE(8,12)<4;4,1>
- mov (4) uwDEST_Y(24,0)<4> uwAVS_RESPONSE(9,0)<4;4,1>
- mov (4) uwDEST_Y(25,0)<4> uwAVS_RESPONSE(9,8)<4;4,1>
- mov (4) uwDEST_Y(28,0)<4> uwAVS_RESPONSE(9,4)<4;4,1>
- mov (4) uwDEST_Y(29,0)<4> uwAVS_RESPONSE(9,12)<4;4,1>
-
-// Move first 8x8 words of V to dest GRF (as packed)
- mov (4) uwDEST_Y(0,2)<4> uwAVS_RESPONSE(6,0)<4;4,1>
- mov (4) uwDEST_Y(1,2)<4> uwAVS_RESPONSE(6,8)<4;4,1>
- mov (4) uwDEST_Y(4,2)<4> uwAVS_RESPONSE(6,4)<4;4,1>
- mov (4) uwDEST_Y(5,2)<4> uwAVS_RESPONSE(6,12)<4;4,1>
- mov (4) uwDEST_Y(8,2)<4> uwAVS_RESPONSE(7,0)<4;4,1>
- mov (4) uwDEST_Y(9,2)<4> uwAVS_RESPONSE(7,8)<4;4,1>
- mov (4) uwDEST_Y(12,2)<4> uwAVS_RESPONSE(7,4)<4;4,1>
- mov (4) uwDEST_Y(13,2)<4> uwAVS_RESPONSE(7,12)<4;4,1>
- mov (4) uwDEST_Y(16,2)<4> uwAVS_RESPONSE(10,0)<4;4,1>
- mov (4) uwDEST_Y(17,2)<4> uwAVS_RESPONSE(10,8)<4;4,1>
- mov (4) uwDEST_Y(20,2)<4> uwAVS_RESPONSE(10,4)<4;4,1>
- mov (4) uwDEST_Y(21,2)<4> uwAVS_RESPONSE(10,12)<4;4,1>
- mov (4) uwDEST_Y(24,2)<4> uwAVS_RESPONSE(11,0)<4;4,1>
- mov (4) uwDEST_Y(25,2)<4> uwAVS_RESPONSE(11,8)<4;4,1>
- mov (4) uwDEST_Y(28,2)<4> uwAVS_RESPONSE(11,4)<4;4,1>
- mov (4) uwDEST_Y(29,2)<4> uwAVS_RESPONSE(11,12)<4;4,1>
-
-// Move first 8x8 words of A to dest GRF (as packed)
- mov (4) uwDEST_Y(0,3)<4> 0:uw
- mov (4) uwDEST_Y(1,3)<4> 0:uw
- mov (4) uwDEST_Y(4,3)<4> 0:uw
- mov (4) uwDEST_Y(5,3)<4> 0:uw
- mov (4) uwDEST_Y(8,3)<4> 0:uw
- mov (4) uwDEST_Y(9,3)<4> 0:uw
- mov (4) uwDEST_Y(12,3)<4> 0:uw
- mov (4) uwDEST_Y(13,3)<4> 0:uw
- mov (4) uwDEST_Y(16,3)<4> 0:uw
- mov (4) uwDEST_Y(17,3)<4> 0:uw
- mov (4) uwDEST_Y(20,3)<4> 0:uw
- mov (4) uwDEST_Y(21,3)<4> 0:uw
- mov (4) uwDEST_Y(24,3)<4> 0:uw
- mov (4) uwDEST_Y(25,3)<4> 0:uw
- mov (4) uwDEST_Y(28,3)<4> 0:uw
- mov (4) uwDEST_Y(29,3)<4> 0:uw
-
-// Move second 8x8 words of Y to dest GRF
- mov (4) uwDEST_Y(2,1)<4> uwAVS_RESPONSE_2(0,0)<4;4,1>
- mov (4) uwDEST_Y(3,1)<4> uwAVS_RESPONSE_2(0,8)<4;4,1>
- mov (4) uwDEST_Y(6,1)<4> uwAVS_RESPONSE_2(0,4)<4;4,1>
- mov (4) uwDEST_Y(7,1)<4> uwAVS_RESPONSE_2(0,12)<4;4,1>
- mov (4) uwDEST_Y(10,1)<4> uwAVS_RESPONSE_2(1,0)<4;4,1>
- mov (4) uwDEST_Y(11,1)<4> uwAVS_RESPONSE_2(1,8)<4;4,1>
- mov (4) uwDEST_Y(14,1)<4> uwAVS_RESPONSE_2(1,4)<4;4,1>
- mov (4) uwDEST_Y(15,1)<4> uwAVS_RESPONSE_2(1,12)<4;4,1>
- mov (4) uwDEST_Y(18,1)<4> uwAVS_RESPONSE_2(2,0)<4;4,1>
- mov (4) uwDEST_Y(19,1)<4> uwAVS_RESPONSE_2(2,8)<4;4,1>
- mov (4) uwDEST_Y(22,1)<4> uwAVS_RESPONSE_2(2,4)<4;4,1>
- mov (4) uwDEST_Y(23,1)<4> uwAVS_RESPONSE_2(2,12)<4;4,1>
- mov (4) uwDEST_Y(26,1)<4> uwAVS_RESPONSE_2(3,0)<4;4,1>
- mov (4) uwDEST_Y(27,1)<4> uwAVS_RESPONSE_2(3,8)<4;4,1>
- mov (4) uwDEST_Y(30,1)<4> uwAVS_RESPONSE_2(3,4)<4;4,1>
- mov (4) uwDEST_Y(31,1)<4> uwAVS_RESPONSE_2(3,12)<4;4,1>
-
-// Move second 8x8 words of U to dest GRF
- mov (4) uwDEST_Y(2,0)<4> uwAVS_RESPONSE_2(4,0)<4;4,1>
- mov (4) uwDEST_Y(3,0)<4> uwAVS_RESPONSE_2(4,8)<4;4,1>
- mov (4) uwDEST_Y(6,0)<4> uwAVS_RESPONSE_2(4,4)<4;4,1>
- mov (4) uwDEST_Y(7,0)<4> uwAVS_RESPONSE_2(4,12)<4;4,1>
- mov (4) uwDEST_Y(10,0)<4> uwAVS_RESPONSE_2(5,0)<4;4,1>
- mov (4) uwDEST_Y(11,0)<4> uwAVS_RESPONSE_2(5,8)<4;4,1>
- mov (4) uwDEST_Y(14,0)<4> uwAVS_RESPONSE_2(5,4)<4;4,1>
- mov (4) uwDEST_Y(15,0)<4> uwAVS_RESPONSE_2(5,12)<4;4,1>
- mov (4) uwDEST_Y(18,0)<4> uwAVS_RESPONSE_2(8,0)<4;4,1>
- mov (4) uwDEST_Y(19,0)<4> uwAVS_RESPONSE_2(8,8)<4;4,1>
- mov (4) uwDEST_Y(22,0)<4> uwAVS_RESPONSE_2(8,4)<4;4,1>
- mov (4) uwDEST_Y(23,0)<4> uwAVS_RESPONSE_2(8,12)<4;4,1>
- mov (4) uwDEST_Y(26,0)<4> uwAVS_RESPONSE_2(9,0)<4;4,1>
- mov (4) uwDEST_Y(27,0)<4> uwAVS_RESPONSE_2(9,8)<4;4,1>
- mov (4) uwDEST_Y(30,0)<4> uwAVS_RESPONSE_2(9,4)<4;4,1>
- mov (4) uwDEST_Y(31,0)<4> uwAVS_RESPONSE_2(9,12)<4;4,1>
-
-// Move second 8x8 words of V to dest GRF
- mov (4) uwDEST_Y(2,2)<4> uwAVS_RESPONSE_2(6,0)<4;4,1>
- mov (4) uwDEST_Y(3,2)<4> uwAVS_RESPONSE_2(6,8)<4;4,1>
- mov (4) uwDEST_Y(6,2)<4> uwAVS_RESPONSE_2(6,4)<4;4,1>
- mov (4) uwDEST_Y(7,2)<4> uwAVS_RESPONSE_2(6,12)<4;4,1>
- mov (4) uwDEST_Y(10,2)<4> uwAVS_RESPONSE_2(7,0)<4;4,1>
- mov (4) uwDEST_Y(11,2)<4> uwAVS_RESPONSE_2(7,8)<4;4,1>
- mov (4) uwDEST_Y(14,2)<4> uwAVS_RESPONSE_2(7,4)<4;4,1>
- mov (4) uwDEST_Y(15,2)<4> uwAVS_RESPONSE_2(7,12)<4;4,1>
- mov (4) uwDEST_Y(18,2)<4> uwAVS_RESPONSE_2(10,0)<4;4,1>
- mov (4) uwDEST_Y(19,2)<4> uwAVS_RESPONSE_2(10,8)<4;4,1>
- mov (4) uwDEST_Y(22,2)<4> uwAVS_RESPONSE_2(10,4)<4;4,1>
- mov (4) uwDEST_Y(23,2)<4> uwAVS_RESPONSE_2(10,12)<4;4,1>
- mov (4) uwDEST_Y(26,2)<4> uwAVS_RESPONSE_2(11,0)<4;4,1>
- mov (4) uwDEST_Y(27,2)<4> uwAVS_RESPONSE_2(11,8)<4;4,1>
- mov (4) uwDEST_Y(30,2)<4> uwAVS_RESPONSE_2(11,4)<4;4,1>
- mov (4) uwDEST_Y(31,2)<4> uwAVS_RESPONSE_2(11,12)<4;4,1>
-
-// Move second 8x8 words of A to dest GRF
- mov (4) uwDEST_Y(2,3)<4> 0:uw
- mov (4) uwDEST_Y(3,3)<4> 0:uw
- mov (4) uwDEST_Y(6,3)<4> 0:uw
- mov (4) uwDEST_Y(7,3)<4> 0:uw
- mov (4) uwDEST_Y(10,3)<4> 0:uw
- mov (4) uwDEST_Y(11,3)<4> 0:uw
- mov (4) uwDEST_Y(14,3)<4> 0:uw
- mov (4) uwDEST_Y(15,3)<4> 0:uw
- mov (4) uwDEST_Y(18,3)<4> 0:uw
- mov (4) uwDEST_Y(19,3)<4> 0:uw
- mov (4) uwDEST_Y(22,3)<4> 0:uw
- mov (4) uwDEST_Y(23,3)<4> 0:uw
- mov (4) uwDEST_Y(26,3)<4> 0:uw
- mov (4) uwDEST_Y(27,3)<4> 0:uw
- mov (4) uwDEST_Y(30,3)<4> 0:uw
- mov (4) uwDEST_Y(31,3)<4> 0:uw
-
-/* This section will be used if 16-bit output is needed in planar format -vK
- // Move 1st 8x8 words of Y to dest GRF at lower 8 words of each RGF.
- $for(0; <8/2; 1) {
- mov (8) uwDEST_Y(%1*2)<1> uwAVS_RESPONSE(%1,0)<8;4,1>
- mov (8) uwDEST_Y(%1*2+1)<1> uwAVS_RESPONSE(%1,8)<8;4,1>
- }
-
- // Move 1st 8x8 words of U to dest GRF (Copy high byte in a word)
- mov (8) uwDEST_U(0)<1> uwAVS_RESPONSE(4,0)<8;4,1>
- mov (8) uwDEST_U(1)<1> uwAVS_RESPONSE(4,8)<8;4,1>
- mov (8) uwDEST_U(2)<1> uwAVS_RESPONSE(5,0)<8;4,1>
- mov (8) uwDEST_U(3)<1> uwAVS_RESPONSE(5,8)<8;4,1>
- mov (8) uwDEST_U(4)<1> uwAVS_RESPONSE(8,0)<8;4,1>
- mov (8) uwDEST_U(5)<1> uwAVS_RESPONSE(8,8)<8;4,1>
- mov (8) uwDEST_U(6)<1> uwAVS_RESPONSE(9,0)<8;4,1>
- mov (8) uwDEST_U(7)<1> uwAVS_RESPONSE(9,8)<8;4,1>
-
- // Move 1st 8x8 words of V to dest GRF
- mov (8) uwDEST_V(0)<1> uwAVS_RESPONSE(6,0)<8;4,1>
- mov (8) uwDEST_V(1)<1> uwAVS_RESPONSE(6,8)<8;4,1>
- mov (8) uwDEST_V(2)<1> uwAVS_RESPONSE(7,0)<8;4,1>
- mov (8) uwDEST_V(3)<1> uwAVS_RESPONSE(7,8)<8;4,1>
- mov (8) uwDEST_V(4)<1> uwAVS_RESPONSE(10,0)<8;4,1>
- mov (8) uwDEST_V(5)<1> uwAVS_RESPONSE(10,8)<8;4,1>
- mov (8) uwDEST_V(6)<1> uwAVS_RESPONSE(11,0)<8;4,1>
- mov (8) uwDEST_V(7)<1> uwAVS_RESPONSE(11,8)<8;4,1>
-
- // Move 2nd 8x8 words of Y to dest GRF at higher 8 words of each GRF.
- $for(0; <8/2; 1) {
- mov (8) uwDEST_Y(%1*2,8)<1> uwAVS_RESPONSE_2(%1,0)<8;4,1>
- mov (8) uwDEST_Y(%1*2+1,8)<1> uwAVS_RESPONSE_2(%1,8)<8;4,1>
- }
-
- // Move 2st 8x8 words of U to dest GRF (Copy high byte in a word)
- mov (8) uwDEST_U(0,8)<1> uwAVS_RESPONSE_2(4,0)<8;4,1>
- mov (8) uwDEST_U(1,8)<1> uwAVS_RESPONSE_2(4,8)<8;4,1>
- mov (8) uwDEST_U(2,8)<1> uwAVS_RESPONSE_2(5,0)<8;4,1>
- mov (8) uwDEST_U(3,8)<1> uwAVS_RESPONSE_2(5,8)<8;4,1>
- mov (8) uwDEST_U(4,8)<1> uwAVS_RESPONSE_2(8,0)<8;4,1>
- mov (8) uwDEST_U(5,8)<1> uwAVS_RESPONSE_2(8,8)<8;4,1>
- mov (8) uwDEST_U(6,8)<1> uwAVS_RESPONSE_2(9,0)<8;4,1>
- mov (8) uwDEST_U(7,8)<1> uwAVS_RESPONSE_2(9,8)<8;4,1>
-
- // Move 2st 8x8 words of V to dest GRF
- mov (8) uwDEST_V(0,8)<1> uwAVS_RESPONSE_2(6,0)<8;4,1>
- mov (8) uwDEST_V(1,8)<1> uwAVS_RESPONSE_2(6,8)<8;4,1>
- mov (8) uwDEST_V(2,8)<1> uwAVS_RESPONSE_2(7,0)<8;4,1>
- mov (8) uwDEST_V(3,8)<1> uwAVS_RESPONSE_2(7,8)<8;4,1>
- mov (8) uwDEST_V(4,8)<1> uwAVS_RESPONSE_2(10,0)<8;4,1>
- mov (8) uwDEST_V(5,8)<1> uwAVS_RESPONSE_2(10,8)<8;4,1>
- mov (8) uwDEST_V(6,8)<1> uwAVS_RESPONSE_2(11,0)<8;4,1>
- mov (8) uwDEST_V(7,8)<1> uwAVS_RESPONSE_2(11,8)<8;4,1>
-*/
-#else
- // Move 1st 8x8 words of Y to dest GRF at lower 8 words of each RGF.
- $for(0; <8/2; 1) {
- mov (8) uwDEST_Y(%1*2)<1> ubAVS_RESPONSE(%1,1)<16;4,2> // Copy high byte in a word
- mov (8) uwDEST_Y(%1*2+1)<1> ubAVS_RESPONSE(%1,8+1)<16;4,2> // Copy high byte in a word
- }
-
- // Move 1st 8x8 words of U to dest GRF (Copy high byte in a word)
- mov (8) uwDEST_V(0)<1> ubAVS_RESPONSE(4,1)<16;4,2>
- mov (8) uwDEST_V(1)<1> ubAVS_RESPONSE(4,8+1)<16;4,2>
- mov (8) uwDEST_V(2)<1> ubAVS_RESPONSE(5,1)<16;4,2>
- mov (8) uwDEST_V(3)<1> ubAVS_RESPONSE(5,8+1)<16;4,2>
- mov (8) uwDEST_V(4)<1> ubAVS_RESPONSE(8,1)<16;4,2>
- mov (8) uwDEST_V(5)<1> ubAVS_RESPONSE(8,8+1)<16;4,2>
- mov (8) uwDEST_V(6)<1> ubAVS_RESPONSE(9,1)<16;4,2>
- mov (8) uwDEST_V(7)<1> ubAVS_RESPONSE(9,8+1)<16;4,2>
-
- // Move 1st 8x8 words of V to dest GRF
- mov (8) uwDEST_U(0)<1> ubAVS_RESPONSE(6,1)<16;4,2>
- mov (8) uwDEST_U(1)<1> ubAVS_RESPONSE(6,8+1)<16;4,2>
- mov (8) uwDEST_U(2)<1> ubAVS_RESPONSE(7,1)<16;4,2>
- mov (8) uwDEST_U(3)<1> ubAVS_RESPONSE(7,8+1)<16;4,2>
- mov (8) uwDEST_U(4)<1> ubAVS_RESPONSE(10,1)<16;4,2>
- mov (8) uwDEST_U(5)<1> ubAVS_RESPONSE(10,8+1)<16;4,2>
- mov (8) uwDEST_U(6)<1> ubAVS_RESPONSE(11,1)<16;4,2>
- mov (8) uwDEST_U(7)<1> ubAVS_RESPONSE(11,8+1)<16;4,2>
-
- // Move 2nd 8x8 words of Y to dest GRF at higher 8 words of each GRF.
- $for(0; <8/2; 1) {
- mov (8) uwDEST_Y(%1*2,8)<1> ubAVS_RESPONSE_2(%1,1)<16;4,2> // Copy high byte in a word
- mov (8) uwDEST_Y(%1*2+1,8)<1> ubAVS_RESPONSE_2(%1,8+1)<16;4,2> // Copy high byte in a word
- }
-
- // Move 2st 8x8 words of U to dest GRF (Copy high byte in a word)
- mov (8) uwDEST_V(0,8)<1> ubAVS_RESPONSE_2(4,1)<16;4,2>
- mov (8) uwDEST_V(1,8)<1> ubAVS_RESPONSE_2(4,8+1)<16;4,2>
- mov (8) uwDEST_V(2,8)<1> ubAVS_RESPONSE_2(5,1)<16;4,2>
- mov (8) uwDEST_V(3,8)<1> ubAVS_RESPONSE_2(5,8+1)<16;4,2>
- mov (8) uwDEST_V(4,8)<1> ubAVS_RESPONSE_2(8,1)<16;4,2>
- mov (8) uwDEST_V(5,8)<1> ubAVS_RESPONSE_2(8,8+1)<16;4,2>
- mov (8) uwDEST_V(6,8)<1> ubAVS_RESPONSE_2(9,1)<16;4,2>
- mov (8) uwDEST_V(7,8)<1> ubAVS_RESPONSE_2(9,8+1)<16;4,2>
-
- // Move 2st 8x8 words of V to dest GRF
- mov (8) uwDEST_U(0,8)<1> ubAVS_RESPONSE_2(6,1)<16;4,2>
- mov (8) uwDEST_U(1,8)<1> ubAVS_RESPONSE_2(6,8+1)<16;4,2>
- mov (8) uwDEST_U(2,8)<1> ubAVS_RESPONSE_2(7,1)<16;4,2>
- mov (8) uwDEST_U(3,8)<1> ubAVS_RESPONSE_2(7,8+1)<16;4,2>
- mov (8) uwDEST_U(4,8)<1> ubAVS_RESPONSE_2(10,1)<16;4,2>
- mov (8) uwDEST_U(5,8)<1> ubAVS_RESPONSE_2(10,8+1)<16;4,2>
- mov (8) uwDEST_U(6,8)<1> ubAVS_RESPONSE_2(11,1)<16;4,2>
- mov (8) uwDEST_U(7,8)<1> ubAVS_RESPONSE_2(11,8+1)<16;4,2>
-#endif
-
- // Re-define new # of lines
- #undef nUV_NUM_OF_ROWS
- #undef nY_NUM_OF_ROWS
-
- #define nY_NUM_OF_ROWS 8
- #define nUV_NUM_OF_ROWS 8
-
diff --git a/src/shaders/post_processing/Core_Kernels/PL_DNDI_ALG_UVCopy_NV12.asm b/src/shaders/post_processing/Core_Kernels/PL_DNDI_ALG_UVCopy_NV12.asm
deleted file mode 100644
index 69330ba..0000000
--- a/src/shaders/post_processing/Core_Kernels/PL_DNDI_ALG_UVCopy_NV12.asm
+++ /dev/null
@@ -1,103 +0,0 @@
-/*
- * All Video Processing kernels
- * Copyright © <2010>, Intel Corporation.
- *
- * This program is licensed under the terms and conditions of the
- * Eclipse Public License (EPL), version 1.0. The full text of the EPL is at
- * http://www.opensource.org/licenses/eclipse-1.0.php.
- *
- */
-
-#define DI_ENABLE
-
- #include "DNDI.inc"
-
- #undef nY_NUM_OF_ROWS
- #define nY_NUM_OF_ROWS 8 // Number of Y rows per block (4 rows for each frame)
- #undef nUV_NUM_OF_ROWS
- #define nUV_NUM_OF_ROWS 8 // Number of U/V rows per block
-
- #undef nSMPL_RESP_LEN
- #define nSMPL_RESP_LEN nSMPL_RESP_LEN_DNDI // set the number of GRF
- #undef nDPW_BLOCK_SIZE_HIST
- #define nDPW_BLOCK_SIZE_HIST nBLOCK_WIDTH_4+nBLOCK_HEIGHT_1 // HIST Block Size for Write is 4x2
- #undef nDPW_BLOCK_SIZE_DN
- #define nDPW_BLOCK_SIZE_DN nBLOCK_WIDTH_16+nBLOCK_HEIGHT_4 // DN Block Size for Write is 16x4
- #undef nDPR_BLOCK_SIZE_UV
- #define nDPR_BLOCK_SIZE_UV nBLOCK_WIDTH_16+nBLOCK_HEIGHT_2 // DN Block Size for UV Write/Read is 16x2
-
-////////////////////////////////////// Run the DN Algorithm ///////////////////////////////////////
- #include "DNDI_COMMAND.asm"
-
-////////////////////////////////////// Rearrange for Internal Planar //////////////////////////////
- // move the previous frame Y component to internal planar format
- $for (0; <nY_NUM_OF_ROWS/2; 1) {
- mov (16) uwDEST_Y(%1,0)<1> ubRESP(nDI_PREV_FRAME_LUMA_OFFSET,%1*16)
- }
- // move the previous frame U,V components to internal planar format
- $for (0; <nUV_NUM_OF_ROWS/2; 1) {
- mov (8) uwDEST_U(0,%1*8)<1> ubRESP(nDI_PREV_FRAME_CHROMA_OFFSET,%1*16+1)<16;8,2> //U pixels
- mov (8) uwDEST_V(0,%1*8)<1> ubRESP(nDI_PREV_FRAME_CHROMA_OFFSET,%1*16)<16;8,2> //V pixels
- }
- // move the current frame Y component to internal planar format
- $for (0; <nY_NUM_OF_ROWS/2; 1) {
- mov (16) uwDEST_Y(%1+4,0)<1> ubRESP(nDI_CURR_FRAME_LUMA_OFFSET,%1*16)
- }
- // move the current frame U,V components to internal planar format
- $for (0; <nUV_NUM_OF_ROWS/2; 1) {
- mov (8) uwDEST_U(2,%1*8)<1> ubRESP(nDI_CURR_FRAME_CHROMA_OFFSET,%1*16+1)<16;8,2> //U pixels
- mov (8) uwDEST_V(2,%1*8)<1> ubRESP(nDI_CURR_FRAME_CHROMA_OFFSET,%1*16)<16;8,2> //V pixels
- }
-
-////////////////////////////////////// Save the STMM Data for Next Run /////////////////////////
- // Write STMM to memory
- shr (1) rMSGSRC.0<1>:ud wORIX<0;1,0>:w 1:w // X origin / 2
- mov (1) rMSGSRC.1<1>:ud wORIY<0;1,0>:w // Y origin
- mov (1) rMSGSRC.2<1>:ud nDPW_BLOCK_SIZE_STMM:ud // block width and height (8x4)
- mov (8) mudMSGHDR_STMM(0)<1> rMSGSRC.0<8;8,1>:ud // message header
- mov (8) mudMSGHDR_STMM(1)<1> udRESP(nDI_STMM_OFFSET,0) // Move STMM to MRF
- send (8) dNULLREG mMSGHDR_STMM udDUMMY_NULL nDATAPORT_WRITE nDPMW_MSGDSC+nDPMW_MSG_LEN_STMM+nBI_STMM_HISTORY_OUTPUT:ud
-
-////////////////////////////////////// Save the History Data for Next Run /////////////////////////
- #include "DI_Hist_Save.asm"
-
-////////////////////////////////////// Save the DN Curr Frame for Next Run ////////////////////////
- add (4) pCF_Y_OFFSET<1>:uw ubSRC_CF_OFFSET<4;4,1>:ub npDN_YUV:w
- // check top/bottom field first
- cmp.e.f0.0 (1) null<1>:w ubTFLD_FIRST<0;1,0>:ub 1:w
- (f0.0) jmpi (1) TOP_FIELD_FIRST
-
-BOTTOM_FIELD_FIRST:
- $for (0,0; <nY_NUM_OF_ROWS/2; 2,1) {
- mov (4) mudMSGHDR_DN(1,%1*4)<1> udRESP(nDI_CURR_2ND_FIELD_LUMA_OFFSET,%2*4)<4;4,1> // 2nd field luma from current frame (line 0,2)
- mov (4) mudMSGHDR_DN(1,%1*4+4)<1> udRESP(nDI_CURR_FRAME_LUMA_OFFSET+%2,4)<4;4,1> // 1st field luma from current frame (line 1,3)
- }
- jmpi (1) SAVE_DN_CURR
-
-TOP_FIELD_FIRST:
- $for (0,0; <nY_NUM_OF_ROWS/2; 2,1) {
- mov (4) mudMSGHDR_DN(1,%1*4)<1> udRESP(nDI_CURR_FRAME_LUMA_OFFSET+%2,0)<4;4,1> // 2nd field luma from current frame (line 0,2)
- mov (4) mudMSGHDR_DN(1,%1*4+4)<1> udRESP(nDI_CURR_2ND_FIELD_LUMA_OFFSET,%2*4)<4;4,1> // 1st field luma from current frame (line 1,3)
- }
-SAVE_DN_CURR:
- mov (2) rMSGSRC.0<1>:ud wORIX<2;2,1>:w // X origin and Y origin
- mov (1) rMSGSRC.2<1>:ud nDPW_BLOCK_SIZE_DN:ud // block width and height (16x4)
- mov (8) mudMSGHDR_DN(0)<1> rMSGSRC.0<8;8,1>:ud
- send (8) dNULLREG mMSGHDR_DN udDUMMY_NULL nDATAPORT_WRITE nDPMW_MSGDSC+nDPMW_MSG_LEN_PL_DN_DI+nBI_DESTINATION_Y:ud
-
-
-/////////////////////////////NV12 UV Copy 422/////////////////////////////////////////////////////
- //Read UV through DATAPORT
- add (2) rMSGSRC.0<1>:d wORIX<2;2,1>:w wSRC_H_ORI_OFFSET<2;2,1>:w // Source Y Block origin
- asr (1) rMSGSRC.1<1>:d rMSGSRC.1<0;1,0>:d 1:w // U/V block origin should be half of Y's
- mov (1) rMSGSRC.2<1>:ud nDPR_BLOCK_SIZE_UV:ud // U/V block width and height (16x2)
- mov (8) mudMSGHDR_DN<1> rMSGSRC<8;8,1>:ud
- send (8) udBOT_U_IO(0)<1> mMSGHDR_DN udDUMMY_NULL nDATAPORT_READ nDPMR_MSGDSC+nRESLEN_1+nBI_CURRENT_SRC_UV:ud
-
- //Write UV through DATAPORT
- mov (2) rMSGSRC.0<1>:ud wORIX<2;2,1>:w // X origin and Y origin
- asr (1) rMSGSRC.1<1>:d rMSGSRC.1<0;1,0>:d 1:w // U/V block origin should be half of Y's
- mov (1) rMSGSRC.2<1>:ud nDPR_BLOCK_SIZE_UV:ud // block width and height (16x2)
- mov (8) mudMSGHDR_DN(0)<1> rMSGSRC.0<8;8,1>:ud
- mov (8) mudMSGHDR_DN(1)<1> udBOT_U_IO(0)<8;8,1>
- send (8) dNULLREG mMSGHDR_DN udDUMMY_NULL nDATAPORT_WRITE nDPMW_MSGDSC+nMSGLEN_1+nBI_DESTINATION_UV:ud
\ No newline at end of file
diff --git a/src/shaders/post_processing/Makefile.am b/src/shaders/post_processing/Makefile.am
index 41b68bf..0dfd633 100644
--- a/src/shaders/post_processing/Makefile.am
+++ b/src/shaders/post_processing/Makefile.am
@@ -1,191 +1,4 @@
-
-INTEL_G4I =
-
-INTEL_G4A = null.g4a
-INTEL_G4B = null.g4b
-INTEL_G4B_GEN5 = null.g4b.gen5
-INTEL_G6A = null.g6a
-INTEL_G6B = null.g6b
-
-INTEL_PP_G4B_GEN5 = \
- nv12_avs_nv12.g4b.gen5 \
- nv12_dndi_nv12.g4b.gen5 \
- nv12_load_save_nv12.g4b.gen5 \
- nv12_scaling_nv12.g4b.gen5 \
- $(NULL)
-
-INTEL_PP_G6B = \
- nv12_avs_nv12.g6b \
- nv12_dndi_nv12.g6b \
- nv12_load_save_nv12.g6b \
- nv12_scaling_nv12.g6b \
- $(NULL)
-
-INTEL_PP_ASM = \
- nv12_avs_nv12.asm \
- nv12_dndi_nv12.asm \
- nv12_load_save_nv12.asm \
- nv12_scaling_nv12.asm \
- $(NULL)
-
-INTEL_PP_ASM += \
- Common/AYUV_Load_16x8.asm \
- Common/IMC3_Load_8x4.asm \
- Common/IMC3_Load_8x5.asm \
- Common/IMC3_Load_9x5.asm \
- Common/Init_All_Regs.asm \
- Common/Multiple_Loop.asm \
- Common/Multiple_Loop_Head.asm \
- Common/NV11_Load_4x8.asm \
- Common/NV11_Load_5x8.asm \
- Common/NV12_Load_8x4.asm \
- Common/NV12_Load_8x5.asm \
- Common/NV12_Load_9x5.asm \
- Common/P208_Load_8x8.asm \
- Common/P208_Load_9x8.asm \
- Common/PA_Load_8x8.asm \
- Common/PA_Load_9x8.asm \
- Common/PL16x8_PL8x4.asm \
- Common/PL16x8_PL8x8.asm \
- Common/PL4x8_Save_NV11.asm \
- Common/PL5x8_PL16x8.asm \
- Common/PL5x8_PL8x8.asm \
- Common/PL8x4_Save_IMC3.asm \
- Common/PL8x4_Save_NV12.asm \
- Common/PL8x5_PL8x8.asm \
- Common/PL8x8_PL8x4.asm \
- Common/PL8x8_Save_P208.asm \
- Common/PL8x8_Save_PA.asm \
- Common/PL9x5_PL16x8.asm \
- Common/PL9x8_PL16x8.asm \
- Common/RGB16x8_Save_RGB.asm \
- Common/RGB16x8_Save_RGB16.asm \
- Common/RGB16x8_Save_Y416.asm \
- Common/RGB_Pack.asm \
- Common/SetupVPKernel.asm \
- Common/readSampler16x1.asm \
- Core_Kernels/AVS_SetupFirstBlock.asm \
- Core_Kernels/AVS_SetupSecondBlock.asm \
- Core_Kernels/DI_Hist_Save.asm \
- Core_Kernels/DI_SAVE_PA.asm \
- Core_Kernels/DNDI_COMMAND.asm \
- Core_Kernels/DNDI_Hist_Save.asm \
- Core_Kernels/PA_AVS_IEF_16x8.asm \
- Core_Kernels/PA_AVS_IEF_8x4.asm \
- Core_Kernels/PA_AVS_IEF_8x8.asm \
- Core_Kernels/PA_AVS_IEF_Sample.asm \
- Core_Kernels/PA_AVS_IEF_Unpack_16x8.asm \
- Core_Kernels/PA_AVS_IEF_Unpack_8x4.asm \
- Core_Kernels/PA_AVS_IEF_Unpack_8x8.asm \
- Core_Kernels/PA_DNDI_ALG.asm \
- Core_Kernels/PA_DN_ALG.asm \
- Core_Kernels/PA_Scaling.asm \
- Core_Kernels/PL2_AVS_IEF_16x8.asm \
- Core_Kernels/PL2_AVS_IEF_8x4.asm \
- Core_Kernels/PL2_AVS_IEF_8x8.asm \
- Core_Kernels/PL2_AVS_IEF_Unpack_16x8.asm \
- Core_Kernels/PL2_AVS_IEF_Unpack_8x4.asm \
- Core_Kernels/PL2_AVS_IEF_Unpack_8x8.asm \
- Core_Kernels/PL2_Scaling.asm \
- Core_Kernels/PL3_AVS_IEF_16x8.asm \
- Core_Kernels/PL3_AVS_IEF_8x4.asm \
- Core_Kernels/PL3_AVS_IEF_8x8.asm \
- Core_Kernels/PL3_AVS_IEF_Unpack_16x8.asm \
- Core_Kernels/PL3_AVS_IEF_Unpack_8x4.asm \
- Core_Kernels/PL3_AVS_IEF_Unpack_8x8.asm \
- Core_Kernels/PL3_Scaling.asm \
- Core_Kernels/PL_DNDI_ALG.asm \
- Core_Kernels/PL_DNDI_ALG_UVCopy_NV11.asm \
- Core_Kernels/PL_DNDI_ALG_UVCopy_NV12.asm \
- Core_Kernels/PL_DNDI_ALG_UVCopy_P208.asm \
- Core_Kernels/PL_DNDI_ALG_UVCopy_PL3.asm \
- Core_Kernels/PL_DN_ALG.asm \
- Core_Kernels/RGB_AVS_IEF_16x8.asm \
- Core_Kernels/RGB_AVS_IEF_Unpack_16x8.asm \
- Core_Kernels/RGB_AVS_IEF_Unscramble_16x8.asm \
- Core_Kernels/RGB_Scaling.asm \
- $(NULL)
-
-INTEL_PP_INC = \
- Common/AYUV_Load_16x8.inc \
- Common/Expansion.inc \
- Common/PA_Load.inc \
- Common/PL2_Load.inc \
- Common/PL3_Load.inc \
- Common/PL4x8_Save_NV11.inc \
- Common/PL8x4_Save_IMC3.inc \
- Common/PL8x4_Save_NV12.inc \
- Common/PL8x8_PL8x4.inc \
- Common/PL8x8_Save_P208.inc \
- Common/PL8x8_Save_PA.inc \
- Common/RGB16x8_Save_RGB.inc \
- Common/RGB16x8_Save_RGB16.inc \
- Common/RGB16x8_Save_Y416.inc \
- Common/common.inc \
- Common/undefall.inc \
- Core_Kernels/AVS_IEF.inc \
- Core_Kernels/DI.inc \
- Core_Kernels/DNDI.inc \
- Core_Kernels/Scaling.inc
- $(NULL)
-
-INTEL_PP_GEN5_ASM = $(INTEL_PP_G4B_GEN5:%.g4b.gen5=%.g5s)
-INTEL_PP_GEN6_ASM = $(INTEL_PP_G6B:%.g6b=%.g6s)
-
-TARGETS =
-if HAVE_GEN4ASM
-TARGETS += $(INTEL_PP_G4B_GEN5)
-TARGETS += $(INTEL_PP_G6B)
-endif
-
-all-local: $(TARGETS)
-
-SUFFIXES = .g4a .g4b .g6a .g6b .g5s .g6s .asm
-
-if HAVE_GEN4ASM
-.g4a.g4b:
- $(AM_V_GEN)m4 $*.g4a > $*.g4m && \
- $(AM_V_GEN)$(GEN4ASM) -o $@ $*.g4m && \
- $(AM_V_GEN)$(GEN4ASM) -g 5 -o $@.gen5 $*.g4m && \
- rm $*.g4m
-
-.g6a.g6b:
- $(AM_V_GEN)m4 $< > $*.g6m && \
- $(AM_V_GEN)$(GEN4ASM) -g 6 -o $@ $*.g6m && \
- rm $*.g6m
-
-$(INTEL_G4B): $(INTEL_G4I)
-
-$(INTEL_PP_GEN5_ASM): $(INTEL_PP_ASM)
-.asm.g5s:
- $(AM_V_GEN)cpp -D DEV_ILK -I Common/ -I Core_Kernels $< > _pp0.$@; \
- ../gpp.py _pp0.$@ $@; \
- rm _pp0.$@
-.g5s.g4b.gen5:
- $(AM_V_GEN)$(GEN4ASM) -a -o $@ -g 5 $<
-
-$(INTEL_PP_GEN6_ASM): $(INTEL_PP_ASM)
-.asm.g6s:
- $(AM_V_GEN)cpp -D GT -I Common/ -I Core_Kernels $< > _pp0.$@; \
- ../gpp.py _pp0.$@ $@; \
- rm _pp0.$@
-.g6s.g6b:
- $(AM_V_GEN)$(GEN4ASM) -a -o $@ -g 6 $<
-endif
-
-CLEANFILES = $(INTEL_PP_GEN5_ASM) $(INTEL_PP_GEN6_ASM)
-
-EXTRA_DIST = \
- $(INTEL_G4A) \
- $(INTEL_G4B) \
- $(INTEL_G4B_GEN5) \
- $(INTEL_G4I) \
- $(INTEL_G6B) \
- $(INTEL_PP_ASM) \
- $(INTEL_PP_G4B_GEN5) \
- $(INTEL_PP_G6B) \
- $(INTEL_PP_INC) \
- $(NULL)
+SUBDIRS = gen5_6 gen7
# Extra clean files so that maintainer-clean removes *everything*
MAINTAINERCLEANFILES = Makefile.in
diff --git a/src/shaders/post_processing/Common/AYUV_Load_16x8.asm b/src/shaders/post_processing/gen5_6/Common/AYUV_Load_16x8.asm
similarity index 100%
rename from src/shaders/post_processing/Common/AYUV_Load_16x8.asm
rename to src/shaders/post_processing/gen5_6/Common/AYUV_Load_16x8.asm
diff --git a/src/shaders/post_processing/Common/AYUV_Load_16x8.inc b/src/shaders/post_processing/gen5_6/Common/AYUV_Load_16x8.inc
similarity index 100%
rename from src/shaders/post_processing/Common/AYUV_Load_16x8.inc
rename to src/shaders/post_processing/gen5_6/Common/AYUV_Load_16x8.inc
diff --git a/src/shaders/post_processing/Common/Expansion.inc b/src/shaders/post_processing/gen5_6/Common/Expansion.inc
similarity index 100%
rename from src/shaders/post_processing/Common/Expansion.inc
rename to src/shaders/post_processing/gen5_6/Common/Expansion.inc
diff --git a/src/shaders/post_processing/Common/IMC3_Load_8x4.asm b/src/shaders/post_processing/gen5_6/Common/IMC3_Load_8x4.asm
similarity index 100%
rename from src/shaders/post_processing/Common/IMC3_Load_8x4.asm
rename to src/shaders/post_processing/gen5_6/Common/IMC3_Load_8x4.asm
diff --git a/src/shaders/post_processing/Common/IMC3_Load_8x5.asm b/src/shaders/post_processing/gen5_6/Common/IMC3_Load_8x5.asm
similarity index 100%
rename from src/shaders/post_processing/Common/IMC3_Load_8x5.asm
rename to src/shaders/post_processing/gen5_6/Common/IMC3_Load_8x5.asm
diff --git a/src/shaders/post_processing/Common/IMC3_Load_9x5.asm b/src/shaders/post_processing/gen5_6/Common/IMC3_Load_9x5.asm
similarity index 100%
rename from src/shaders/post_processing/Common/IMC3_Load_9x5.asm
rename to src/shaders/post_processing/gen5_6/Common/IMC3_Load_9x5.asm
diff --git a/src/shaders/post_processing/Common/Init_All_Regs.asm b/src/shaders/post_processing/gen5_6/Common/Init_All_Regs.asm
similarity index 100%
rename from src/shaders/post_processing/Common/Init_All_Regs.asm
rename to src/shaders/post_processing/gen5_6/Common/Init_All_Regs.asm
diff --git a/src/shaders/post_processing/Common/Multiple_Loop.asm b/src/shaders/post_processing/gen5_6/Common/Multiple_Loop.asm
similarity index 100%
rename from src/shaders/post_processing/Common/Multiple_Loop.asm
rename to src/shaders/post_processing/gen5_6/Common/Multiple_Loop.asm
diff --git a/src/shaders/post_processing/Common/Multiple_Loop_Head.asm b/src/shaders/post_processing/gen5_6/Common/Multiple_Loop_Head.asm
similarity index 100%
rename from src/shaders/post_processing/Common/Multiple_Loop_Head.asm
rename to src/shaders/post_processing/gen5_6/Common/Multiple_Loop_Head.asm
diff --git a/src/shaders/post_processing/Common/NV11_Load_4x8.asm b/src/shaders/post_processing/gen5_6/Common/NV11_Load_4x8.asm
similarity index 100%
rename from src/shaders/post_processing/Common/NV11_Load_4x8.asm
rename to src/shaders/post_processing/gen5_6/Common/NV11_Load_4x8.asm
diff --git a/src/shaders/post_processing/Common/NV11_Load_5x8.asm b/src/shaders/post_processing/gen5_6/Common/NV11_Load_5x8.asm
similarity index 100%
rename from src/shaders/post_processing/Common/NV11_Load_5x8.asm
rename to src/shaders/post_processing/gen5_6/Common/NV11_Load_5x8.asm
diff --git a/src/shaders/post_processing/Common/NV12_Load_8x4.asm b/src/shaders/post_processing/gen5_6/Common/NV12_Load_8x4.asm
similarity index 100%
rename from src/shaders/post_processing/Common/NV12_Load_8x4.asm
rename to src/shaders/post_processing/gen5_6/Common/NV12_Load_8x4.asm
diff --git a/src/shaders/post_processing/Common/NV12_Load_8x5.asm b/src/shaders/post_processing/gen5_6/Common/NV12_Load_8x5.asm
similarity index 100%
rename from src/shaders/post_processing/Common/NV12_Load_8x5.asm
rename to src/shaders/post_processing/gen5_6/Common/NV12_Load_8x5.asm
diff --git a/src/shaders/post_processing/Common/NV12_Load_9x5.asm b/src/shaders/post_processing/gen5_6/Common/NV12_Load_9x5.asm
similarity index 100%
rename from src/shaders/post_processing/Common/NV12_Load_9x5.asm
rename to src/shaders/post_processing/gen5_6/Common/NV12_Load_9x5.asm
diff --git a/src/shaders/post_processing/Common/P208_Load_8x8.asm b/src/shaders/post_processing/gen5_6/Common/P208_Load_8x8.asm
similarity index 100%
rename from src/shaders/post_processing/Common/P208_Load_8x8.asm
rename to src/shaders/post_processing/gen5_6/Common/P208_Load_8x8.asm
diff --git a/src/shaders/post_processing/Common/P208_Load_9x8.asm b/src/shaders/post_processing/gen5_6/Common/P208_Load_9x8.asm
similarity index 100%
rename from src/shaders/post_processing/Common/P208_Load_9x8.asm
rename to src/shaders/post_processing/gen5_6/Common/P208_Load_9x8.asm
diff --git a/src/shaders/post_processing/Common/PA_Load.inc b/src/shaders/post_processing/gen5_6/Common/PA_Load.inc
similarity index 100%
rename from src/shaders/post_processing/Common/PA_Load.inc
rename to src/shaders/post_processing/gen5_6/Common/PA_Load.inc
diff --git a/src/shaders/post_processing/gen5_6/Common/PA_Load_8x8.asm b/src/shaders/post_processing/gen5_6/Common/PA_Load_8x8.asm
new file mode 100755
index 0000000..789034f
--- /dev/null
+++ b/src/shaders/post_processing/gen5_6/Common/PA_Load_8x8.asm
@@ -0,0 +1,33 @@
+/*
+ * All Video Processing kernels
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * This program is licensed under the terms and conditions of the
+ * Eclipse Public License (EPL), version 1.0. The full text of the EPL is at
+ * http://www.opensource.org/licenses/eclipse-1.0.php.
+ *
+ */
+
+// Module name: PA_Load_8x8.asm
+//----------------------------------------------------------------
+// PA_LOAD_8x8 is defined ahead of the include, presumably so PA_Load.inc can select its 8x8 settings -- confirm in that file.
+#define PA_LOAD_8x8
+#include "PA_Load.inc"
+
+// Load 16x8 packed data block: build a message header from rMSGSRC, then issue a single nDATAPORT_READ send.
+// Packed data block should be loaded as 32x8 pixel block (presumably because each packed pixel occupies 2 bytes -- confirm against the surface format).
+ add (2) rMSGSRC.0<1>:d wORIX<2;2,1>:w wSRC_H_ORI_OFFSET<2;2,1>:w // Source block origin: two dwords = the word pair at wORIX plus the word pair at wSRC_H_ORI_OFFSET
+ shl (1) rMSGSRC.0<1>:d rMSGSRC.0<0;1,0>:w 1:w // H. block origin needs to be doubled (only element 0 is shifted left by 1)
+ mov (1) rMSGSRC.2<1>:ud nDPR_BLOCK_SIZE_YUV:ud // Block width and height (32x8)
+ mov (8) mMSGHDRY<1>:ud rMSGSRC<8;8,1>:ud // Copy all 8 dwords of rMSGSRC into the message header
+ send (8) udSRC_YUV(0)<1> mMSGHDRY udDUMMY_NULL nDATAPORT_READ nDPMR_MSGDSC+nDPR_MSG_SIZE_YUV+nBI_CURRENT_SRC_YUV:ud // Read request; the response is written starting at udSRC_YUV(0)
+
+// Unpack to "planar" YUV422 format in word-aligned bytes: the loop below emits one Y, one U and one V row copy per iteration.
+ add (4) pCF_Y_OFFSET<1>:uw ubSRC_CF_OFFSET<4;4,1>:ub nSRC_YUV_REG*nGRFWIB:w // Initial Y,U,V offset in YUV422 block: four byte offsets from ubSRC_CF_OFFSET, each biased by nSRC_YUV_REG*nGRFWIB
+ $for(0; <nY_NUM_OF_ROWS; 1) {
+ mov (16) uwDEST_Y(0, %1*16)<1> r[pCF_Y_OFFSET, %1*nGRFWIB]REGION(16,2) // Current row: 16 Y values read indirectly through pCF_Y_OFFSET, stored as words
+ mov (8) uwDEST_U(0, %1*8)<1> r[pCF_U_OFFSET, %1*nGRFWIB]REGION(8,4) // Current row: 8 U values read indirectly through pCF_U_OFFSET, stored as words
+ mov (8) uwDEST_V(0, %1*8)<1> r[pCF_V_OFFSET, %1*nGRFWIB]REGION(8,4) // Current row: 8 V values read indirectly through pCF_V_OFFSET, stored as words
+ }
+
+// End of PA_Load_8x8
diff --git a/src/shaders/post_processing/Common/PA_Load_9x8.asm b/src/shaders/post_processing/gen5_6/Common/PA_Load_9x8.asm
similarity index 100%
rename from src/shaders/post_processing/Common/PA_Load_9x8.asm
rename to src/shaders/post_processing/gen5_6/Common/PA_Load_9x8.asm
diff --git a/src/shaders/post_processing/Common/PL16x8_PL8x4.asm b/src/shaders/post_processing/gen5_6/Common/PL16x8_PL8x4.asm
similarity index 100%
rename from src/shaders/post_processing/Common/PL16x8_PL8x4.asm
rename to src/shaders/post_processing/gen5_6/Common/PL16x8_PL8x4.asm
diff --git a/src/shaders/post_processing/Common/PL16x8_PL8x8.asm b/src/shaders/post_processing/gen5_6/Common/PL16x8_PL8x8.asm
similarity index 100%
rename from src/shaders/post_processing/Common/PL16x8_PL8x8.asm
rename to src/shaders/post_processing/gen5_6/Common/PL16x8_PL8x8.asm
diff --git a/src/shaders/post_processing/Common/PL2_Load.inc b/src/shaders/post_processing/gen5_6/Common/PL2_Load.inc
similarity index 100%
rename from src/shaders/post_processing/Common/PL2_Load.inc
rename to src/shaders/post_processing/gen5_6/Common/PL2_Load.inc
diff --git a/src/shaders/post_processing/Common/PL3_Load.inc b/src/shaders/post_processing/gen5_6/Common/PL3_Load.inc
similarity index 100%
rename from src/shaders/post_processing/Common/PL3_Load.inc
rename to src/shaders/post_processing/gen5_6/Common/PL3_Load.inc
diff --git a/src/shaders/post_processing/Common/PL4x8_Save_NV11.asm b/src/shaders/post_processing/gen5_6/Common/PL4x8_Save_NV11.asm
similarity index 100%
rename from src/shaders/post_processing/Common/PL4x8_Save_NV11.asm
rename to src/shaders/post_processing/gen5_6/Common/PL4x8_Save_NV11.asm
diff --git a/src/shaders/post_processing/Common/PL4x8_Save_NV11.inc b/src/shaders/post_processing/gen5_6/Common/PL4x8_Save_NV11.inc
similarity index 100%
rename from src/shaders/post_processing/Common/PL4x8_Save_NV11.inc
rename to src/shaders/post_processing/gen5_6/Common/PL4x8_Save_NV11.inc
diff --git a/src/shaders/post_processing/Common/PL5x8_PL16x8.asm b/src/shaders/post_processing/gen5_6/Common/PL5x8_PL16x8.asm
similarity index 100%
rename from src/shaders/post_processing/Common/PL5x8_PL16x8.asm
rename to src/shaders/post_processing/gen5_6/Common/PL5x8_PL16x8.asm
diff --git a/src/shaders/post_processing/Common/PL5x8_PL8x8.asm b/src/shaders/post_processing/gen5_6/Common/PL5x8_PL8x8.asm
similarity index 100%
rename from src/shaders/post_processing/Common/PL5x8_PL8x8.asm
rename to src/shaders/post_processing/gen5_6/Common/PL5x8_PL8x8.asm
diff --git a/src/shaders/post_processing/Common/PL8x4_Save_IMC3.asm b/src/shaders/post_processing/gen5_6/Common/PL8x4_Save_IMC3.asm
similarity index 100%
rename from src/shaders/post_processing/Common/PL8x4_Save_IMC3.asm
rename to src/shaders/post_processing/gen5_6/Common/PL8x4_Save_IMC3.asm
diff --git a/src/shaders/post_processing/Common/PL8x4_Save_IMC3.inc b/src/shaders/post_processing/gen5_6/Common/PL8x4_Save_IMC3.inc
similarity index 100%
rename from src/shaders/post_processing/Common/PL8x4_Save_IMC3.inc
rename to src/shaders/post_processing/gen5_6/Common/PL8x4_Save_IMC3.inc
diff --git a/src/shaders/post_processing/Common/PL8x4_Save_NV12.asm b/src/shaders/post_processing/gen5_6/Common/PL8x4_Save_NV12.asm
similarity index 100%
rename from src/shaders/post_processing/Common/PL8x4_Save_NV12.asm
rename to src/shaders/post_processing/gen5_6/Common/PL8x4_Save_NV12.asm
diff --git a/src/shaders/post_processing/Common/PL8x4_Save_NV12.inc b/src/shaders/post_processing/gen5_6/Common/PL8x4_Save_NV12.inc
similarity index 100%
rename from src/shaders/post_processing/Common/PL8x4_Save_NV12.inc
rename to src/shaders/post_processing/gen5_6/Common/PL8x4_Save_NV12.inc
diff --git a/src/shaders/post_processing/Common/PL8x5_PL8x8.asm b/src/shaders/post_processing/gen5_6/Common/PL8x5_PL8x8.asm
similarity index 100%
rename from src/shaders/post_processing/Common/PL8x5_PL8x8.asm
rename to src/shaders/post_processing/gen5_6/Common/PL8x5_PL8x8.asm
diff --git a/src/shaders/post_processing/Common/PL8x8_PL8x4.asm b/src/shaders/post_processing/gen5_6/Common/PL8x8_PL8x4.asm
similarity index 100%
rename from src/shaders/post_processing/Common/PL8x8_PL8x4.asm
rename to src/shaders/post_processing/gen5_6/Common/PL8x8_PL8x4.asm
diff --git a/src/shaders/post_processing/Common/PL8x8_PL8x4.inc b/src/shaders/post_processing/gen5_6/Common/PL8x8_PL8x4.inc
similarity index 100%
rename from src/shaders/post_processing/Common/PL8x8_PL8x4.inc
rename to src/shaders/post_processing/gen5_6/Common/PL8x8_PL8x4.inc
diff --git a/src/shaders/post_processing/Common/PL8x8_Save_P208.asm b/src/shaders/post_processing/gen5_6/Common/PL8x8_Save_P208.asm
similarity index 100%
rename from src/shaders/post_processing/Common/PL8x8_Save_P208.asm
rename to src/shaders/post_processing/gen5_6/Common/PL8x8_Save_P208.asm
diff --git a/src/shaders/post_processing/Common/PL8x8_Save_P208.inc b/src/shaders/post_processing/gen5_6/Common/PL8x8_Save_P208.inc
similarity index 100%
rename from src/shaders/post_processing/Common/PL8x8_Save_P208.inc
rename to src/shaders/post_processing/gen5_6/Common/PL8x8_Save_P208.inc
diff --git a/src/shaders/post_processing/Common/PL8x8_Save_PA.asm b/src/shaders/post_processing/gen5_6/Common/PL8x8_Save_PA.asm
similarity index 100%
rename from src/shaders/post_processing/Common/PL8x8_Save_PA.asm
rename to src/shaders/post_processing/gen5_6/Common/PL8x8_Save_PA.asm
diff --git a/src/shaders/post_processing/Common/PL8x8_Save_PA.inc b/src/shaders/post_processing/gen5_6/Common/PL8x8_Save_PA.inc
similarity index 100%
rename from src/shaders/post_processing/Common/PL8x8_Save_PA.inc
rename to src/shaders/post_processing/gen5_6/Common/PL8x8_Save_PA.inc
diff --git a/src/shaders/post_processing/Common/PL9x5_PL16x8.asm b/src/shaders/post_processing/gen5_6/Common/PL9x5_PL16x8.asm
similarity index 100%
rename from src/shaders/post_processing/Common/PL9x5_PL16x8.asm
rename to src/shaders/post_processing/gen5_6/Common/PL9x5_PL16x8.asm
diff --git a/src/shaders/post_processing/Common/PL9x8_PL16x8.asm b/src/shaders/post_processing/gen5_6/Common/PL9x8_PL16x8.asm
similarity index 100%
rename from src/shaders/post_processing/Common/PL9x8_PL16x8.asm
rename to src/shaders/post_processing/gen5_6/Common/PL9x8_PL16x8.asm
diff --git a/src/shaders/post_processing/Common/RGB16x8_Save_RGB.asm b/src/shaders/post_processing/gen5_6/Common/RGB16x8_Save_RGB.asm
similarity index 100%
rename from src/shaders/post_processing/Common/RGB16x8_Save_RGB.asm
rename to src/shaders/post_processing/gen5_6/Common/RGB16x8_Save_RGB.asm
diff --git a/src/shaders/post_processing/Common/RGB16x8_Save_RGB.inc b/src/shaders/post_processing/gen5_6/Common/RGB16x8_Save_RGB.inc
similarity index 100%
rename from src/shaders/post_processing/Common/RGB16x8_Save_RGB.inc
rename to src/shaders/post_processing/gen5_6/Common/RGB16x8_Save_RGB.inc
diff --git a/src/shaders/post_processing/Common/RGB16x8_Save_RGB16.asm b/src/shaders/post_processing/gen5_6/Common/RGB16x8_Save_RGB16.asm
similarity index 100%
rename from src/shaders/post_processing/Common/RGB16x8_Save_RGB16.asm
rename to src/shaders/post_processing/gen5_6/Common/RGB16x8_Save_RGB16.asm
diff --git a/src/shaders/post_processing/Common/RGB16x8_Save_RGB16.inc b/src/shaders/post_processing/gen5_6/Common/RGB16x8_Save_RGB16.inc
similarity index 100%
rename from src/shaders/post_processing/Common/RGB16x8_Save_RGB16.inc
rename to src/shaders/post_processing/gen5_6/Common/RGB16x8_Save_RGB16.inc
diff --git a/src/shaders/post_processing/Common/RGB16x8_Save_Y416.asm b/src/shaders/post_processing/gen5_6/Common/RGB16x8_Save_Y416.asm
similarity index 100%
rename from src/shaders/post_processing/Common/RGB16x8_Save_Y416.asm
rename to src/shaders/post_processing/gen5_6/Common/RGB16x8_Save_Y416.asm
diff --git a/src/shaders/post_processing/Common/RGB16x8_Save_Y416.inc b/src/shaders/post_processing/gen5_6/Common/RGB16x8_Save_Y416.inc
similarity index 100%
rename from src/shaders/post_processing/Common/RGB16x8_Save_Y416.inc
rename to src/shaders/post_processing/gen5_6/Common/RGB16x8_Save_Y416.inc
diff --git a/src/shaders/post_processing/Common/RGB_Pack.asm b/src/shaders/post_processing/gen5_6/Common/RGB_Pack.asm
similarity index 100%
rename from src/shaders/post_processing/Common/RGB_Pack.asm
rename to src/shaders/post_processing/gen5_6/Common/RGB_Pack.asm
diff --git a/src/shaders/post_processing/Common/SetupVPKernel.asm b/src/shaders/post_processing/gen5_6/Common/SetupVPKernel.asm
similarity index 100%
rename from src/shaders/post_processing/Common/SetupVPKernel.asm
rename to src/shaders/post_processing/gen5_6/Common/SetupVPKernel.asm
diff --git a/src/shaders/post_processing/Common/common.inc b/src/shaders/post_processing/gen5_6/Common/common.inc
similarity index 100%
rename from src/shaders/post_processing/Common/common.inc
rename to src/shaders/post_processing/gen5_6/Common/common.inc
diff --git a/src/shaders/post_processing/Common/readSampler16x1.asm b/src/shaders/post_processing/gen5_6/Common/readSampler16x1.asm
similarity index 100%
rename from src/shaders/post_processing/Common/readSampler16x1.asm
rename to src/shaders/post_processing/gen5_6/Common/readSampler16x1.asm
diff --git a/src/shaders/post_processing/Common/undefall.inc b/src/shaders/post_processing/gen5_6/Common/undefall.inc
similarity index 100%
rename from src/shaders/post_processing/Common/undefall.inc
rename to src/shaders/post_processing/gen5_6/Common/undefall.inc
diff --git a/src/shaders/post_processing/Core_Kernels/AVS_IEF.inc b/src/shaders/post_processing/gen5_6/Core_Kernels/AVS_IEF.inc
similarity index 100%
rename from src/shaders/post_processing/Core_Kernels/AVS_IEF.inc
rename to src/shaders/post_processing/gen5_6/Core_Kernels/AVS_IEF.inc
diff --git a/src/shaders/post_processing/Core_Kernels/AVS_SetupFirstBlock.asm b/src/shaders/post_processing/gen5_6/Core_Kernels/AVS_SetupFirstBlock.asm
similarity index 100%
rename from src/shaders/post_processing/Core_Kernels/AVS_SetupFirstBlock.asm
rename to src/shaders/post_processing/gen5_6/Core_Kernels/AVS_SetupFirstBlock.asm
diff --git a/src/shaders/post_processing/Core_Kernels/AVS_SetupSecondBlock.asm b/src/shaders/post_processing/gen5_6/Core_Kernels/AVS_SetupSecondBlock.asm
similarity index 100%
rename from src/shaders/post_processing/Core_Kernels/AVS_SetupSecondBlock.asm
rename to src/shaders/post_processing/gen5_6/Core_Kernels/AVS_SetupSecondBlock.asm
diff --git a/src/shaders/post_processing/Core_Kernels/DI.inc b/src/shaders/post_processing/gen5_6/Core_Kernels/DI.inc
similarity index 100%
rename from src/shaders/post_processing/Core_Kernels/DI.inc
rename to src/shaders/post_processing/gen5_6/Core_Kernels/DI.inc
diff --git a/src/shaders/post_processing/Core_Kernels/DI_Hist_Save.asm b/src/shaders/post_processing/gen5_6/Core_Kernels/DI_Hist_Save.asm
similarity index 100%
rename from src/shaders/post_processing/Core_Kernels/DI_Hist_Save.asm
rename to src/shaders/post_processing/gen5_6/Core_Kernels/DI_Hist_Save.asm
diff --git a/src/shaders/post_processing/Core_Kernels/DI_SAVE_PA.asm b/src/shaders/post_processing/gen5_6/Core_Kernels/DI_SAVE_PA.asm
similarity index 100%
rename from src/shaders/post_processing/Core_Kernels/DI_SAVE_PA.asm
rename to src/shaders/post_processing/gen5_6/Core_Kernels/DI_SAVE_PA.asm
diff --git a/src/shaders/post_processing/Core_Kernels/DNDI.inc b/src/shaders/post_processing/gen5_6/Core_Kernels/DNDI.inc
similarity index 100%
rename from src/shaders/post_processing/Core_Kernels/DNDI.inc
rename to src/shaders/post_processing/gen5_6/Core_Kernels/DNDI.inc
diff --git a/src/shaders/post_processing/Core_Kernels/DNDI_COMMAND.asm b/src/shaders/post_processing/gen5_6/Core_Kernels/DNDI_COMMAND.asm
similarity index 100%
rename from src/shaders/post_processing/Core_Kernels/DNDI_COMMAND.asm
rename to src/shaders/post_processing/gen5_6/Core_Kernels/DNDI_COMMAND.asm
diff --git a/src/shaders/post_processing/Core_Kernels/DNDI_Hist_Save.asm b/src/shaders/post_processing/gen5_6/Core_Kernels/DNDI_Hist_Save.asm
similarity index 100%
rename from src/shaders/post_processing/Core_Kernels/DNDI_Hist_Save.asm
rename to src/shaders/post_processing/gen5_6/Core_Kernels/DNDI_Hist_Save.asm
diff --git a/src/shaders/post_processing/Core_Kernels/PA_AVS_IEF_16x8.asm b/src/shaders/post_processing/gen5_6/Core_Kernels/PA_AVS_IEF_16x8.asm
similarity index 100%
rename from src/shaders/post_processing/Core_Kernels/PA_AVS_IEF_16x8.asm
rename to src/shaders/post_processing/gen5_6/Core_Kernels/PA_AVS_IEF_16x8.asm
diff --git a/src/shaders/post_processing/Core_Kernels/PA_AVS_IEF_8x4.asm b/src/shaders/post_processing/gen5_6/Core_Kernels/PA_AVS_IEF_8x4.asm
similarity index 100%
rename from src/shaders/post_processing/Core_Kernels/PA_AVS_IEF_8x4.asm
rename to src/shaders/post_processing/gen5_6/Core_Kernels/PA_AVS_IEF_8x4.asm
diff --git a/src/shaders/post_processing/Core_Kernels/PA_AVS_IEF_8x8.asm b/src/shaders/post_processing/gen5_6/Core_Kernels/PA_AVS_IEF_8x8.asm
similarity index 100%
rename from src/shaders/post_processing/Core_Kernels/PA_AVS_IEF_8x8.asm
rename to src/shaders/post_processing/gen5_6/Core_Kernels/PA_AVS_IEF_8x8.asm
diff --git a/src/shaders/post_processing/Core_Kernels/PA_AVS_IEF_Sample.asm b/src/shaders/post_processing/gen5_6/Core_Kernels/PA_AVS_IEF_Sample.asm
similarity index 100%
rename from src/shaders/post_processing/Core_Kernels/PA_AVS_IEF_Sample.asm
rename to src/shaders/post_processing/gen5_6/Core_Kernels/PA_AVS_IEF_Sample.asm
diff --git a/src/shaders/post_processing/Core_Kernels/PA_AVS_IEF_Unpack_16x8.asm b/src/shaders/post_processing/gen5_6/Core_Kernels/PA_AVS_IEF_Unpack_16x8.asm
similarity index 100%
rename from src/shaders/post_processing/Core_Kernels/PA_AVS_IEF_Unpack_16x8.asm
rename to src/shaders/post_processing/gen5_6/Core_Kernels/PA_AVS_IEF_Unpack_16x8.asm
diff --git a/src/shaders/post_processing/Core_Kernels/PA_AVS_IEF_Unpack_8x4.asm b/src/shaders/post_processing/gen5_6/Core_Kernels/PA_AVS_IEF_Unpack_8x4.asm
similarity index 100%
rename from src/shaders/post_processing/Core_Kernels/PA_AVS_IEF_Unpack_8x4.asm
rename to src/shaders/post_processing/gen5_6/Core_Kernels/PA_AVS_IEF_Unpack_8x4.asm
diff --git a/src/shaders/post_processing/Core_Kernels/PA_AVS_IEF_Unpack_8x8.asm b/src/shaders/post_processing/gen5_6/Core_Kernels/PA_AVS_IEF_Unpack_8x8.asm
similarity index 100%
rename from src/shaders/post_processing/Core_Kernels/PA_AVS_IEF_Unpack_8x8.asm
rename to src/shaders/post_processing/gen5_6/Core_Kernels/PA_AVS_IEF_Unpack_8x8.asm
diff --git a/src/shaders/post_processing/Core_Kernels/PA_DNDI_ALG.asm b/src/shaders/post_processing/gen5_6/Core_Kernels/PA_DNDI_ALG.asm
similarity index 100%
rename from src/shaders/post_processing/Core_Kernels/PA_DNDI_ALG.asm
rename to src/shaders/post_processing/gen5_6/Core_Kernels/PA_DNDI_ALG.asm
diff --git a/src/shaders/post_processing/Core_Kernels/PA_DN_ALG.asm b/src/shaders/post_processing/gen5_6/Core_Kernels/PA_DN_ALG.asm
similarity index 100%
rename from src/shaders/post_processing/Core_Kernels/PA_DN_ALG.asm
rename to src/shaders/post_processing/gen5_6/Core_Kernels/PA_DN_ALG.asm
diff --git a/src/shaders/post_processing/Core_Kernels/PA_Scaling.asm b/src/shaders/post_processing/gen5_6/Core_Kernels/PA_Scaling.asm
similarity index 100%
rename from src/shaders/post_processing/Core_Kernels/PA_Scaling.asm
rename to src/shaders/post_processing/gen5_6/Core_Kernels/PA_Scaling.asm
diff --git a/src/shaders/post_processing/Core_Kernels/PL2_AVS_IEF_16x8.asm b/src/shaders/post_processing/gen5_6/Core_Kernels/PL2_AVS_IEF_16x8.asm
similarity index 100%
rename from src/shaders/post_processing/Core_Kernels/PL2_AVS_IEF_16x8.asm
rename to src/shaders/post_processing/gen5_6/Core_Kernels/PL2_AVS_IEF_16x8.asm
diff --git a/src/shaders/post_processing/Core_Kernels/PL2_AVS_IEF_8x4.asm b/src/shaders/post_processing/gen5_6/Core_Kernels/PL2_AVS_IEF_8x4.asm
similarity index 100%
rename from src/shaders/post_processing/Core_Kernels/PL2_AVS_IEF_8x4.asm
rename to src/shaders/post_processing/gen5_6/Core_Kernels/PL2_AVS_IEF_8x4.asm
diff --git a/src/shaders/post_processing/Core_Kernels/PL2_AVS_IEF_8x8.asm b/src/shaders/post_processing/gen5_6/Core_Kernels/PL2_AVS_IEF_8x8.asm
similarity index 100%
rename from src/shaders/post_processing/Core_Kernels/PL2_AVS_IEF_8x8.asm
rename to src/shaders/post_processing/gen5_6/Core_Kernels/PL2_AVS_IEF_8x8.asm
diff --git a/src/shaders/post_processing/gen5_6/Core_Kernels/PL2_AVS_IEF_Unpack_16x8.asm b/src/shaders/post_processing/gen5_6/Core_Kernels/PL2_AVS_IEF_Unpack_16x8.asm
new file mode 100644
index 0000000..6c994c1
--- /dev/null
+++ b/src/shaders/post_processing/gen5_6/Core_Kernels/PL2_AVS_IEF_Unpack_16x8.asm
@@ -0,0 +1,271 @@
+/*
+ * All Video Processing kernels
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * This program is licensed under the terms and conditions of the
+ * Eclipse Public License (EPL), version 1.0. The full text of the EPL is at
+ * http://www.opensource.org/licenses/eclipse-1.0.php.
+ *
+ */
+
+//---------- PL2_AVS_IEF_Unpack_16x8.asm ----------
+
+#ifdef AVS_OUTPUT_16_BIT //Output is packed in AVYU format
+// Move first 8x8 words of Y to dest GRF (as packed)
+ mov (4) uwDEST_Y(0,1)<4> uwAVS_RESPONSE(0,0)<4;4,1>
+ mov (4) uwDEST_Y(1,1)<4> uwAVS_RESPONSE(0,8)<4;4,1>
+ mov (4) uwDEST_Y(4,1)<4> uwAVS_RESPONSE(0,4)<4;4,1>
+ mov (4) uwDEST_Y(5,1)<4> uwAVS_RESPONSE(0,12)<4;4,1>
+ mov (4) uwDEST_Y(8,1)<4> uwAVS_RESPONSE(1,0)<4;4,1>
+ mov (4) uwDEST_Y(9,1)<4> uwAVS_RESPONSE(1,8)<4;4,1>
+ mov (4) uwDEST_Y(12,1)<4> uwAVS_RESPONSE(1,4)<4;4,1>
+ mov (4) uwDEST_Y(13,1)<4> uwAVS_RESPONSE(1,12)<4;4,1>
+ mov (4) uwDEST_Y(16,1)<4> uwAVS_RESPONSE(2,0)<4;4,1>
+ mov (4) uwDEST_Y(17,1)<4> uwAVS_RESPONSE(2,8)<4;4,1>
+ mov (4) uwDEST_Y(20,1)<4> uwAVS_RESPONSE(2,4)<4;4,1>
+ mov (4) uwDEST_Y(21,1)<4> uwAVS_RESPONSE(2,12)<4;4,1>
+ mov (4) uwDEST_Y(24,1)<4> uwAVS_RESPONSE(3,0)<4;4,1>
+ mov (4) uwDEST_Y(25,1)<4> uwAVS_RESPONSE(3,8)<4;4,1>
+ mov (4) uwDEST_Y(28,1)<4> uwAVS_RESPONSE(3,4)<4;4,1>
+ mov (4) uwDEST_Y(29,1)<4> uwAVS_RESPONSE(3,12)<4;4,1>
+
+// Move first 8x8 words of U to dest GRF (as packed)
+ mov (4) uwDEST_Y(0,0)<4> uwAVS_RESPONSE(4,0)<4;4,1>
+ mov (4) uwDEST_Y(1,0)<4> uwAVS_RESPONSE(4,8)<4;4,1>
+ mov (4) uwDEST_Y(4,0)<4> uwAVS_RESPONSE(4,4)<4;4,1>
+ mov (4) uwDEST_Y(5,0)<4> uwAVS_RESPONSE(4,12)<4;4,1>
+ mov (4) uwDEST_Y(8,0)<4> uwAVS_RESPONSE(5,0)<4;4,1>
+ mov (4) uwDEST_Y(9,0)<4> uwAVS_RESPONSE(5,8)<4;4,1>
+ mov (4) uwDEST_Y(12,0)<4> uwAVS_RESPONSE(5,4)<4;4,1>
+ mov (4) uwDEST_Y(13,0)<4> uwAVS_RESPONSE(5,12)<4;4,1>
+ mov (4) uwDEST_Y(16,0)<4> uwAVS_RESPONSE(8,0)<4;4,1>
+ mov (4) uwDEST_Y(17,0)<4> uwAVS_RESPONSE(8,8)<4;4,1>
+ mov (4) uwDEST_Y(20,0)<4> uwAVS_RESPONSE(8,4)<4;4,1>
+ mov (4) uwDEST_Y(21,0)<4> uwAVS_RESPONSE(8,12)<4;4,1>
+ mov (4) uwDEST_Y(24,0)<4> uwAVS_RESPONSE(9,0)<4;4,1>
+ mov (4) uwDEST_Y(25,0)<4> uwAVS_RESPONSE(9,8)<4;4,1>
+ mov (4) uwDEST_Y(28,0)<4> uwAVS_RESPONSE(9,4)<4;4,1>
+ mov (4) uwDEST_Y(29,0)<4> uwAVS_RESPONSE(9,12)<4;4,1>
+
+// Move first 8x8 words of V to dest GRF (as packed)
+ mov (4) uwDEST_Y(0,2)<4> uwAVS_RESPONSE(6,0)<4;4,1>
+ mov (4) uwDEST_Y(1,2)<4> uwAVS_RESPONSE(6,8)<4;4,1>
+ mov (4) uwDEST_Y(4,2)<4> uwAVS_RESPONSE(6,4)<4;4,1>
+ mov (4) uwDEST_Y(5,2)<4> uwAVS_RESPONSE(6,12)<4;4,1>
+ mov (4) uwDEST_Y(8,2)<4> uwAVS_RESPONSE(7,0)<4;4,1>
+ mov (4) uwDEST_Y(9,2)<4> uwAVS_RESPONSE(7,8)<4;4,1>
+ mov (4) uwDEST_Y(12,2)<4> uwAVS_RESPONSE(7,4)<4;4,1>
+ mov (4) uwDEST_Y(13,2)<4> uwAVS_RESPONSE(7,12)<4;4,1>
+ mov (4) uwDEST_Y(16,2)<4> uwAVS_RESPONSE(10,0)<4;4,1>
+ mov (4) uwDEST_Y(17,2)<4> uwAVS_RESPONSE(10,8)<4;4,1>
+ mov (4) uwDEST_Y(20,2)<4> uwAVS_RESPONSE(10,4)<4;4,1>
+ mov (4) uwDEST_Y(21,2)<4> uwAVS_RESPONSE(10,12)<4;4,1>
+ mov (4) uwDEST_Y(24,2)<4> uwAVS_RESPONSE(11,0)<4;4,1>
+ mov (4) uwDEST_Y(25,2)<4> uwAVS_RESPONSE(11,8)<4;4,1>
+ mov (4) uwDEST_Y(28,2)<4> uwAVS_RESPONSE(11,4)<4;4,1>
+ mov (4) uwDEST_Y(29,2)<4> uwAVS_RESPONSE(11,12)<4;4,1>
+
+// Move first 8x8 words of A to dest GRF (as packed)
+ mov (4) uwDEST_Y(0,3)<4> 0:uw
+ mov (4) uwDEST_Y(1,3)<4> 0:uw
+ mov (4) uwDEST_Y(4,3)<4> 0:uw
+ mov (4) uwDEST_Y(5,3)<4> 0:uw
+ mov (4) uwDEST_Y(8,3)<4> 0:uw
+ mov (4) uwDEST_Y(9,3)<4> 0:uw
+ mov (4) uwDEST_Y(12,3)<4> 0:uw
+ mov (4) uwDEST_Y(13,3)<4> 0:uw
+ mov (4) uwDEST_Y(16,3)<4> 0:uw
+ mov (4) uwDEST_Y(17,3)<4> 0:uw
+ mov (4) uwDEST_Y(20,3)<4> 0:uw
+ mov (4) uwDEST_Y(21,3)<4> 0:uw
+ mov (4) uwDEST_Y(24,3)<4> 0:uw
+ mov (4) uwDEST_Y(25,3)<4> 0:uw
+ mov (4) uwDEST_Y(28,3)<4> 0:uw
+ mov (4) uwDEST_Y(29,3)<4> 0:uw
+
+// Move second 8x8 words of Y to dest GRF
+ mov (4) uwDEST_Y(2,1)<4> uwAVS_RESPONSE_2(0,0)<4;4,1>
+ mov (4) uwDEST_Y(3,1)<4> uwAVS_RESPONSE_2(0,8)<4;4,1>
+ mov (4) uwDEST_Y(6,1)<4> uwAVS_RESPONSE_2(0,4)<4;4,1>
+ mov (4) uwDEST_Y(7,1)<4> uwAVS_RESPONSE_2(0,12)<4;4,1>
+ mov (4) uwDEST_Y(10,1)<4> uwAVS_RESPONSE_2(1,0)<4;4,1>
+ mov (4) uwDEST_Y(11,1)<4> uwAVS_RESPONSE_2(1,8)<4;4,1>
+ mov (4) uwDEST_Y(14,1)<4> uwAVS_RESPONSE_2(1,4)<4;4,1>
+ mov (4) uwDEST_Y(15,1)<4> uwAVS_RESPONSE_2(1,12)<4;4,1>
+ mov (4) uwDEST_Y(18,1)<4> uwAVS_RESPONSE_2(2,0)<4;4,1>
+ mov (4) uwDEST_Y(19,1)<4> uwAVS_RESPONSE_2(2,8)<4;4,1>
+ mov (4) uwDEST_Y(22,1)<4> uwAVS_RESPONSE_2(2,4)<4;4,1>
+ mov (4) uwDEST_Y(23,1)<4> uwAVS_RESPONSE_2(2,12)<4;4,1>
+ mov (4) uwDEST_Y(26,1)<4> uwAVS_RESPONSE_2(3,0)<4;4,1>
+ mov (4) uwDEST_Y(27,1)<4> uwAVS_RESPONSE_2(3,8)<4;4,1>
+ mov (4) uwDEST_Y(30,1)<4> uwAVS_RESPONSE_2(3,4)<4;4,1>
+ mov (4) uwDEST_Y(31,1)<4> uwAVS_RESPONSE_2(3,12)<4;4,1>
+
+// Move second 8x8 words of U to dest GRF
+ mov (4) uwDEST_Y(2,0)<4> uwAVS_RESPONSE_2(4,0)<4;4,1>
+ mov (4) uwDEST_Y(3,0)<4> uwAVS_RESPONSE_2(4,8)<4;4,1>
+ mov (4) uwDEST_Y(6,0)<4> uwAVS_RESPONSE_2(4,4)<4;4,1>
+ mov (4) uwDEST_Y(7,0)<4> uwAVS_RESPONSE_2(4,12)<4;4,1>
+ mov (4) uwDEST_Y(10,0)<4> uwAVS_RESPONSE_2(5,0)<4;4,1>
+ mov (4) uwDEST_Y(11,0)<4> uwAVS_RESPONSE_2(5,8)<4;4,1>
+ mov (4) uwDEST_Y(14,0)<4> uwAVS_RESPONSE_2(5,4)<4;4,1>
+ mov (4) uwDEST_Y(15,0)<4> uwAVS_RESPONSE_2(5,12)<4;4,1>
+ mov (4) uwDEST_Y(18,0)<4> uwAVS_RESPONSE_2(8,0)<4;4,1>
+ mov (4) uwDEST_Y(19,0)<4> uwAVS_RESPONSE_2(8,8)<4;4,1>
+ mov (4) uwDEST_Y(22,0)<4> uwAVS_RESPONSE_2(8,4)<4;4,1>
+ mov (4) uwDEST_Y(23,0)<4> uwAVS_RESPONSE_2(8,12)<4;4,1>
+ mov (4) uwDEST_Y(26,0)<4> uwAVS_RESPONSE_2(9,0)<4;4,1>
+ mov (4) uwDEST_Y(27,0)<4> uwAVS_RESPONSE_2(9,8)<4;4,1>
+ mov (4) uwDEST_Y(30,0)<4> uwAVS_RESPONSE_2(9,4)<4;4,1>
+ mov (4) uwDEST_Y(31,0)<4> uwAVS_RESPONSE_2(9,12)<4;4,1>
+
+// Move second 8x8 words of V to dest GRF
+ mov (4) uwDEST_Y(2,2)<4> uwAVS_RESPONSE_2(6,0)<4;4,1>
+ mov (4) uwDEST_Y(3,2)<4> uwAVS_RESPONSE_2(6,8)<4;4,1>
+ mov (4) uwDEST_Y(6,2)<4> uwAVS_RESPONSE_2(6,4)<4;4,1>
+ mov (4) uwDEST_Y(7,2)<4> uwAVS_RESPONSE_2(6,12)<4;4,1>
+ mov (4) uwDEST_Y(10,2)<4> uwAVS_RESPONSE_2(7,0)<4;4,1>
+ mov (4) uwDEST_Y(11,2)<4> uwAVS_RESPONSE_2(7,8)<4;4,1>
+ mov (4) uwDEST_Y(14,2)<4> uwAVS_RESPONSE_2(7,4)<4;4,1>
+ mov (4) uwDEST_Y(15,2)<4> uwAVS_RESPONSE_2(7,12)<4;4,1>
+ mov (4) uwDEST_Y(18,2)<4> uwAVS_RESPONSE_2(10,0)<4;4,1>
+ mov (4) uwDEST_Y(19,2)<4> uwAVS_RESPONSE_2(10,8)<4;4,1>
+ mov (4) uwDEST_Y(22,2)<4> uwAVS_RESPONSE_2(10,4)<4;4,1>
+ mov (4) uwDEST_Y(23,2)<4> uwAVS_RESPONSE_2(10,12)<4;4,1>
+ mov (4) uwDEST_Y(26,2)<4> uwAVS_RESPONSE_2(11,0)<4;4,1>
+ mov (4) uwDEST_Y(27,2)<4> uwAVS_RESPONSE_2(11,8)<4;4,1>
+ mov (4) uwDEST_Y(30,2)<4> uwAVS_RESPONSE_2(11,4)<4;4,1>
+ mov (4) uwDEST_Y(31,2)<4> uwAVS_RESPONSE_2(11,12)<4;4,1>
+
+// Move second 8x8 words of A to dest GRF
+ mov (4) uwDEST_Y(2,3)<4> 0:uw
+ mov (4) uwDEST_Y(3,3)<4> 0:uw
+ mov (4) uwDEST_Y(6,3)<4> 0:uw
+ mov (4) uwDEST_Y(7,3)<4> 0:uw
+ mov (4) uwDEST_Y(10,3)<4> 0:uw
+ mov (4) uwDEST_Y(11,3)<4> 0:uw
+ mov (4) uwDEST_Y(14,3)<4> 0:uw
+ mov (4) uwDEST_Y(15,3)<4> 0:uw
+ mov (4) uwDEST_Y(18,3)<4> 0:uw
+ mov (4) uwDEST_Y(19,3)<4> 0:uw
+ mov (4) uwDEST_Y(22,3)<4> 0:uw
+ mov (4) uwDEST_Y(23,3)<4> 0:uw
+ mov (4) uwDEST_Y(26,3)<4> 0:uw
+ mov (4) uwDEST_Y(27,3)<4> 0:uw
+ mov (4) uwDEST_Y(30,3)<4> 0:uw
+ mov (4) uwDEST_Y(31,3)<4> 0:uw
+
+/* This section will be used if 16-bit output is needed in planar format -vK
+ // Move 1st 8x8 words of Y to dest GRF at lower 8 words of each GRF.
+ $for(0; <8/2; 1) {
+ mov (8) uwDEST_Y(%1*2)<1> uwAVS_RESPONSE(%1,0)<8;4,1>
+ mov (8) uwDEST_Y(%1*2+1)<1> uwAVS_RESPONSE(%1,8)<8;4,1>
+ }
+
+ // Move 1st 8x8 words of U to dest GRF (Copy high byte in a word)
+ mov (8) uwDEST_U(0)<1> uwAVS_RESPONSE(4,0)<8;4,1>
+ mov (8) uwDEST_U(1)<1> uwAVS_RESPONSE(4,8)<8;4,1>
+ mov (8) uwDEST_U(2)<1> uwAVS_RESPONSE(5,0)<8;4,1>
+ mov (8) uwDEST_U(3)<1> uwAVS_RESPONSE(5,8)<8;4,1>
+ mov (8) uwDEST_U(4)<1> uwAVS_RESPONSE(8,0)<8;4,1>
+ mov (8) uwDEST_U(5)<1> uwAVS_RESPONSE(8,8)<8;4,1>
+ mov (8) uwDEST_U(6)<1> uwAVS_RESPONSE(9,0)<8;4,1>
+ mov (8) uwDEST_U(7)<1> uwAVS_RESPONSE(9,8)<8;4,1>
+
+ // Move 1st 8x8 words of V to dest GRF
+ mov (8) uwDEST_V(0)<1> uwAVS_RESPONSE(6,0)<8;4,1>
+ mov (8) uwDEST_V(1)<1> uwAVS_RESPONSE(6,8)<8;4,1>
+ mov (8) uwDEST_V(2)<1> uwAVS_RESPONSE(7,0)<8;4,1>
+ mov (8) uwDEST_V(3)<1> uwAVS_RESPONSE(7,8)<8;4,1>
+ mov (8) uwDEST_V(4)<1> uwAVS_RESPONSE(10,0)<8;4,1>
+ mov (8) uwDEST_V(5)<1> uwAVS_RESPONSE(10,8)<8;4,1>
+ mov (8) uwDEST_V(6)<1> uwAVS_RESPONSE(11,0)<8;4,1>
+ mov (8) uwDEST_V(7)<1> uwAVS_RESPONSE(11,8)<8;4,1>
+
+ // Move 2nd 8x8 words of Y to dest GRF at higher 8 words of each GRF.
+ $for(0; <8/2; 1) {
+ mov (8) uwDEST_Y(%1*2,8)<1> uwAVS_RESPONSE_2(%1,0)<8;4,1>
+ mov (8) uwDEST_Y(%1*2+1,8)<1> uwAVS_RESPONSE_2(%1,8)<8;4,1>
+ }
+
+ // Move 2nd 8x8 words of U to dest GRF (Copy high byte in a word)
+ mov (8) uwDEST_U(0,8)<1> uwAVS_RESPONSE_2(4,0)<8;4,1>
+ mov (8) uwDEST_U(1,8)<1> uwAVS_RESPONSE_2(4,8)<8;4,1>
+ mov (8) uwDEST_U(2,8)<1> uwAVS_RESPONSE_2(5,0)<8;4,1>
+ mov (8) uwDEST_U(3,8)<1> uwAVS_RESPONSE_2(5,8)<8;4,1>
+ mov (8) uwDEST_U(4,8)<1> uwAVS_RESPONSE_2(8,0)<8;4,1>
+ mov (8) uwDEST_U(5,8)<1> uwAVS_RESPONSE_2(8,8)<8;4,1>
+ mov (8) uwDEST_U(6,8)<1> uwAVS_RESPONSE_2(9,0)<8;4,1>
+ mov (8) uwDEST_U(7,8)<1> uwAVS_RESPONSE_2(9,8)<8;4,1>
+
+ // Move 2nd 8x8 words of V to dest GRF
+ mov (8) uwDEST_V(0,8)<1> uwAVS_RESPONSE_2(6,0)<8;4,1>
+ mov (8) uwDEST_V(1,8)<1> uwAVS_RESPONSE_2(6,8)<8;4,1>
+ mov (8) uwDEST_V(2,8)<1> uwAVS_RESPONSE_2(7,0)<8;4,1>
+ mov (8) uwDEST_V(3,8)<1> uwAVS_RESPONSE_2(7,8)<8;4,1>
+ mov (8) uwDEST_V(4,8)<1> uwAVS_RESPONSE_2(10,0)<8;4,1>
+ mov (8) uwDEST_V(5,8)<1> uwAVS_RESPONSE_2(10,8)<8;4,1>
+ mov (8) uwDEST_V(6,8)<1> uwAVS_RESPONSE_2(11,0)<8;4,1>
+ mov (8) uwDEST_V(7,8)<1> uwAVS_RESPONSE_2(11,8)<8;4,1>
+*/
+#else
+ // Move 1st 8x8 words of Y to dest GRF at lower 8 words of each GRF.
+ $for(0; <8/2; 1) {
+ mov (8) uwDEST_Y(%1*2)<1> ubAVS_RESPONSE(%1,1)<16;4,2> // Copy high byte in a word
+ mov (8) uwDEST_Y(%1*2+1)<1> ubAVS_RESPONSE(%1,8+1)<16;4,2> // Copy high byte in a word
+ }
+
+ // Move 1st 8x8 words of U to dest GRF (Copy high byte in a word)
+ mov (8) uwDEST_U(0)<1> ubAVS_RESPONSE(4,1)<16;4,2>
+ mov (8) uwDEST_U(1)<1> ubAVS_RESPONSE(4,8+1)<16;4,2>
+ mov (8) uwDEST_U(2)<1> ubAVS_RESPONSE(5,1)<16;4,2>
+ mov (8) uwDEST_U(3)<1> ubAVS_RESPONSE(5,8+1)<16;4,2>
+ mov (8) uwDEST_U(4)<1> ubAVS_RESPONSE(8,1)<16;4,2>
+ mov (8) uwDEST_U(5)<1> ubAVS_RESPONSE(8,8+1)<16;4,2>
+ mov (8) uwDEST_U(6)<1> ubAVS_RESPONSE(9,1)<16;4,2>
+ mov (8) uwDEST_U(7)<1> ubAVS_RESPONSE(9,8+1)<16;4,2>
+
+ // Move 1st 8x8 words of V to dest GRF
+ mov (8) uwDEST_V(0)<1> ubAVS_RESPONSE(6,1)<16;4,2>
+ mov (8) uwDEST_V(1)<1> ubAVS_RESPONSE(6,8+1)<16;4,2>
+ mov (8) uwDEST_V(2)<1> ubAVS_RESPONSE(7,1)<16;4,2>
+ mov (8) uwDEST_V(3)<1> ubAVS_RESPONSE(7,8+1)<16;4,2>
+ mov (8) uwDEST_V(4)<1> ubAVS_RESPONSE(10,1)<16;4,2>
+ mov (8) uwDEST_V(5)<1> ubAVS_RESPONSE(10,8+1)<16;4,2>
+ mov (8) uwDEST_V(6)<1> ubAVS_RESPONSE(11,1)<16;4,2>
+ mov (8) uwDEST_V(7)<1> ubAVS_RESPONSE(11,8+1)<16;4,2>
+
+ // Move 2nd 8x8 words of Y to dest GRF at higher 8 words of each GRF.
+ $for(0; <8/2; 1) {
+ mov (8) uwDEST_Y(%1*2,8)<1> ubAVS_RESPONSE_2(%1,1)<16;4,2> // Copy high byte in a word
+ mov (8) uwDEST_Y(%1*2+1,8)<1> ubAVS_RESPONSE_2(%1,8+1)<16;4,2> // Copy high byte in a word
+ }
+
+ // Move 2nd 8x8 words of U to dest GRF (Copy high byte in a word)
+ mov (8) uwDEST_U(0,8)<1> ubAVS_RESPONSE_2(4,1)<16;4,2>
+ mov (8) uwDEST_U(1,8)<1> ubAVS_RESPONSE_2(4,8+1)<16;4,2>
+ mov (8) uwDEST_U(2,8)<1> ubAVS_RESPONSE_2(5,1)<16;4,2>
+ mov (8) uwDEST_U(3,8)<1> ubAVS_RESPONSE_2(5,8+1)<16;4,2>
+ mov (8) uwDEST_U(4,8)<1> ubAVS_RESPONSE_2(8,1)<16;4,2>
+ mov (8) uwDEST_U(5,8)<1> ubAVS_RESPONSE_2(8,8+1)<16;4,2>
+ mov (8) uwDEST_U(6,8)<1> ubAVS_RESPONSE_2(9,1)<16;4,2>
+ mov (8) uwDEST_U(7,8)<1> ubAVS_RESPONSE_2(9,8+1)<16;4,2>
+
+ // Move 2nd 8x8 words of V to dest GRF
+ mov (8) uwDEST_V(0,8)<1> ubAVS_RESPONSE_2(6,1)<16;4,2>
+ mov (8) uwDEST_V(1,8)<1> ubAVS_RESPONSE_2(6,8+1)<16;4,2>
+ mov (8) uwDEST_V(2,8)<1> ubAVS_RESPONSE_2(7,1)<16;4,2>
+ mov (8) uwDEST_V(3,8)<1> ubAVS_RESPONSE_2(7,8+1)<16;4,2>
+ mov (8) uwDEST_V(4,8)<1> ubAVS_RESPONSE_2(10,1)<16;4,2>
+ mov (8) uwDEST_V(5,8)<1> ubAVS_RESPONSE_2(10,8+1)<16;4,2>
+ mov (8) uwDEST_V(6,8)<1> ubAVS_RESPONSE_2(11,1)<16;4,2>
+ mov (8) uwDEST_V(7,8)<1> ubAVS_RESPONSE_2(11,8+1)<16;4,2>
+#endif
+
+ // Re-define new # of lines
+ #undef nUV_NUM_OF_ROWS
+ #undef nY_NUM_OF_ROWS
+
+ #define nY_NUM_OF_ROWS 8
+ #define nUV_NUM_OF_ROWS 8
+
diff --git a/src/shaders/post_processing/Core_Kernels/PL2_AVS_IEF_Unpack_8x4.asm b/src/shaders/post_processing/gen5_6/Core_Kernels/PL2_AVS_IEF_Unpack_8x4.asm
similarity index 100%
rename from src/shaders/post_processing/Core_Kernels/PL2_AVS_IEF_Unpack_8x4.asm
rename to src/shaders/post_processing/gen5_6/Core_Kernels/PL2_AVS_IEF_Unpack_8x4.asm
diff --git a/src/shaders/post_processing/Core_Kernels/PL2_AVS_IEF_Unpack_8x8.asm b/src/shaders/post_processing/gen5_6/Core_Kernels/PL2_AVS_IEF_Unpack_8x8.asm
similarity index 100%
rename from src/shaders/post_processing/Core_Kernels/PL2_AVS_IEF_Unpack_8x8.asm
rename to src/shaders/post_processing/gen5_6/Core_Kernels/PL2_AVS_IEF_Unpack_8x8.asm
diff --git a/src/shaders/post_processing/Core_Kernels/PL2_Scaling.asm b/src/shaders/post_processing/gen5_6/Core_Kernels/PL2_Scaling.asm
similarity index 100%
rename from src/shaders/post_processing/Core_Kernels/PL2_Scaling.asm
rename to src/shaders/post_processing/gen5_6/Core_Kernels/PL2_Scaling.asm
diff --git a/src/shaders/post_processing/Core_Kernels/PL3_AVS_IEF_16x8.asm b/src/shaders/post_processing/gen5_6/Core_Kernels/PL3_AVS_IEF_16x8.asm
similarity index 100%
rename from src/shaders/post_processing/Core_Kernels/PL3_AVS_IEF_16x8.asm
rename to src/shaders/post_processing/gen5_6/Core_Kernels/PL3_AVS_IEF_16x8.asm
diff --git a/src/shaders/post_processing/Core_Kernels/PL3_AVS_IEF_8x4.asm b/src/shaders/post_processing/gen5_6/Core_Kernels/PL3_AVS_IEF_8x4.asm
similarity index 100%
rename from src/shaders/post_processing/Core_Kernels/PL3_AVS_IEF_8x4.asm
rename to src/shaders/post_processing/gen5_6/Core_Kernels/PL3_AVS_IEF_8x4.asm
diff --git a/src/shaders/post_processing/Core_Kernels/PL3_AVS_IEF_8x8.asm b/src/shaders/post_processing/gen5_6/Core_Kernels/PL3_AVS_IEF_8x8.asm
similarity index 100%
rename from src/shaders/post_processing/Core_Kernels/PL3_AVS_IEF_8x8.asm
rename to src/shaders/post_processing/gen5_6/Core_Kernels/PL3_AVS_IEF_8x8.asm
diff --git a/src/shaders/post_processing/Core_Kernels/PL3_AVS_IEF_Unpack_16x8.asm b/src/shaders/post_processing/gen5_6/Core_Kernels/PL3_AVS_IEF_Unpack_16x8.asm
similarity index 100%
rename from src/shaders/post_processing/Core_Kernels/PL3_AVS_IEF_Unpack_16x8.asm
rename to src/shaders/post_processing/gen5_6/Core_Kernels/PL3_AVS_IEF_Unpack_16x8.asm
diff --git a/src/shaders/post_processing/Core_Kernels/PL3_AVS_IEF_Unpack_8x4.asm b/src/shaders/post_processing/gen5_6/Core_Kernels/PL3_AVS_IEF_Unpack_8x4.asm
similarity index 100%
rename from src/shaders/post_processing/Core_Kernels/PL3_AVS_IEF_Unpack_8x4.asm
rename to src/shaders/post_processing/gen5_6/Core_Kernels/PL3_AVS_IEF_Unpack_8x4.asm
diff --git a/src/shaders/post_processing/Core_Kernels/PL3_AVS_IEF_Unpack_8x8.asm b/src/shaders/post_processing/gen5_6/Core_Kernels/PL3_AVS_IEF_Unpack_8x8.asm
similarity index 100%
rename from src/shaders/post_processing/Core_Kernels/PL3_AVS_IEF_Unpack_8x8.asm
rename to src/shaders/post_processing/gen5_6/Core_Kernels/PL3_AVS_IEF_Unpack_8x8.asm
diff --git a/src/shaders/post_processing/Core_Kernels/PL3_Scaling.asm b/src/shaders/post_processing/gen5_6/Core_Kernels/PL3_Scaling.asm
similarity index 100%
rename from src/shaders/post_processing/Core_Kernels/PL3_Scaling.asm
rename to src/shaders/post_processing/gen5_6/Core_Kernels/PL3_Scaling.asm
diff --git a/src/shaders/post_processing/Core_Kernels/PL_DNDI_ALG.asm b/src/shaders/post_processing/gen5_6/Core_Kernels/PL_DNDI_ALG.asm
similarity index 100%
rename from src/shaders/post_processing/Core_Kernels/PL_DNDI_ALG.asm
rename to src/shaders/post_processing/gen5_6/Core_Kernels/PL_DNDI_ALG.asm
diff --git a/src/shaders/post_processing/Core_Kernels/PL_DNDI_ALG_UVCopy_NV11.asm b/src/shaders/post_processing/gen5_6/Core_Kernels/PL_DNDI_ALG_UVCopy_NV11.asm
similarity index 100%
rename from src/shaders/post_processing/Core_Kernels/PL_DNDI_ALG_UVCopy_NV11.asm
rename to src/shaders/post_processing/gen5_6/Core_Kernels/PL_DNDI_ALG_UVCopy_NV11.asm
diff --git a/src/shaders/post_processing/gen5_6/Core_Kernels/PL_DNDI_ALG_UVCopy_NV12.asm b/src/shaders/post_processing/gen5_6/Core_Kernels/PL_DNDI_ALG_UVCopy_NV12.asm
new file mode 100644
index 0000000..90089ac
--- /dev/null
+++ b/src/shaders/post_processing/gen5_6/Core_Kernels/PL_DNDI_ALG_UVCopy_NV12.asm
@@ -0,0 +1,107 @@
+/*
+ * All Video Processing kernels
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * This program is licensed under the terms and conditions of the
+ * Eclipse Public License (EPL), version 1.0. The full text of the EPL is at
+ * http://www.opensource.org/licenses/eclipse-1.0.php.
+ *
+ */
+
+#define DI_ENABLE
+
+ #include "DNDI.inc"
+
+ #undef nY_NUM_OF_ROWS
+ #define nY_NUM_OF_ROWS 8 // Y rows per block: the loops below move nY_NUM_OF_ROWS/2 = 4 rows for each of the previous and current frames
+ #undef nUV_NUM_OF_ROWS
+ #define nUV_NUM_OF_ROWS 8 // U/V rows per block: likewise consumed as nUV_NUM_OF_ROWS/2 per frame below
+
+ #undef nSMPL_RESP_LEN
+ #define nSMPL_RESP_LEN nSMPL_RESP_LEN_DNDI // use the DNDI response length (number of GRFs)
+ #undef nDPW_BLOCK_SIZE_HIST
+ #define nDPW_BLOCK_SIZE_HIST nBLOCK_WIDTH_4+nBLOCK_HEIGHT_1 // HIST block size for write; NOTE(review): original note said 4x2 but the macros read as width 4, height 1 - confirm
+ #undef nDPW_BLOCK_SIZE_DN
+ #define nDPW_BLOCK_SIZE_DN nBLOCK_WIDTH_16+nBLOCK_HEIGHT_4 // DN block size for write is 16x4
+ #undef nDPR_BLOCK_SIZE_UV
+ #define nDPR_BLOCK_SIZE_UV nBLOCK_WIDTH_16+nBLOCK_HEIGHT_2 // UV block size is 16x2; used for both the UV read and the UV write at the end of this file
+
+////////////////////////////////////// Run the DN Algorithm ///////////////////////////////////////
+ #include "DNDI_COMMAND.asm"
+
+////////////////////////////////////// Rearrange for Internal Planar //////////////////////////////
+ // Previous frame Y: copy nY_NUM_OF_ROWS/2 rows of 16 pixels from the response into DEST_Y rows 0..3
+ $for (0; <nY_NUM_OF_ROWS/2; 1) {
+ mov (16) uwDEST_Y(%1,0)<1> ubRESP(nDI_PREV_FRAME_LUMA_OFFSET,%1*16)
+ }
+ // Previous frame U,V: de-interleave the chroma bytes (stride 2) into DEST_U / DEST_V starting at register 0
+ $for (0; <nUV_NUM_OF_ROWS/2; 1) {
+ mov (8) uwDEST_U(0,%1*8)<1> ubRESP(nDI_PREV_FRAME_CHROMA_OFFSET,%1*16+1)<16;8,2> // U pixels: odd bytes (offset +1, stride 2)
+ mov (8) uwDEST_V(0,%1*8)<1> ubRESP(nDI_PREV_FRAME_CHROMA_OFFSET,%1*16)<16;8,2> // V pixels: even bytes (offset +0, stride 2)
+ }
+ // Current frame Y: same copy as above, placed after the previous frame in DEST_Y rows 4..7
+ $for (0; <nY_NUM_OF_ROWS/2; 1) {
+ mov (16) uwDEST_Y(%1+4,0)<1> ubRESP(nDI_CURR_FRAME_LUMA_OFFSET,%1*16)
+ }
+ // Current frame U,V: same de-interleave as above, written to DEST_U / DEST_V starting at register 2
+ $for (0; <nUV_NUM_OF_ROWS/2; 1) {
+ mov (8) uwDEST_U(2,%1*8)<1> ubRESP(nDI_CURR_FRAME_CHROMA_OFFSET,%1*16+1)<16;8,2> // U pixels: odd bytes (offset +1, stride 2)
+ mov (8) uwDEST_V(2,%1*8)<1> ubRESP(nDI_CURR_FRAME_CHROMA_OFFSET,%1*16)<16;8,2> // V pixels: even bytes (offset +0, stride 2)
+ }
+
+////////////////////////////////////// Save the STMM Data for Next Run /////////////////////////
+ // Build a data-port write message (header + one payload register) and write the STMM block to memory
+ shr (1) rMSGSRC.0<1>:ud wORIX<0;1,0>:w 1:w // X origin / 2
+ mov (1) rMSGSRC.1<1>:ud wORIY<0;1,0>:w // Y origin (not scaled)
+ mov (1) rMSGSRC.2<1>:ud nDPW_BLOCK_SIZE_STMM:ud // block width and height (8x4)
+ mov (8) mudMSGHDR_STMM(0)<1> rMSGSRC.0<8;8,1>:ud // message header = the rMSGSRC register filled in above
+ mov (8) mudMSGHDR_STMM(1)<1> udRESP(nDI_STMM_OFFSET,0) // payload: move STMM from the response to MRF
+ send (8) dNULLREG mMSGHDR_STMM udDUMMY_NULL nDATAPORT_WRITE nDPMW_MSGDSC+nDPMW_MSG_LEN_STMM+nBI_STMM_HISTORY_OUTPUT:ud
+
+////////////////////////////////////// Save the History Data for Next Run /////////////////////////
+ #include "DI_Hist_Save.asm"
+
+////////////////////////////////////// Save the DN Curr Frame for Next Run ////////////////////////
+ add (4) pCF_Y_OFFSET<1>:uw ubSRC_CF_OFFSET<4;4,1>:ub npDN_YUV:w
+ // Branch on field order: ubTFLD_FIRST == 1 jumps to TOP_FIELD_FIRST, anything else falls through to BOTTOM_FIELD_FIRST
+ cmp.e.f0.0 (1) null<1>:w ubTFLD_FIRST<0;1,0>:ub 1:w
+ (f0.0) jmpi (1) TOP_FIELD_FIRST
+
+BOTTOM_FIELD_FIRST:
+ $for (0,0; <nY_NUM_OF_ROWS/2; 2,1) {
+ mov (4) mudMSGHDR_DN(1,%1*4)<1> udRESP(nDI_CURR_2ND_FIELD_LUMA_OFFSET,%2*4)<4;4,1> // 2nd field luma from current frame (line 0,2)
+ mov (4) mudMSGHDR_DN(1,%1*4+4)<1> udRESP(nDI_CURR_FRAME_LUMA_OFFSET+%2,4)<4;4,1> // 1st field luma from current frame (line 1,3)
+ }
+ jmpi (1) SAVE_DN_CURR
+
+TOP_FIELD_FIRST:
+ $for (0,0; <nY_NUM_OF_ROWS/2; 2,1) {
+ mov (4) mudMSGHDR_DN(1,%1*4)<1> udRESP(nDI_CURR_FRAME_LUMA_OFFSET+%2,0)<4;4,1> // 1st field luma from current frame (line 0,2)
+ mov (4) mudMSGHDR_DN(1,%1*4+4)<1> udRESP(nDI_CURR_2ND_FIELD_LUMA_OFFSET,%2*4)<4;4,1> // 2nd field luma from current frame (line 1,3)
+ }
+SAVE_DN_CURR:
+ $for (0; <nY_NUM_OF_ROWS/2; 1) {
+ mov (16) mubMSGHDR_DN(1, %1*16)<1> ubRESP(nDI_CURR_FRAME_LUMA_OFFSET,%1*16) // NOTE(review): targets the same MSGHDR_DN(1) payload bytes the field-order paths above just filled, so it appears to supersede them - confirm intent
+ }
+
+ mov (2) rMSGSRC.0<1>:ud wORIX<2;2,1>:w // X origin and Y origin
+ mov (1) rMSGSRC.2<1>:ud nDPW_BLOCK_SIZE_DN:ud // block width and height (16x4)
+ mov (8) mudMSGHDR_DN(0)<1> rMSGSRC.0<8;8,1>:ud
+ send (8) dNULLREG mMSGHDR_DN udDUMMY_NULL nDATAPORT_WRITE nDPMW_MSGDSC+nDPMW_MSG_LEN_PL_DN_DI+nBI_DESTINATION_Y:ud
+
+
+/////////////////////////////NV12 UV Copy 420/////////////////////////////////////////////////////
+ // Read one 16x2 UV block from the current source surface through the data port
+ add (2) rMSGSRC.0<1>:d wORIX<2;2,1>:w wSRC_H_ORI_OFFSET<2;2,1>:w // Source Y Block origin
+ asr (1) rMSGSRC.1<1>:d rMSGSRC.1<0;1,0>:d 1:w // U/V block vertical origin is half of Y's (only .1, the Y coordinate, is shifted)
+ mov (1) rMSGSRC.2<1>:ud nDPR_BLOCK_SIZE_UV:ud // U/V block width and height (16x2)
+ mov (8) mudMSGHDR_DN<1> rMSGSRC<8;8,1>:ud
+ send (8) udBOT_U_IO(0)<1> mMSGHDR_DN udDUMMY_NULL nDATAPORT_READ nDPMR_MSGDSC+nRESLEN_1+nBI_CURRENT_SRC_UV:ud
+
+ // Write the block just read, unchanged, to the destination UV surface through the data port
+ mov (2) rMSGSRC.0<1>:ud wORIX<2;2,1>:w // X origin and Y origin
+ asr (1) rMSGSRC.1<1>:d rMSGSRC.1<0;1,0>:d 1:w // U/V block vertical origin is half of Y's (only .1, the Y coordinate, is shifted)
+ mov (1) rMSGSRC.2<1>:ud nDPR_BLOCK_SIZE_UV:ud // block width and height (16x2)
+ mov (8) mudMSGHDR_DN(0)<1> rMSGSRC.0<8;8,1>:ud
+ mov (8) mudMSGHDR_DN(1)<1> udBOT_U_IO(0)<8;8,1>
+ send (8) dNULLREG mMSGHDR_DN udDUMMY_NULL nDATAPORT_WRITE nDPMW_MSGDSC+nMSGLEN_1+nBI_DESTINATION_UV:ud
\ No newline at end of file
diff --git a/src/shaders/post_processing/Core_Kernels/PL_DNDI_ALG_UVCopy_P208.asm b/src/shaders/post_processing/gen5_6/Core_Kernels/PL_DNDI_ALG_UVCopy_P208.asm
similarity index 100%
rename from src/shaders/post_processing/Core_Kernels/PL_DNDI_ALG_UVCopy_P208.asm
rename to src/shaders/post_processing/gen5_6/Core_Kernels/PL_DNDI_ALG_UVCopy_P208.asm
diff --git a/src/shaders/post_processing/Core_Kernels/PL_DNDI_ALG_UVCopy_PL3.asm b/src/shaders/post_processing/gen5_6/Core_Kernels/PL_DNDI_ALG_UVCopy_PL3.asm
similarity index 100%
rename from src/shaders/post_processing/Core_Kernels/PL_DNDI_ALG_UVCopy_PL3.asm
rename to src/shaders/post_processing/gen5_6/Core_Kernels/PL_DNDI_ALG_UVCopy_PL3.asm
diff --git a/src/shaders/post_processing/Core_Kernels/PL_DN_ALG.asm b/src/shaders/post_processing/gen5_6/Core_Kernels/PL_DN_ALG.asm
similarity index 100%
rename from src/shaders/post_processing/Core_Kernels/PL_DN_ALG.asm
rename to src/shaders/post_processing/gen5_6/Core_Kernels/PL_DN_ALG.asm
diff --git a/src/shaders/post_processing/Core_Kernels/RGB_AVS_IEF_16x8.asm b/src/shaders/post_processing/gen5_6/Core_Kernels/RGB_AVS_IEF_16x8.asm
similarity index 100%
rename from src/shaders/post_processing/Core_Kernels/RGB_AVS_IEF_16x8.asm
rename to src/shaders/post_processing/gen5_6/Core_Kernels/RGB_AVS_IEF_16x8.asm
diff --git a/src/shaders/post_processing/Core_Kernels/RGB_AVS_IEF_Unpack_16x8.asm b/src/shaders/post_processing/gen5_6/Core_Kernels/RGB_AVS_IEF_Unpack_16x8.asm
similarity index 100%
rename from src/shaders/post_processing/Core_Kernels/RGB_AVS_IEF_Unpack_16x8.asm
rename to src/shaders/post_processing/gen5_6/Core_Kernels/RGB_AVS_IEF_Unpack_16x8.asm
diff --git a/src/shaders/post_processing/Core_Kernels/RGB_AVS_IEF_Unscramble_16x8.asm b/src/shaders/post_processing/gen5_6/Core_Kernels/RGB_AVS_IEF_Unscramble_16x8.asm
similarity index 100%
rename from src/shaders/post_processing/Core_Kernels/RGB_AVS_IEF_Unscramble_16x8.asm
rename to src/shaders/post_processing/gen5_6/Core_Kernels/RGB_AVS_IEF_Unscramble_16x8.asm
diff --git a/src/shaders/post_processing/Core_Kernels/RGB_Scaling.asm b/src/shaders/post_processing/gen5_6/Core_Kernels/RGB_Scaling.asm
similarity index 100%
rename from src/shaders/post_processing/Core_Kernels/RGB_Scaling.asm
rename to src/shaders/post_processing/gen5_6/Core_Kernels/RGB_Scaling.asm
diff --git a/src/shaders/post_processing/Core_Kernels/Scaling.inc b/src/shaders/post_processing/gen5_6/Core_Kernels/Scaling.inc
similarity index 100%
rename from src/shaders/post_processing/Core_Kernels/Scaling.inc
rename to src/shaders/post_processing/gen5_6/Core_Kernels/Scaling.inc
diff --git a/src/shaders/post_processing/gen5_6/Makefile.am b/src/shaders/post_processing/gen5_6/Makefile.am
new file mode 100755
index 0000000..8642e61
--- /dev/null
+++ b/src/shaders/post_processing/gen5_6/Makefile.am
@@ -0,0 +1,215 @@
+
+INTEL_G4I =
+
+INTEL_G4A = null.g4a
+INTEL_G4B = null.g4b
+INTEL_G4B_GEN5 = null.g4b.gen5
+INTEL_G6A = null.g6a
+INTEL_G6B = null.g6b
+
+INTEL_PP_G4B_GEN5 = \
+ nv12_avs_nv12.g4b.gen5 \
+ nv12_dn_nv12.g4b.gen5 \
+ nv12_dndi_nv12.g4b.gen5 \
+ nv12_load_save_nv12.g4b.gen5 \
+ nv12_load_save_pl3.g4b.gen5 \
+ nv12_scaling_nv12.g4b.gen5 \
+ pl3_load_save_nv12.g4b.gen5 \
+ pl3_load_save_pl3.g4b.gen5 \
+ pl3_load_save_pa.g4b.gen5 \
+ nv12_load_save_pa.g4b.gen5 \
+ pa_load_save_nv12.g4b.gen5 \
+ pa_load_save_pl3.g4b.gen5 \
+ $(NULL)
+
+INTEL_PP_G6B = \
+ nv12_avs_nv12.g6b \
+ nv12_dn_nv12.g6b \
+ nv12_dndi_nv12.g6b \
+ nv12_load_save_nv12.g6b \
+ nv12_load_save_pl3.g6b \
+ nv12_scaling_nv12.g6b \
+ pl3_load_save_nv12.g6b \
+ pl3_load_save_pl3.g6b \
+ pl3_load_save_pa.g6b \
+ nv12_load_save_pa.g6b \
+ pa_load_save_nv12.g6b \
+ pa_load_save_pl3.g6b \
+ $(NULL)
+
+INTEL_PP_ASM = \
+ nv12_avs_nv12.asm \
+ nv12_dn_nv12.asm \
+ nv12_dndi_nv12.asm \
+ nv12_load_save_nv12.asm \
+ nv12_load_save_pl3.asm \
+ nv12_scaling_nv12.asm \
+ pl3_load_save_nv12.asm \
+ pl3_load_save_pl3.asm \
+ pl3_load_save_pa.asm \
+ nv12_load_save_pa.asm \
+ pa_load_save_nv12.asm \
+ pa_load_save_pl3.asm \
+ $(NULL)
+
+INTEL_PP_ASM += \
+ Common/AYUV_Load_16x8.asm \
+ Common/IMC3_Load_8x4.asm \
+ Common/IMC3_Load_8x5.asm \
+ Common/IMC3_Load_9x5.asm \
+ Common/Init_All_Regs.asm \
+ Common/Multiple_Loop.asm \
+ Common/Multiple_Loop_Head.asm \
+ Common/NV11_Load_4x8.asm \
+ Common/NV11_Load_5x8.asm \
+ Common/NV12_Load_8x4.asm \
+ Common/NV12_Load_8x5.asm \
+ Common/NV12_Load_9x5.asm \
+ Common/P208_Load_8x8.asm \
+ Common/P208_Load_9x8.asm \
+ Common/PA_Load_8x8.asm \
+ Common/PA_Load_9x8.asm \
+ Common/PL16x8_PL8x4.asm \
+ Common/PL16x8_PL8x8.asm \
+ Common/PL4x8_Save_NV11.asm \
+ Common/PL5x8_PL16x8.asm \
+ Common/PL5x8_PL8x8.asm \
+ Common/PL8x4_Save_IMC3.asm \
+ Common/PL8x4_Save_NV12.asm \
+ Common/PL8x5_PL8x8.asm \
+ Common/PL8x8_PL8x4.asm \
+ Common/PL8x8_Save_P208.asm \
+ Common/PL8x8_Save_PA.asm \
+ Common/PL9x5_PL16x8.asm \
+ Common/PL9x8_PL16x8.asm \
+ Common/RGB16x8_Save_RGB.asm \
+ Common/RGB16x8_Save_RGB16.asm \
+ Common/RGB16x8_Save_Y416.asm \
+ Common/RGB_Pack.asm \
+ Common/SetupVPKernel.asm \
+ Common/readSampler16x1.asm \
+ Core_Kernels/AVS_SetupFirstBlock.asm \
+ Core_Kernels/AVS_SetupSecondBlock.asm \
+ Core_Kernels/DI_Hist_Save.asm \
+ Core_Kernels/DI_SAVE_PA.asm \
+ Core_Kernels/DNDI_COMMAND.asm \
+ Core_Kernels/DNDI_Hist_Save.asm \
+ Core_Kernels/PA_AVS_IEF_16x8.asm \
+ Core_Kernels/PA_AVS_IEF_8x4.asm \
+ Core_Kernels/PA_AVS_IEF_8x8.asm \
+ Core_Kernels/PA_AVS_IEF_Sample.asm \
+ Core_Kernels/PA_AVS_IEF_Unpack_16x8.asm \
+ Core_Kernels/PA_AVS_IEF_Unpack_8x4.asm \
+ Core_Kernels/PA_AVS_IEF_Unpack_8x8.asm \
+ Core_Kernels/PA_DNDI_ALG.asm \
+ Core_Kernels/PA_DN_ALG.asm \
+ Core_Kernels/PA_Scaling.asm \
+ Core_Kernels/PL2_AVS_IEF_16x8.asm \
+ Core_Kernels/PL2_AVS_IEF_8x4.asm \
+ Core_Kernels/PL2_AVS_IEF_8x8.asm \
+ Core_Kernels/PL2_AVS_IEF_Unpack_16x8.asm \
+ Core_Kernels/PL2_AVS_IEF_Unpack_8x4.asm \
+ Core_Kernels/PL2_AVS_IEF_Unpack_8x8.asm \
+ Core_Kernels/PL2_Scaling.asm \
+ Core_Kernels/PL3_AVS_IEF_16x8.asm \
+ Core_Kernels/PL3_AVS_IEF_8x4.asm \
+ Core_Kernels/PL3_AVS_IEF_8x8.asm \
+ Core_Kernels/PL3_AVS_IEF_Unpack_16x8.asm \
+ Core_Kernels/PL3_AVS_IEF_Unpack_8x4.asm \
+ Core_Kernels/PL3_AVS_IEF_Unpack_8x8.asm \
+ Core_Kernels/PL3_Scaling.asm \
+ Core_Kernels/PL_DNDI_ALG.asm \
+ Core_Kernels/PL_DNDI_ALG_UVCopy_NV11.asm \
+ Core_Kernels/PL_DNDI_ALG_UVCopy_NV12.asm \
+ Core_Kernels/PL_DNDI_ALG_UVCopy_P208.asm \
+ Core_Kernels/PL_DNDI_ALG_UVCopy_PL3.asm \
+ Core_Kernels/PL_DN_ALG.asm \
+ Core_Kernels/RGB_AVS_IEF_16x8.asm \
+ Core_Kernels/RGB_AVS_IEF_Unpack_16x8.asm \
+ Core_Kernels/RGB_AVS_IEF_Unscramble_16x8.asm \
+ Core_Kernels/RGB_Scaling.asm \
+ $(NULL)
+
+INTEL_PP_INC = \
+ Common/AYUV_Load_16x8.inc \
+ Common/Expansion.inc \
+ Common/PA_Load.inc \
+ Common/PL2_Load.inc \
+ Common/PL3_Load.inc \
+ Common/PL4x8_Save_NV11.inc \
+ Common/PL8x4_Save_IMC3.inc \
+ Common/PL8x4_Save_NV12.inc \
+ Common/PL8x8_PL8x4.inc \
+ Common/PL8x8_Save_P208.inc \
+ Common/PL8x8_Save_PA.inc \
+ Common/RGB16x8_Save_RGB.inc \
+ Common/RGB16x8_Save_RGB16.inc \
+ Common/RGB16x8_Save_Y416.inc \
+ Common/common.inc \
+ Common/undefall.inc \
+ Core_Kernels/AVS_IEF.inc \
+ Core_Kernels/DI.inc \
+ Core_Kernels/DNDI.inc \
+ Core_Kernels/Scaling.inc \
+ $(NULL)
+
+INTEL_PP_GEN5_ASM = $(INTEL_PP_G4B_GEN5:%.g4b.gen5=%.g5s)
+INTEL_PP_GEN6_ASM = $(INTEL_PP_G6B:%.g6b=%.g6s)
+
+TARGETS =
+if HAVE_GEN4ASM
+TARGETS += $(INTEL_PP_G4B_GEN5)
+TARGETS += $(INTEL_PP_G6B)
+endif
+
+all-local: $(TARGETS)
+
+SUFFIXES = .g4a .g4b .g6a .g6b .g5s .g6s .asm
+
+if HAVE_GEN4ASM
+.g4a.g4b:
+ $(AM_V_GEN)m4 $*.g4a > $*.g4m && \
+ $(GEN4ASM) -o $@ $*.g4m && \
+ $(GEN4ASM) -g 5 -o $@.gen5 $*.g4m && \
+ rm $*.g4m
+
+.g6a.g6b:
+ $(AM_V_GEN)m4 $< > $*.g6m && \
+ $(GEN4ASM) -g 6 -o $@ $*.g6m && \
+ rm $*.g6m
+
+$(INTEL_G4B): $(INTEL_G4I)
+
+$(INTEL_PP_GEN5_ASM): $(INTEL_PP_ASM)
+.asm.g5s:
+ $(AM_V_GEN)cpp -D DEV_ILK -I Common/ -I Core_Kernels $< > _pp0.$@; \
+ ../../gpp.py _pp0.$@ $@; \
+ rm _pp0.$@
+.g5s.g4b.gen5:
+ $(AM_V_GEN)$(GEN4ASM) -a -o $@ -g 5 $<
+
+$(INTEL_PP_GEN6_ASM): $(INTEL_PP_ASM)
+.asm.g6s:
+ $(AM_V_GEN)cpp -D GT -I Common/ -I Core_Kernels $< > _pp0.$@; \
+ ../../gpp.py _pp0.$@ $@; \
+ rm _pp0.$@
+.g6s.g6b:
+ $(AM_V_GEN)$(GEN4ASM) -a -o $@ -g 6 $<
+endif
+
+CLEANFILES = $(INTEL_PP_GEN5_ASM) $(INTEL_PP_GEN6_ASM)
+
+EXTRA_DIST = \
+ $(INTEL_G4A) \
+ $(INTEL_G4B) \
+ $(INTEL_G4B_GEN5) \
+ $(INTEL_G4I) \
+ $(INTEL_G6B) \
+ $(INTEL_PP_ASM) \
+ $(INTEL_PP_G4B_GEN5) \
+ $(INTEL_PP_G6B) \
+ $(INTEL_PP_INC) \
+ $(NULL)
+
+# Extra clean files so that maintainer-clean removes *everything*
+MAINTAINERCLEANFILES = Makefile.in
diff --git a/src/shaders/post_processing/null.g4a b/src/shaders/post_processing/gen5_6/null.g4a
similarity index 100%
rename from src/shaders/post_processing/null.g4a
rename to src/shaders/post_processing/gen5_6/null.g4a
diff --git a/src/shaders/post_processing/null.g4b b/src/shaders/post_processing/gen5_6/null.g4b
similarity index 100%
rename from src/shaders/post_processing/null.g4b
rename to src/shaders/post_processing/gen5_6/null.g4b
diff --git a/src/shaders/post_processing/null.g4b.gen5 b/src/shaders/post_processing/gen5_6/null.g4b.gen5
similarity index 100%
rename from src/shaders/post_processing/null.g4b.gen5
rename to src/shaders/post_processing/gen5_6/null.g4b.gen5
diff --git a/src/shaders/post_processing/null.g6a b/src/shaders/post_processing/gen5_6/null.g6a
similarity index 100%
rename from src/shaders/post_processing/null.g6a
rename to src/shaders/post_processing/gen5_6/null.g6a
diff --git a/src/shaders/post_processing/null.g6b b/src/shaders/post_processing/gen5_6/null.g6b
similarity index 100%
rename from src/shaders/post_processing/null.g6b
rename to src/shaders/post_processing/gen5_6/null.g6b
diff --git a/src/shaders/post_processing/gen5_6/nv12_avs_nv12.asm b/src/shaders/post_processing/gen5_6/nv12_avs_nv12.asm
new file mode 100644
index 0000000..6e0e1b3
--- /dev/null
+++ b/src/shaders/post_processing/gen5_6/nv12_avs_nv12.asm
@@ -0,0 +1,20 @@
+// Module name: NV12_AVS_NV12 (kernel assembled purely from the shared includes below, in this order)
+.kernel NV12_AVS_NV12
+.code
+
+#define INC_SCALING // defined before the includes; presumably tested by them - confirm
+
+#include "SetupVPKernel.asm" // found in Common/ via the -I flags in Makefile.am
+#include "Multiple_Loop_Head.asm" // found in Common/
+#include "PL2_AVS_IEF_16x8.asm" // found in Core_Kernels/
+#include "PL16x8_PL8x4.asm" // found in Common/
+#include "PL8x4_Save_NV12.asm" // found in Common/
+#include "Multiple_Loop.asm" // found in Common/
+
+END_THREAD // End of Thread
+
+.end_code
+
+.end_kernel
+
+// end of nv12_avs_nv12.asm
diff --git a/src/shaders/post_processing/gen5_6/nv12_avs_nv12.g4b.gen5 b/src/shaders/post_processing/gen5_6/nv12_avs_nv12.g4b.gen5
new file mode 100644
index 0000000..6685b46
--- /dev/null
+++ b/src/shaders/post_processing/gen5_6/nv12_avs_nv12.g4b.gen5
@@ -0,0 +1,170 @@
+ { 0x00600001, 0x21000021, 0x008d0000, 0x00000000 },
+ { 0x00000441, 0x20842e2d, 0x000000b7, 0x00100010 },
+ { 0x00000c01, 0x2086022d, 0x000000bb, 0x00000000 },
+ { 0x00000801, 0x208a01ad, 0x000000a0, 0x00000000 },
+ { 0x00200001, 0x209403bd, 0x006600a4, 0x00000000 },
+ { 0x00000040, 0x208435ad, 0x00000084, 0x000000a0 },
+ { 0x01000005, 0x20002d2c, 0x00000088, 0x80008000 },
+ { 0x00010001, 0x20c003fd, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x212003bd, 0x000000c0, 0x00000000 },
+ { 0x00000001, 0x212403bd, 0x000000bc, 0x00000000 },
+ { 0x00000001, 0x213403bd, 0x00000038, 0x00000000 },
+ { 0x00200001, 0x612803bd, 0x004500a4, 0x00000000 },
+ { 0x00000001, 0x21080061, 0x00000000, 0x0000d000 },
+ { 0x00802001, 0x20000022, 0x008d0100, 0x00000000 },
+ { 0x00000031, 0x25401c09, 0x208d0000, 0x044bb401 },
+ { 0x00000001, 0x21080061, 0x00000000, 0x0000a000 },
+ { 0x00802001, 0x20400022, 0x008d0100, 0x00000000 },
+ { 0x02000031, 0x25c01c09, 0x208d0000, 0x048bb802 },
+ { 0x00000001, 0x240803bc, 0x000000a4, 0x00000000 },
+ { 0x00000048, 0x24087fbc, 0x000000bc, 0x41000000 },
+ { 0x00000048, 0x21287fbd, 0x000000c0, 0x41e00000 },
+ { 0x00000001, 0x240403bc, 0x000000bc, 0x00000000 },
+ { 0x00000048, 0x21247fbd, 0x000000c0, 0x41000000 },
+ { 0x00000001, 0x21080061, 0x00000000, 0x0000d000 },
+ { 0x00802001, 0x20000022, 0x008d0100, 0x00000000 },
+ { 0x00000031, 0x27401c09, 0x208d0000, 0x044bb401 },
+ { 0x00000001, 0x21080061, 0x00000000, 0x0000a000 },
+ { 0x00802001, 0x20400022, 0x008d0100, 0x00000000 },
+ { 0x02000031, 0x27c01c09, 0x208d0000, 0x048bb802 },
+ { 0x00600001, 0x21400229, 0x00aa0541, 0x00000000 },
+ { 0x00600001, 0x21600229, 0x00aa0549, 0x00000000 },
+ { 0x00600001, 0x21800229, 0x00aa0561, 0x00000000 },
+ { 0x00600001, 0x21a00229, 0x00aa0569, 0x00000000 },
+ { 0x00600001, 0x21c00229, 0x00aa0581, 0x00000000 },
+ { 0x00600001, 0x21e00229, 0x00aa0589, 0x00000000 },
+ { 0x00600001, 0x22000229, 0x00aa05a1, 0x00000000 },
+ { 0x00600001, 0x22200229, 0x00aa05a9, 0x00000000 },
+ { 0x00600001, 0x22400229, 0x00aa05c1, 0x00000000 },
+ { 0x00600001, 0x22600229, 0x00aa05c9, 0x00000000 },
+ { 0x00600001, 0x22800229, 0x00aa05e1, 0x00000000 },
+ { 0x00600001, 0x22a00229, 0x00aa05e9, 0x00000000 },
+ { 0x00600001, 0x22c00229, 0x00aa0641, 0x00000000 },
+ { 0x00600001, 0x22e00229, 0x00aa0649, 0x00000000 },
+ { 0x00600001, 0x23000229, 0x00aa0661, 0x00000000 },
+ { 0x00600001, 0x23200229, 0x00aa0669, 0x00000000 },
+ { 0x00600001, 0x23400229, 0x00aa0601, 0x00000000 },
+ { 0x00600001, 0x23600229, 0x00aa0609, 0x00000000 },
+ { 0x00600001, 0x23800229, 0x00aa0621, 0x00000000 },
+ { 0x00600001, 0x23a00229, 0x00aa0629, 0x00000000 },
+ { 0x00600001, 0x23c00229, 0x00aa0681, 0x00000000 },
+ { 0x00600001, 0x23e00229, 0x00aa0689, 0x00000000 },
+ { 0x00600001, 0x24000229, 0x00aa06a1, 0x00000000 },
+ { 0x00600001, 0x24200229, 0x00aa06a9, 0x00000000 },
+ { 0x00600001, 0x21500229, 0x00aa0741, 0x00000000 },
+ { 0x00600001, 0x21700229, 0x00aa0749, 0x00000000 },
+ { 0x00600001, 0x21900229, 0x00aa0761, 0x00000000 },
+ { 0x00600001, 0x21b00229, 0x00aa0769, 0x00000000 },
+ { 0x00600001, 0x21d00229, 0x00aa0781, 0x00000000 },
+ { 0x00600001, 0x21f00229, 0x00aa0789, 0x00000000 },
+ { 0x00600001, 0x22100229, 0x00aa07a1, 0x00000000 },
+ { 0x00600001, 0x22300229, 0x00aa07a9, 0x00000000 },
+ { 0x00600001, 0x22500229, 0x00aa07c1, 0x00000000 },
+ { 0x00600001, 0x22700229, 0x00aa07c9, 0x00000000 },
+ { 0x00600001, 0x22900229, 0x00aa07e1, 0x00000000 },
+ { 0x00600001, 0x22b00229, 0x00aa07e9, 0x00000000 },
+ { 0x00600001, 0x22d00229, 0x00aa0841, 0x00000000 },
+ { 0x00600001, 0x22f00229, 0x00aa0849, 0x00000000 },
+ { 0x00600001, 0x23100229, 0x00aa0861, 0x00000000 },
+ { 0x00600001, 0x23300229, 0x00aa0869, 0x00000000 },
+ { 0x00600001, 0x23500229, 0x00aa0801, 0x00000000 },
+ { 0x00600001, 0x23700229, 0x00aa0809, 0x00000000 },
+ { 0x00600001, 0x23900229, 0x00aa0821, 0x00000000 },
+ { 0x00600001, 0x23b00229, 0x00aa0829, 0x00000000 },
+ { 0x00600001, 0x23d00229, 0x00aa0881, 0x00000000 },
+ { 0x00600001, 0x23f00229, 0x00aa0889, 0x00000000 },
+ { 0x00600001, 0x24100229, 0x00aa08a1, 0x00000000 },
+ { 0x00600001, 0x24300229, 0x00aa08a9, 0x00000000 },
+ { 0x00600001, 0x22400129, 0x00ae0240, 0x00000000 },
+ { 0x00600001, 0x23400129, 0x00ae0340, 0x00000000 },
+ { 0x00600001, 0x22500129, 0x00ae0280, 0x00000000 },
+ { 0x00600001, 0x23500129, 0x00ae0380, 0x00000000 },
+ { 0x00600001, 0x22600129, 0x00ae02c0, 0x00000000 },
+ { 0x00600001, 0x23600129, 0x00ae03c0, 0x00000000 },
+ { 0x00600001, 0x22700129, 0x00ae0300, 0x00000000 },
+ { 0x00600001, 0x23700129, 0x00ae0400, 0x00000000 },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x00200001, 0x202001a6, 0x004500a0, 0x00000000 },
+ { 0x00000001, 0x20280062, 0x00000000, 0x0007000f },
+ { 0x00000005, 0x24000c20, 0x000000b8, 0x00ffffff },
+ { 0x04000010, 0x20000c04, 0x00000400, 0x00ffffff },
+ { 0x00010220, 0x34001c00, 0x00001400, 0x00000056 },
+ { 0x01600031, 0x27000c01, 0x408d0000, 0x0248a007 },
+ { 0x0000040c, 0x21043da1, 0x000000a2, 0x00010001 },
+ { 0x00000801, 0x21080061, 0x00000000, 0x0003000f },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x01600031, 0x28000c01, 0x408d0000, 0x0228a008 },
+ { 0x00200001, 0x210001a1, 0x004500a0, 0x00000000 },
+ { 0x00000001, 0x21080061, 0x00000000, 0x0007000f },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x00000001, 0x26020228, 0x000000ba, 0x00000000 },
+ { 0x00610001, 0x24400129, 0x020000b8, 0x00000000 },
+ { 0x00710001, 0x24400169, 0x02000000, 0x00000000 },
+ { 0x00000001, 0x24600061, 0x00000000, 0x00040001 },
+ { 0x00000001, 0x24640061, 0x00000000, 0x00400010 },
+ { 0x00000001, 0x24680061, 0x00000000, 0x04000100 },
+ { 0x00000001, 0x246c0061, 0x00000000, 0x40001000 },
+ { 0x00000001, 0x26020128, 0x00000440, 0x00000000 },
+ { 0x00910001, 0x41400231, 0x02b10700, 0x00000000 },
+ { 0x02600005, 0x2000252c, 0x02000440, 0x008d0460 },
+ { 0x00710001, 0x42400231, 0x02ae0800, 0x00000000 },
+ { 0x00710001, 0x43400231, 0x02ae0801, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x00000442, 0x00000000 },
+ { 0x00910001, 0x41600231, 0x02b10710, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x00000444, 0x00000000 },
+ { 0x00910001, 0x41800231, 0x02b10720, 0x00000000 },
+ { 0x02600005, 0x2000252c, 0x02000444, 0x008d0460 },
+ { 0x00710001, 0x42500231, 0x02ae0810, 0x00000000 },
+ { 0x00710001, 0x43500231, 0x02ae0811, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x00000446, 0x00000000 },
+ { 0x00910001, 0x41a00231, 0x02b10730, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x00000448, 0x00000000 },
+ { 0x00910001, 0x41c00231, 0x02b10740, 0x00000000 },
+ { 0x02600005, 0x2000252c, 0x02000448, 0x008d0460 },
+ { 0x00710001, 0x42600231, 0x02ae0820, 0x00000000 },
+ { 0x00710001, 0x43600231, 0x02ae0821, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x0000044a, 0x00000000 },
+ { 0x00910001, 0x41e00231, 0x02b10750, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x0000044c, 0x00000000 },
+ { 0x00910001, 0x42000231, 0x02b10760, 0x00000000 },
+ { 0x02600005, 0x2000252c, 0x0200044c, 0x008d0460 },
+ { 0x00710001, 0x42700231, 0x02ae0830, 0x00000000 },
+ { 0x00710001, 0x43700231, 0x02ae0831, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x0000044e, 0x00000000 },
+ { 0x00910001, 0x42200231, 0x02b10770, 0x00000000 },
+ { 0x00800001, 0x20400232, 0x00d20140, 0x00000000 },
+ { 0x00800001, 0x20500232, 0x00d20160, 0x00000000 },
+ { 0x00800001, 0x20600232, 0x00d20180, 0x00000000 },
+ { 0x00800001, 0x20700232, 0x00d201a0, 0x00000000 },
+ { 0x00800001, 0x20800232, 0x00d201c0, 0x00000000 },
+ { 0x00800001, 0x20900232, 0x00d201e0, 0x00000000 },
+ { 0x00800001, 0x20a00232, 0x00d20200, 0x00000000 },
+ { 0x00800001, 0x20b00232, 0x00d20220, 0x00000000 },
+ { 0x01600031, 0x20000c04, 0x508d0000, 0x0a082007 },
+ { 0x00200001, 0x210001a5, 0x004500a0, 0x00000000 },
+ { 0x0000000c, 0x21043ca5, 0x00000104, 0x00010001 },
+ { 0x00000001, 0x21080061, 0x00000000, 0x0003000f },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x00800001, 0x40400232, 0x00d20240, 0x00000000 },
+ { 0x00800001, 0x40410232, 0x00d20340, 0x00000000 },
+ { 0x00800001, 0x40600232, 0x00d20260, 0x00000000 },
+ { 0x00800001, 0x40610232, 0x00d20360, 0x00000000 },
+ { 0x01600031, 0x20000c04, 0x508d0000, 0x06082008 },
+ { 0x01000040, 0x20863dad, 0x00000086, 0xffffffff },
+ { 0x00000040, 0x20a03dad, 0x000000a0, 0x00100010 },
+ { 0x05000010, 0x200035ac, 0x020000a0, 0x00000084 },
+ { 0x00000041, 0x24407fbd, 0x000000bc, 0x41800000 },
+ { 0x00000040, 0x20a477bd, 0x00000440, 0x000000a4 },
+ { 0x00010220, 0x34001c00, 0x00001400, 0x0000000e },
+ { 0x00010220, 0x34001c00, 0x02001400, 0xfffffece },
+ { 0x00000001, 0x20a001ad, 0x0000008a, 0x00000000 },
+ { 0x00000040, 0x20a23dad, 0x000000a2, 0x00080008 },
+ { 0x00000001, 0x20a403bd, 0x00000094, 0x00000000 },
+ { 0x00000041, 0x24407fbd, 0x00000038, 0x41000000 },
+ { 0x00000040, 0x20a877bd, 0x00000440, 0x000000a8 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0xfffffec2 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
+ { 0x0f000031, 0x20000c04, 0x708d0000, 0x82000000 },
+ { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
+ { 0x0f000031, 0x20000c04, 0x708d0000, 0x82000000 },
diff --git a/src/shaders/post_processing/gen5_6/nv12_avs_nv12.g6b b/src/shaders/post_processing/gen5_6/nv12_avs_nv12.g6b
new file mode 100644
index 0000000..5868243
--- /dev/null
+++ b/src/shaders/post_processing/gen5_6/nv12_avs_nv12.g6b
@@ -0,0 +1,243 @@
+ { 0x00600001, 0x20e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21600061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21a00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x22000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x22200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x22400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x22600061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x22800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x22a00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x22c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x22e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x23000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x23200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x23400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x23600061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x23800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x23a00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x23c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x23e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x24000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x24200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x24400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x24600061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x24800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x24a00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x24c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x24e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x25000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x25200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x25400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x25600061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x25800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x25a00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x25c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x25e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x26000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x26200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x26400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x26600061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x26800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x26a00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x26c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x26e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x27000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x27200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x27400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x27600061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x27800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x27a00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x27c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x27e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28600061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28a00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x29000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x29200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x29400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x29600061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x29800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x29a00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x29c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x29e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21000021, 0x008d0000, 0x00000000 },
+ { 0x00000441, 0x20842e2d, 0x000000b7, 0x00100010 },
+ { 0x00000c01, 0x2086022d, 0x000000bb, 0x00000000 },
+ { 0x00000801, 0x208a01ad, 0x000000a0, 0x00000000 },
+ { 0x00200001, 0x209403bd, 0x006600a4, 0x00000000 },
+ { 0x00000040, 0x208435ad, 0x00000084, 0x000000a0 },
+ { 0x01000005, 0x20002d2c, 0x00000088, 0x80008000 },
+ { 0x00010001, 0x20c003fd, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x212003bd, 0x000000c0, 0x00000000 },
+ { 0x00000001, 0x212403bd, 0x000000bc, 0x00000000 },
+ { 0x00000001, 0x213403bd, 0x00000038, 0x00000000 },
+ { 0x00200001, 0x612803bd, 0x004500a4, 0x00000000 },
+ { 0x00000001, 0x21080061, 0x00000000, 0x0000d000 },
+ { 0x00800001, 0x20000022, 0x008d0100, 0x00000000 },
+ { 0x02000031, 0x25401cc9, 0x00000000, 0x044bb401 },
+ { 0x00000001, 0x21080061, 0x00000000, 0x0000a000 },
+ { 0x00800001, 0x20400022, 0x008d0100, 0x00000000 },
+ { 0x02000031, 0x25c01cc9, 0x00000040, 0x048bb802 },
+ { 0x00000001, 0x240803bc, 0x000000a4, 0x00000000 },
+ { 0x00000048, 0x24087fbc, 0x000000bc, 0x41000000 },
+ { 0x00000048, 0x21287fbd, 0x000000c0, 0x41e00000 },
+ { 0x00000001, 0x240403bc, 0x000000bc, 0x00000000 },
+ { 0x00000048, 0x21247fbd, 0x000000c0, 0x41000000 },
+ { 0x00000001, 0x21080061, 0x00000000, 0x0000d000 },
+ { 0x00800001, 0x20000022, 0x008d0100, 0x00000000 },
+ { 0x02000031, 0x27401cc9, 0x00000000, 0x044bb401 },
+ { 0x00000001, 0x21080061, 0x00000000, 0x0000a000 },
+ { 0x00800001, 0x20400022, 0x008d0100, 0x00000000 },
+ { 0x02000031, 0x27c01cc9, 0x00000040, 0x048bb802 },
+ { 0x00600001, 0x21400229, 0x00aa0541, 0x00000000 },
+ { 0x00600001, 0x21600229, 0x00aa0549, 0x00000000 },
+ { 0x00600001, 0x21800229, 0x00aa0561, 0x00000000 },
+ { 0x00600001, 0x21a00229, 0x00aa0569, 0x00000000 },
+ { 0x00600001, 0x21c00229, 0x00aa0581, 0x00000000 },
+ { 0x00600001, 0x21e00229, 0x00aa0589, 0x00000000 },
+ { 0x00600001, 0x22000229, 0x00aa05a1, 0x00000000 },
+ { 0x00600001, 0x22200229, 0x00aa05a9, 0x00000000 },
+ { 0x00600001, 0x22400229, 0x00aa05c1, 0x00000000 },
+ { 0x00600001, 0x22600229, 0x00aa05c9, 0x00000000 },
+ { 0x00600001, 0x22800229, 0x00aa05e1, 0x00000000 },
+ { 0x00600001, 0x22a00229, 0x00aa05e9, 0x00000000 },
+ { 0x00600001, 0x22c00229, 0x00aa0641, 0x00000000 },
+ { 0x00600001, 0x22e00229, 0x00aa0649, 0x00000000 },
+ { 0x00600001, 0x23000229, 0x00aa0661, 0x00000000 },
+ { 0x00600001, 0x23200229, 0x00aa0669, 0x00000000 },
+ { 0x00600001, 0x23400229, 0x00aa0601, 0x00000000 },
+ { 0x00600001, 0x23600229, 0x00aa0609, 0x00000000 },
+ { 0x00600001, 0x23800229, 0x00aa0621, 0x00000000 },
+ { 0x00600001, 0x23a00229, 0x00aa0629, 0x00000000 },
+ { 0x00600001, 0x23c00229, 0x00aa0681, 0x00000000 },
+ { 0x00600001, 0x23e00229, 0x00aa0689, 0x00000000 },
+ { 0x00600001, 0x24000229, 0x00aa06a1, 0x00000000 },
+ { 0x00600001, 0x24200229, 0x00aa06a9, 0x00000000 },
+ { 0x00600001, 0x21500229, 0x00aa0741, 0x00000000 },
+ { 0x00600001, 0x21700229, 0x00aa0749, 0x00000000 },
+ { 0x00600001, 0x21900229, 0x00aa0761, 0x00000000 },
+ { 0x00600001, 0x21b00229, 0x00aa0769, 0x00000000 },
+ { 0x00600001, 0x21d00229, 0x00aa0781, 0x00000000 },
+ { 0x00600001, 0x21f00229, 0x00aa0789, 0x00000000 },
+ { 0x00600001, 0x22100229, 0x00aa07a1, 0x00000000 },
+ { 0x00600001, 0x22300229, 0x00aa07a9, 0x00000000 },
+ { 0x00600001, 0x22500229, 0x00aa07c1, 0x00000000 },
+ { 0x00600001, 0x22700229, 0x00aa07c9, 0x00000000 },
+ { 0x00600001, 0x22900229, 0x00aa07e1, 0x00000000 },
+ { 0x00600001, 0x22b00229, 0x00aa07e9, 0x00000000 },
+ { 0x00600001, 0x22d00229, 0x00aa0841, 0x00000000 },
+ { 0x00600001, 0x22f00229, 0x00aa0849, 0x00000000 },
+ { 0x00600001, 0x23100229, 0x00aa0861, 0x00000000 },
+ { 0x00600001, 0x23300229, 0x00aa0869, 0x00000000 },
+ { 0x00600001, 0x23500229, 0x00aa0801, 0x00000000 },
+ { 0x00600001, 0x23700229, 0x00aa0809, 0x00000000 },
+ { 0x00600001, 0x23900229, 0x00aa0821, 0x00000000 },
+ { 0x00600001, 0x23b00229, 0x00aa0829, 0x00000000 },
+ { 0x00600001, 0x23d00229, 0x00aa0881, 0x00000000 },
+ { 0x00600001, 0x23f00229, 0x00aa0889, 0x00000000 },
+ { 0x00600001, 0x24100229, 0x00aa08a1, 0x00000000 },
+ { 0x00600001, 0x24300229, 0x00aa08a9, 0x00000000 },
+ { 0x00600001, 0x22400129, 0x00ae0240, 0x00000000 },
+ { 0x00600001, 0x23400129, 0x00ae0340, 0x00000000 },
+ { 0x00600001, 0x22500129, 0x00ae0280, 0x00000000 },
+ { 0x00600001, 0x23500129, 0x00ae0380, 0x00000000 },
+ { 0x00600001, 0x22600129, 0x00ae02c0, 0x00000000 },
+ { 0x00600001, 0x23600129, 0x00ae03c0, 0x00000000 },
+ { 0x00600001, 0x22700129, 0x00ae0300, 0x00000000 },
+ { 0x00600001, 0x23700129, 0x00ae0400, 0x00000000 },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x00200001, 0x202001a6, 0x004500a0, 0x00000000 },
+ { 0x00000001, 0x20280062, 0x00000000, 0x0007000f },
+ { 0x00000005, 0x24000c20, 0x000000b8, 0x00ffffff },
+ { 0x04000010, 0x20000c04, 0x00000400, 0x00ffffff },
+ { 0x00010220, 0x34001c00, 0x00001400, 0x00000056 },
+ { 0x04600031, 0x27000cc1, 0x00000020, 0x02498007 },
+ { 0x0000040c, 0x21043da1, 0x000000a2, 0x00010001 },
+ { 0x00000801, 0x21080061, 0x00000000, 0x0003000f },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x04600031, 0x28000cc1, 0x00000020, 0x02298008 },
+ { 0x00200001, 0x210001a1, 0x004500a0, 0x00000000 },
+ { 0x00000001, 0x21080061, 0x00000000, 0x0007000f },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x00000001, 0x26020228, 0x000000ba, 0x00000000 },
+ { 0x00610001, 0x24400129, 0x020000b8, 0x00000000 },
+ { 0x00710001, 0x24400169, 0x02000000, 0x00000000 },
+ { 0x00000001, 0x24600061, 0x00000000, 0x00040001 },
+ { 0x00000001, 0x24640061, 0x00000000, 0x00400010 },
+ { 0x00000001, 0x24680061, 0x00000000, 0x04000100 },
+ { 0x00000001, 0x246c0061, 0x00000000, 0x40001000 },
+ { 0x00000001, 0x26020128, 0x00000440, 0x00000000 },
+ { 0x00910001, 0x41400231, 0x02b10700, 0x00000000 },
+ { 0x02600005, 0x2000252c, 0x02000440, 0x008d0460 },
+ { 0x00710001, 0x42400231, 0x02ae0800, 0x00000000 },
+ { 0x00710001, 0x43400231, 0x02ae0801, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x00000442, 0x00000000 },
+ { 0x00910001, 0x41600231, 0x02b10710, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x00000444, 0x00000000 },
+ { 0x00910001, 0x41800231, 0x02b10720, 0x00000000 },
+ { 0x02600005, 0x2000252c, 0x02000444, 0x008d0460 },
+ { 0x00710001, 0x42500231, 0x02ae0810, 0x00000000 },
+ { 0x00710001, 0x43500231, 0x02ae0811, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x00000446, 0x00000000 },
+ { 0x00910001, 0x41a00231, 0x02b10730, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x00000448, 0x00000000 },
+ { 0x00910001, 0x41c00231, 0x02b10740, 0x00000000 },
+ { 0x02600005, 0x2000252c, 0x02000448, 0x008d0460 },
+ { 0x00710001, 0x42600231, 0x02ae0820, 0x00000000 },
+ { 0x00710001, 0x43600231, 0x02ae0821, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x0000044a, 0x00000000 },
+ { 0x00910001, 0x41e00231, 0x02b10750, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x0000044c, 0x00000000 },
+ { 0x00910001, 0x42000231, 0x02b10760, 0x00000000 },
+ { 0x02600005, 0x2000252c, 0x0200044c, 0x008d0460 },
+ { 0x00710001, 0x42700231, 0x02ae0830, 0x00000000 },
+ { 0x00710001, 0x43700231, 0x02ae0831, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x0000044e, 0x00000000 },
+ { 0x00910001, 0x42200231, 0x02b10770, 0x00000000 },
+ { 0x00800001, 0x20400232, 0x00d20140, 0x00000000 },
+ { 0x00800001, 0x20500232, 0x00d20160, 0x00000000 },
+ { 0x00800001, 0x20600232, 0x00d20180, 0x00000000 },
+ { 0x00800001, 0x20700232, 0x00d201a0, 0x00000000 },
+ { 0x00800001, 0x20800232, 0x00d201c0, 0x00000000 },
+ { 0x00800001, 0x20900232, 0x00d201e0, 0x00000000 },
+ { 0x00800001, 0x20a00232, 0x00d20200, 0x00000000 },
+ { 0x00800001, 0x20b00232, 0x00d20220, 0x00000000 },
+ { 0x05600031, 0x20000cc4, 0x00000020, 0x0a094007 },
+ { 0x00200001, 0x210001a5, 0x004500a0, 0x00000000 },
+ { 0x0000000c, 0x21043ca5, 0x00000104, 0x00010001 },
+ { 0x00000001, 0x21080061, 0x00000000, 0x0003000f },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x00800001, 0x40400232, 0x00d20240, 0x00000000 },
+ { 0x00800001, 0x40410232, 0x00d20340, 0x00000000 },
+ { 0x00800001, 0x40600232, 0x00d20260, 0x00000000 },
+ { 0x00800001, 0x40610232, 0x00d20360, 0x00000000 },
+ { 0x05600031, 0x20000cc4, 0x00000020, 0x06094008 },
+ { 0x01000040, 0x20863dad, 0x00000086, 0xffffffff },
+ { 0x00000040, 0x20a03dad, 0x000000a0, 0x00100010 },
+ { 0x05000010, 0x200035ac, 0x020000a0, 0x00000084 },
+ { 0x00000041, 0x24407fbd, 0x000000bc, 0x41800000 },
+ { 0x00000040, 0x20a477bd, 0x00000440, 0x000000a4 },
+ { 0x00010220, 0x34001c00, 0x00001400, 0x0000000e },
+ { 0x00010220, 0x34001c00, 0x02001400, 0xfffffece },
+ { 0x00000001, 0x20a001ad, 0x0000008a, 0x00000000 },
+ { 0x00000040, 0x20a23dad, 0x000000a2, 0x00080008 },
+ { 0x00000001, 0x20a403bd, 0x00000094, 0x00000000 },
+ { 0x00000041, 0x24407fbd, 0x00000038, 0x41000000 },
+ { 0x00000040, 0x20a877bd, 0x00000440, 0x000000a8 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0xfffffec2 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
+ { 0x07000031, 0x20001cc4, 0x000001e0, 0x82000010 },
+ { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
+ { 0x07000031, 0x20001cc4, 0x000001e0, 0x82000010 },
diff --git a/src/shaders/post_processing/gen5_6/nv12_dn_nv12.asm b/src/shaders/post_processing/gen5_6/nv12_dn_nv12.asm
new file mode 100644
index 0000000..690d4c9
--- /dev/null
+++ b/src/shaders/post_processing/gen5_6/nv12_dn_nv12.asm
@@ -0,0 +1,26 @@
+// Module name: NV12_DN_NV12
+.kernel NV12_DN_NV12
+.code
+
+#define INC_DN
+
+#include "SetupVPKernel.asm"
+#include "Multiple_Loop_Head.asm"
+
+#define LOAD_UV_ONLY
+#include "NV12_Load_8x4.asm"
+#undef LOAD_UV_ONLY
+
+#include "PL_DN_ALG.asm"
+
+#include "PL8x4_Save_NV12.asm"
+
+#include "Multiple_Loop.asm"
+
+END_THREAD // End of Thread
+
+.end_code
+
+.end_kernel
+
+// end of nv12_dn_nv12.asm
diff --git a/src/shaders/post_processing/gen5_6/nv12_dn_nv12.g4b.gen5 b/src/shaders/post_processing/gen5_6/nv12_dn_nv12.g4b.gen5
new file mode 100644
index 0000000..13164b3
--- /dev/null
+++ b/src/shaders/post_processing/gen5_6/nv12_dn_nv12.g4b.gen5
@@ -0,0 +1,113 @@
+ { 0x00600001, 0x21000021, 0x008d0000, 0x00000000 },
+ { 0x00000441, 0x20842e2d, 0x000000b7, 0x00100010 },
+ { 0x00000c01, 0x2086022d, 0x000000bb, 0x00000000 },
+ { 0x00000801, 0x208a01ad, 0x000000a0, 0x00000000 },
+ { 0x00200001, 0x209403bd, 0x006600a4, 0x00000000 },
+ { 0x00000040, 0x208435ad, 0x00000084, 0x000000a0 },
+ { 0x00200040, 0x210035a5, 0x004500a0, 0x00450074 },
+ { 0x0000000c, 0x21043ca5, 0x00000104, 0x00010001 },
+ { 0x00000001, 0x21080061, 0x00000000, 0x0003000f },
+ { 0x00600001, 0x20400022, 0x008d0100, 0x00000000 },
+ { 0x02600031, 0x28000c01, 0x408d0000, 0x0228a002 },
+ { 0x00800001, 0x22600229, 0x00d20820, 0x00000000 },
+ { 0x00800001, 0x23600229, 0x00d20821, 0x00000000 },
+ { 0x00800001, 0x22400229, 0x00d20800, 0x00000000 },
+ { 0x00800001, 0x23400229, 0x00d20801, 0x00000000 },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x00000001, 0x204801aa, 0x000000a0, 0x00000000 },
+ { 0x00000001, 0x205801aa, 0x000000a2, 0x00000000 },
+ { 0x01600031, 0x24400c01, 0x208d0000, 0x045b8004 },
+ { 0x00800001, 0x21400229, 0x00b10440, 0x00000000 },
+ { 0x00800001, 0x21600229, 0x00b10450, 0x00000000 },
+ { 0x00800001, 0x21800229, 0x00b10460, 0x00000000 },
+ { 0x00800001, 0x21a00229, 0x00b10470, 0x00000000 },
+ { 0x00800001, 0x21c00229, 0x00b10480, 0x00000000 },
+ { 0x00800001, 0x21e00229, 0x00b10490, 0x00000000 },
+ { 0x00800001, 0x22000229, 0x00b104a0, 0x00000000 },
+ { 0x00800001, 0x22200229, 0x00b104b0, 0x00000000 },
+ { 0x00200008, 0x21003da1, 0x004500a0, 0x00020002 },
+ { 0x00000040, 0x21002421, 0x00000100, 0x00000034 },
+ { 0x00000001, 0x21080061, 0x00000000, 0x00010003 },
+ { 0x00600001, 0x21a00022, 0x008d0100, 0x00000000 },
+ { 0x00200001, 0x21c00022, 0x004504c0, 0x00000000 },
+ { 0x0d600031, 0x20000c04, 0x508d0000, 0x04082014 },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x00200001, 0x202001a6, 0x004500a0, 0x00000000 },
+ { 0x00000001, 0x20280062, 0x00000000, 0x0007000f },
+ { 0x00000005, 0x24000c20, 0x000000b8, 0x00ffffff },
+ { 0x04000010, 0x20000c04, 0x00000400, 0x00ffffff },
+ { 0x00010220, 0x34001c00, 0x00001400, 0x00000056 },
+ { 0x01600031, 0x27000c01, 0x408d0000, 0x0248a007 },
+ { 0x0000040c, 0x21043da1, 0x000000a2, 0x00010001 },
+ { 0x00000801, 0x21080061, 0x00000000, 0x0003000f },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x01600031, 0x28000c01, 0x408d0000, 0x0228a008 },
+ { 0x00200001, 0x210001a1, 0x004500a0, 0x00000000 },
+ { 0x00000001, 0x21080061, 0x00000000, 0x0007000f },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x00000001, 0x26020228, 0x000000ba, 0x00000000 },
+ { 0x00610001, 0x24400129, 0x020000b8, 0x00000000 },
+ { 0x00710001, 0x24400169, 0x02000000, 0x00000000 },
+ { 0x00000001, 0x24600061, 0x00000000, 0x00040001 },
+ { 0x00000001, 0x24640061, 0x00000000, 0x00400010 },
+ { 0x00000001, 0x24680061, 0x00000000, 0x04000100 },
+ { 0x00000001, 0x246c0061, 0x00000000, 0x40001000 },
+ { 0x00000001, 0x26020128, 0x00000440, 0x00000000 },
+ { 0x00910001, 0x41400231, 0x02b10700, 0x00000000 },
+ { 0x02600005, 0x2000252c, 0x02000440, 0x008d0460 },
+ { 0x00710001, 0x42400231, 0x02ae0800, 0x00000000 },
+ { 0x00710001, 0x43400231, 0x02ae0801, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x00000442, 0x00000000 },
+ { 0x00910001, 0x41600231, 0x02b10710, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x00000444, 0x00000000 },
+ { 0x00910001, 0x41800231, 0x02b10720, 0x00000000 },
+ { 0x02600005, 0x2000252c, 0x02000444, 0x008d0460 },
+ { 0x00710001, 0x42500231, 0x02ae0810, 0x00000000 },
+ { 0x00710001, 0x43500231, 0x02ae0811, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x00000446, 0x00000000 },
+ { 0x00910001, 0x41a00231, 0x02b10730, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x00000448, 0x00000000 },
+ { 0x00910001, 0x41c00231, 0x02b10740, 0x00000000 },
+ { 0x02600005, 0x2000252c, 0x02000448, 0x008d0460 },
+ { 0x00710001, 0x42600231, 0x02ae0820, 0x00000000 },
+ { 0x00710001, 0x43600231, 0x02ae0821, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x0000044a, 0x00000000 },
+ { 0x00910001, 0x41e00231, 0x02b10750, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x0000044c, 0x00000000 },
+ { 0x00910001, 0x42000231, 0x02b10760, 0x00000000 },
+ { 0x02600005, 0x2000252c, 0x0200044c, 0x008d0460 },
+ { 0x00710001, 0x42700231, 0x02ae0830, 0x00000000 },
+ { 0x00710001, 0x43700231, 0x02ae0831, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x0000044e, 0x00000000 },
+ { 0x00910001, 0x42200231, 0x02b10770, 0x00000000 },
+ { 0x00800001, 0x20400232, 0x00d20140, 0x00000000 },
+ { 0x00800001, 0x20500232, 0x00d20160, 0x00000000 },
+ { 0x00800001, 0x20600232, 0x00d20180, 0x00000000 },
+ { 0x00800001, 0x20700232, 0x00d201a0, 0x00000000 },
+ { 0x00800001, 0x20800232, 0x00d201c0, 0x00000000 },
+ { 0x00800001, 0x20900232, 0x00d201e0, 0x00000000 },
+ { 0x00800001, 0x20a00232, 0x00d20200, 0x00000000 },
+ { 0x00800001, 0x20b00232, 0x00d20220, 0x00000000 },
+ { 0x01600031, 0x20000c04, 0x508d0000, 0x0a082007 },
+ { 0x00200001, 0x210001a5, 0x004500a0, 0x00000000 },
+ { 0x0000000c, 0x21043ca5, 0x00000104, 0x00010001 },
+ { 0x00000001, 0x21080061, 0x00000000, 0x0003000f },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x00800001, 0x40400232, 0x00d20240, 0x00000000 },
+ { 0x00800001, 0x40410232, 0x00d20340, 0x00000000 },
+ { 0x00800001, 0x40600232, 0x00d20260, 0x00000000 },
+ { 0x00800001, 0x40610232, 0x00d20360, 0x00000000 },
+ { 0x01600031, 0x20000c04, 0x508d0000, 0x06082008 },
+ { 0x01000040, 0x20863dad, 0x00000086, 0xffffffff },
+ { 0x00000040, 0x20a03dad, 0x000000a0, 0x00100010 },
+ { 0x05000010, 0x200035ac, 0x020000a0, 0x00000084 },
+ { 0x00010220, 0x34001c00, 0x00001400, 0x00000008 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0xffffff3a },
+ { 0x00000001, 0x20a001ad, 0x0000008a, 0x00000000 },
+ { 0x00000040, 0x20a23dad, 0x000000a2, 0x00080008 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0xffffff34 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
+ { 0x0f000031, 0x20000c04, 0x708d0000, 0x82000000 },
+ { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
+ { 0x0f000031, 0x20000c04, 0x708d0000, 0x82000000 },
diff --git a/src/shaders/post_processing/gen5_6/nv12_dn_nv12.g6b b/src/shaders/post_processing/gen5_6/nv12_dn_nv12.g6b
new file mode 100644
index 0000000..0ba2f55
--- /dev/null
+++ b/src/shaders/post_processing/gen5_6/nv12_dn_nv12.g6b
@@ -0,0 +1,186 @@
+ { 0x00600001, 0x20e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21600061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21a00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x22000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x22200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x22400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x22600061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x22800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x22a00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x22c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x22e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x23000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x23200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x23400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x23600061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x23800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x23a00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x23c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x23e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x24000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x24200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x24400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x24600061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x24800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x24a00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x24c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x24e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x25000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x25200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x25400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x25600061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x25800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x25a00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x25c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x25e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x26000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x26200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x26400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x26600061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x26800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x26a00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x26c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x26e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x27000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x27200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x27400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x27600061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x27800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x27a00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x27c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x27e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28600061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28a00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x29000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x29200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x29400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x29600061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x29800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x29a00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x29c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x29e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21000021, 0x008d0000, 0x00000000 },
+ { 0x00000441, 0x20842e2d, 0x000000b7, 0x00100010 },
+ { 0x00000c01, 0x2086022d, 0x000000bb, 0x00000000 },
+ { 0x00000801, 0x208a01ad, 0x000000a0, 0x00000000 },
+ { 0x00200001, 0x209403bd, 0x006600a4, 0x00000000 },
+ { 0x00000040, 0x208435ad, 0x00000084, 0x000000a0 },
+ { 0x00200040, 0x210035a5, 0x004500a0, 0x00450074 },
+ { 0x0000000c, 0x21043ca5, 0x00000104, 0x00010001 },
+ { 0x00000001, 0x21080061, 0x00000000, 0x0003000f },
+ { 0x00600001, 0x20400022, 0x008d0100, 0x00000000 },
+ { 0x04600031, 0x28000cc1, 0x00000040, 0x02298002 },
+ { 0x00800001, 0x22600229, 0x00d20820, 0x00000000 },
+ { 0x00800001, 0x23600229, 0x00d20821, 0x00000000 },
+ { 0x00800001, 0x22400229, 0x00d20800, 0x00000000 },
+ { 0x00800001, 0x23400229, 0x00d20801, 0x00000000 },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x00000001, 0x204801aa, 0x000000a0, 0x00000000 },
+ { 0x00000001, 0x205801aa, 0x000000a2, 0x00000000 },
+ { 0x02600031, 0x24400cc1, 0x00000020, 0x045b8004 },
+ { 0x00800001, 0x21400229, 0x00b10440, 0x00000000 },
+ { 0x00800001, 0x21600229, 0x00b10450, 0x00000000 },
+ { 0x00800001, 0x21800229, 0x00b10460, 0x00000000 },
+ { 0x00800001, 0x21a00229, 0x00b10470, 0x00000000 },
+ { 0x00800001, 0x21c00229, 0x00b10480, 0x00000000 },
+ { 0x00800001, 0x21e00229, 0x00b10490, 0x00000000 },
+ { 0x00800001, 0x22000229, 0x00b104a0, 0x00000000 },
+ { 0x00800001, 0x22200229, 0x00b104b0, 0x00000000 },
+ { 0x00200008, 0x21003da1, 0x004500a0, 0x00020002 },
+ { 0x00000040, 0x21002421, 0x00000100, 0x00000034 },
+ { 0x00000001, 0x21080061, 0x00000000, 0x00010003 },
+ { 0x00600001, 0x21a00022, 0x008d0100, 0x00000000 },
+ { 0x00200001, 0x21c00022, 0x004504c0, 0x00000000 },
+ { 0x05600031, 0x20000cc4, 0x000001a0, 0x04094014 },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x00200001, 0x202001a6, 0x004500a0, 0x00000000 },
+ { 0x00000001, 0x20280062, 0x00000000, 0x0007000f },
+ { 0x00000005, 0x24000c20, 0x000000b8, 0x00ffffff },
+ { 0x04000010, 0x20000c04, 0x00000400, 0x00ffffff },
+ { 0x00010220, 0x34001c00, 0x00001400, 0x00000056 },
+ { 0x04600031, 0x27000cc1, 0x00000020, 0x02498007 },
+ { 0x0000040c, 0x21043da1, 0x000000a2, 0x00010001 },
+ { 0x00000801, 0x21080061, 0x00000000, 0x0003000f },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x04600031, 0x28000cc1, 0x00000020, 0x02298008 },
+ { 0x00200001, 0x210001a1, 0x004500a0, 0x00000000 },
+ { 0x00000001, 0x21080061, 0x00000000, 0x0007000f },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x00000001, 0x26020228, 0x000000ba, 0x00000000 },
+ { 0x00610001, 0x24400129, 0x020000b8, 0x00000000 },
+ { 0x00710001, 0x24400169, 0x02000000, 0x00000000 },
+ { 0x00000001, 0x24600061, 0x00000000, 0x00040001 },
+ { 0x00000001, 0x24640061, 0x00000000, 0x00400010 },
+ { 0x00000001, 0x24680061, 0x00000000, 0x04000100 },
+ { 0x00000001, 0x246c0061, 0x00000000, 0x40001000 },
+ { 0x00000001, 0x26020128, 0x00000440, 0x00000000 },
+ { 0x00910001, 0x41400231, 0x02b10700, 0x00000000 },
+ { 0x02600005, 0x2000252c, 0x02000440, 0x008d0460 },
+ { 0x00710001, 0x42400231, 0x02ae0800, 0x00000000 },
+ { 0x00710001, 0x43400231, 0x02ae0801, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x00000442, 0x00000000 },
+ { 0x00910001, 0x41600231, 0x02b10710, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x00000444, 0x00000000 },
+ { 0x00910001, 0x41800231, 0x02b10720, 0x00000000 },
+ { 0x02600005, 0x2000252c, 0x02000444, 0x008d0460 },
+ { 0x00710001, 0x42500231, 0x02ae0810, 0x00000000 },
+ { 0x00710001, 0x43500231, 0x02ae0811, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x00000446, 0x00000000 },
+ { 0x00910001, 0x41a00231, 0x02b10730, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x00000448, 0x00000000 },
+ { 0x00910001, 0x41c00231, 0x02b10740, 0x00000000 },
+ { 0x02600005, 0x2000252c, 0x02000448, 0x008d0460 },
+ { 0x00710001, 0x42600231, 0x02ae0820, 0x00000000 },
+ { 0x00710001, 0x43600231, 0x02ae0821, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x0000044a, 0x00000000 },
+ { 0x00910001, 0x41e00231, 0x02b10750, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x0000044c, 0x00000000 },
+ { 0x00910001, 0x42000231, 0x02b10760, 0x00000000 },
+ { 0x02600005, 0x2000252c, 0x0200044c, 0x008d0460 },
+ { 0x00710001, 0x42700231, 0x02ae0830, 0x00000000 },
+ { 0x00710001, 0x43700231, 0x02ae0831, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x0000044e, 0x00000000 },
+ { 0x00910001, 0x42200231, 0x02b10770, 0x00000000 },
+ { 0x00800001, 0x20400232, 0x00d20140, 0x00000000 },
+ { 0x00800001, 0x20500232, 0x00d20160, 0x00000000 },
+ { 0x00800001, 0x20600232, 0x00d20180, 0x00000000 },
+ { 0x00800001, 0x20700232, 0x00d201a0, 0x00000000 },
+ { 0x00800001, 0x20800232, 0x00d201c0, 0x00000000 },
+ { 0x00800001, 0x20900232, 0x00d201e0, 0x00000000 },
+ { 0x00800001, 0x20a00232, 0x00d20200, 0x00000000 },
+ { 0x00800001, 0x20b00232, 0x00d20220, 0x00000000 },
+ { 0x05600031, 0x20000cc4, 0x00000020, 0x0a094007 },
+ { 0x00200001, 0x210001a5, 0x004500a0, 0x00000000 },
+ { 0x0000000c, 0x21043ca5, 0x00000104, 0x00010001 },
+ { 0x00000001, 0x21080061, 0x00000000, 0x0003000f },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x00800001, 0x40400232, 0x00d20240, 0x00000000 },
+ { 0x00800001, 0x40410232, 0x00d20340, 0x00000000 },
+ { 0x00800001, 0x40600232, 0x00d20260, 0x00000000 },
+ { 0x00800001, 0x40610232, 0x00d20360, 0x00000000 },
+ { 0x05600031, 0x20000cc4, 0x00000020, 0x06094008 },
+ { 0x01000040, 0x20863dad, 0x00000086, 0xffffffff },
+ { 0x00000040, 0x20a03dad, 0x000000a0, 0x00100010 },
+ { 0x05000010, 0x200035ac, 0x020000a0, 0x00000084 },
+ { 0x00010220, 0x34001c00, 0x00001400, 0x00000008 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0xffffff3a },
+ { 0x00000001, 0x20a001ad, 0x0000008a, 0x00000000 },
+ { 0x00000040, 0x20a23dad, 0x000000a2, 0x00080008 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0xffffff34 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
+ { 0x07000031, 0x20001cc4, 0x000001e0, 0x82000010 },
+ { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
+ { 0x07000031, 0x20001cc4, 0x000001e0, 0x82000010 },
diff --git a/src/shaders/post_processing/nv12_dndi_nv12.asm b/src/shaders/post_processing/gen5_6/nv12_dndi_nv12.asm
similarity index 100%
rename from src/shaders/post_processing/nv12_dndi_nv12.asm
rename to src/shaders/post_processing/gen5_6/nv12_dndi_nv12.asm
diff --git a/src/shaders/post_processing/gen5_6/nv12_dndi_nv12.g4b.gen5 b/src/shaders/post_processing/gen5_6/nv12_dndi_nv12.g4b.gen5
new file mode 100644
index 0000000..aee45d1
--- /dev/null
+++ b/src/shaders/post_processing/gen5_6/nv12_dndi_nv12.g4b.gen5
@@ -0,0 +1,90 @@
+ { 0x00600001, 0x21000021, 0x008d0000, 0x00000000 },
+ { 0x00000441, 0x20842e2d, 0x000000b7, 0x00100010 },
+ { 0x00000c01, 0x2086022d, 0x000000bb, 0x00000000 },
+ { 0x00000801, 0x208a01ad, 0x000000a0, 0x00000000 },
+ { 0x00200001, 0x209403bd, 0x006600a4, 0x00000000 },
+ { 0x00000040, 0x208435ad, 0x00000084, 0x000000a0 },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x00000001, 0x204801aa, 0x000000a0, 0x00000000 },
+ { 0x00000001, 0x205801aa, 0x000000a2, 0x00000000 },
+ { 0x01600031, 0x24400c01, 0x208d0000, 0x04cb8004 },
+ { 0x00800001, 0x21400229, 0x00b10440, 0x00000000 },
+ { 0x00800001, 0x21600229, 0x00b10450, 0x00000000 },
+ { 0x00800001, 0x21800229, 0x00b10460, 0x00000000 },
+ { 0x00800001, 0x21a00229, 0x00b10470, 0x00000000 },
+ { 0x00600001, 0x22400229, 0x00ae0481, 0x00000000 },
+ { 0x00600001, 0x23400229, 0x00ae0480, 0x00000000 },
+ { 0x00600001, 0x22500229, 0x00ae0491, 0x00000000 },
+ { 0x00600001, 0x23500229, 0x00ae0490, 0x00000000 },
+ { 0x00600001, 0x22600229, 0x00ae04a1, 0x00000000 },
+ { 0x00600001, 0x23600229, 0x00ae04a0, 0x00000000 },
+ { 0x00600001, 0x22700229, 0x00ae04b1, 0x00000000 },
+ { 0x00600001, 0x23700229, 0x00ae04b0, 0x00000000 },
+ { 0x00800001, 0x21c00229, 0x00b104c0, 0x00000000 },
+ { 0x00800001, 0x21e00229, 0x00b104d0, 0x00000000 },
+ { 0x00800001, 0x22000229, 0x00b104e0, 0x00000000 },
+ { 0x00800001, 0x22200229, 0x00b104f0, 0x00000000 },
+ { 0x00600001, 0x22800229, 0x00ae0501, 0x00000000 },
+ { 0x00600001, 0x23800229, 0x00ae0500, 0x00000000 },
+ { 0x00600001, 0x22900229, 0x00ae0511, 0x00000000 },
+ { 0x00600001, 0x23900229, 0x00ae0510, 0x00000000 },
+ { 0x00600001, 0x22a00229, 0x00ae0521, 0x00000000 },
+ { 0x00600001, 0x23a00229, 0x00ae0520, 0x00000000 },
+ { 0x00600001, 0x22b00229, 0x00ae0531, 0x00000000 },
+ { 0x00600001, 0x23b00229, 0x00ae0530, 0x00000000 },
+ { 0x00000008, 0x21003da1, 0x000000a0, 0x00010001 },
+ { 0x00000001, 0x210401a1, 0x000000a2, 0x00000000 },
+ { 0x00000001, 0x21080061, 0x00000000, 0x00030007 },
+ { 0x00600001, 0x21600022, 0x008d0100, 0x00000000 },
+ { 0x00600001, 0x21800022, 0x008d0540, 0x00000000 },
+ { 0x0b600031, 0x20000c04, 0x508d0000, 0x04082014 },
+ { 0x00200008, 0x21003da1, 0x004500a0, 0x00020002 },
+ { 0x00000040, 0x21002421, 0x00000100, 0x00000034 },
+ { 0x00000001, 0x21080061, 0x00000000, 0x00000003 },
+ { 0x00600001, 0x21a00022, 0x008d0100, 0x00000000 },
+ { 0x00000001, 0x21c00022, 0x00000560, 0x00000000 },
+ { 0x0d600031, 0x20000c04, 0x508d0000, 0x04082014 },
+ { 0x00400040, 0x22083e28, 0x00690024, 0x07000700 },
+ { 0x01000010, 0x20003e2c, 0x0000003b, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x00001400, 0x0000000a },
+ { 0x00400001, 0x20400022, 0x00690580, 0x00000000 },
+ { 0x00400001, 0x20500022, 0x006904d0, 0x00000000 },
+ { 0x00400001, 0x20600022, 0x00690590, 0x00000000 },
+ { 0x00400001, 0x20700022, 0x006904f0, 0x00000000 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000008 },
+ { 0x00400001, 0x20400022, 0x006904c0, 0x00000000 },
+ { 0x00400001, 0x20500022, 0x00690580, 0x00000000 },
+ { 0x00400001, 0x20600022, 0x006904e0, 0x00000000 },
+ { 0x00400001, 0x20700022, 0x00690590, 0x00000000 },
+ { 0x00800001, 0x20400232, 0x00b104c0, 0x00000000 },
+ { 0x00800001, 0x20500232, 0x00b104d0, 0x00000000 },
+ { 0x00800001, 0x20600232, 0x00b104e0, 0x00000000 },
+ { 0x00800001, 0x20700232, 0x00b104f0, 0x00000000 },
+ { 0x00200001, 0x210001a1, 0x004500a0, 0x00000000 },
+ { 0x00000001, 0x21080061, 0x00000000, 0x0003000f },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x01600031, 0x20000c04, 0x508d0000, 0x06082007 },
+ { 0x00200040, 0x210035a5, 0x004500a0, 0x00450074 },
+ { 0x0000000c, 0x21043ca5, 0x00000104, 0x00010001 },
+ { 0x00000001, 0x21080061, 0x00000000, 0x0001000f },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x01600031, 0x28000c01, 0x408d0000, 0x0218a002 },
+ { 0x00200001, 0x210001a1, 0x004500a0, 0x00000000 },
+ { 0x0000000c, 0x21043ca5, 0x00000104, 0x00010001 },
+ { 0x00000001, 0x21080061, 0x00000000, 0x0001000f },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x00600001, 0x20400022, 0x008d0800, 0x00000000 },
+ { 0x01600031, 0x20000c04, 0x508d0000, 0x04082008 },
+ { 0x01000040, 0x20863dad, 0x00000086, 0xffffffff },
+ { 0x00000040, 0x20a03dad, 0x000000a0, 0x00100010 },
+ { 0x05000010, 0x200035ac, 0x020000a0, 0x00000084 },
+ { 0x00010220, 0x34001c00, 0x00001400, 0x00000008 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0xffffff68 },
+ { 0x00000001, 0x20a001ad, 0x0000008a, 0x00000000 },
+ { 0x00000040, 0x20a23dad, 0x000000a2, 0x00080008 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0xffffff62 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
+ { 0x0f000031, 0x20000c04, 0x708d0000, 0x82000000 },
+ { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
+ { 0x0f000031, 0x20000c04, 0x708d0000, 0x82000000 },
diff --git a/src/shaders/post_processing/gen5_6/nv12_dndi_nv12.g6b b/src/shaders/post_processing/gen5_6/nv12_dndi_nv12.g6b
new file mode 100644
index 0000000..29003af
--- /dev/null
+++ b/src/shaders/post_processing/gen5_6/nv12_dndi_nv12.g6b
@@ -0,0 +1,163 @@
+ { 0x00600001, 0x20e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21600061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21a00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x22000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x22200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x22400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x22600061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x22800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x22a00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x22c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x22e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x23000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x23200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x23400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x23600061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x23800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x23a00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x23c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x23e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x24000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x24200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x24400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x24600061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x24800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x24a00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x24c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x24e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x25000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x25200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x25400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x25600061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x25800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x25a00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x25c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x25e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x26000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x26200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x26400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x26600061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x26800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x26a00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x26c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x26e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x27000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x27200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x27400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x27600061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x27800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x27a00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x27c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x27e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28600061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28a00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x29000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x29200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x29400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x29600061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x29800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x29a00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x29c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x29e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21000021, 0x008d0000, 0x00000000 },
+ { 0x00000441, 0x20842e2d, 0x000000b7, 0x00100010 },
+ { 0x00000c01, 0x2086022d, 0x000000bb, 0x00000000 },
+ { 0x00000801, 0x208a01ad, 0x000000a0, 0x00000000 },
+ { 0x00200001, 0x209403bd, 0x006600a4, 0x00000000 },
+ { 0x00000040, 0x208435ad, 0x00000084, 0x000000a0 },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x00000001, 0x204801aa, 0x000000a0, 0x00000000 },
+ { 0x00000001, 0x205801aa, 0x000000a2, 0x00000000 },
+ { 0x02600031, 0x24400cc1, 0x00000020, 0x04cb8004 },
+ { 0x00800001, 0x21400229, 0x00b10440, 0x00000000 },
+ { 0x00800001, 0x21600229, 0x00b10450, 0x00000000 },
+ { 0x00800001, 0x21800229, 0x00b10460, 0x00000000 },
+ { 0x00800001, 0x21a00229, 0x00b10470, 0x00000000 },
+ { 0x00600001, 0x22400229, 0x00ae0481, 0x00000000 },
+ { 0x00600001, 0x23400229, 0x00ae0480, 0x00000000 },
+ { 0x00600001, 0x22500229, 0x00ae0491, 0x00000000 },
+ { 0x00600001, 0x23500229, 0x00ae0490, 0x00000000 },
+ { 0x00600001, 0x22600229, 0x00ae04a1, 0x00000000 },
+ { 0x00600001, 0x23600229, 0x00ae04a0, 0x00000000 },
+ { 0x00600001, 0x22700229, 0x00ae04b1, 0x00000000 },
+ { 0x00600001, 0x23700229, 0x00ae04b0, 0x00000000 },
+ { 0x00800001, 0x21c00229, 0x00b104c0, 0x00000000 },
+ { 0x00800001, 0x21e00229, 0x00b104d0, 0x00000000 },
+ { 0x00800001, 0x22000229, 0x00b104e0, 0x00000000 },
+ { 0x00800001, 0x22200229, 0x00b104f0, 0x00000000 },
+ { 0x00600001, 0x22800229, 0x00ae0501, 0x00000000 },
+ { 0x00600001, 0x23800229, 0x00ae0500, 0x00000000 },
+ { 0x00600001, 0x22900229, 0x00ae0511, 0x00000000 },
+ { 0x00600001, 0x23900229, 0x00ae0510, 0x00000000 },
+ { 0x00600001, 0x22a00229, 0x00ae0521, 0x00000000 },
+ { 0x00600001, 0x23a00229, 0x00ae0520, 0x00000000 },
+ { 0x00600001, 0x22b00229, 0x00ae0531, 0x00000000 },
+ { 0x00600001, 0x23b00229, 0x00ae0530, 0x00000000 },
+ { 0x00000008, 0x21003da1, 0x000000a0, 0x00010001 },
+ { 0x00000001, 0x210401a1, 0x000000a2, 0x00000000 },
+ { 0x00000001, 0x21080061, 0x00000000, 0x00030007 },
+ { 0x00600001, 0x21600022, 0x008d0100, 0x00000000 },
+ { 0x00600001, 0x21800022, 0x008d0540, 0x00000000 },
+ { 0x05600031, 0x20000cc4, 0x00000160, 0x04094014 },
+ { 0x00200008, 0x21003da1, 0x004500a0, 0x00020002 },
+ { 0x00000040, 0x21002421, 0x00000100, 0x00000034 },
+ { 0x00000001, 0x21080061, 0x00000000, 0x00000003 },
+ { 0x00600001, 0x21a00022, 0x008d0100, 0x00000000 },
+ { 0x00000001, 0x21c00022, 0x00000560, 0x00000000 },
+ { 0x05600031, 0x20000cc4, 0x000001a0, 0x04094014 },
+ { 0x00400040, 0x22083e28, 0x00690024, 0x07000700 },
+ { 0x01000010, 0x20003e2c, 0x0000003b, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x00001400, 0x0000000a },
+ { 0x00400001, 0x20400022, 0x00690580, 0x00000000 },
+ { 0x00400001, 0x20500022, 0x006904d0, 0x00000000 },
+ { 0x00400001, 0x20600022, 0x00690590, 0x00000000 },
+ { 0x00400001, 0x20700022, 0x006904f0, 0x00000000 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000008 },
+ { 0x00400001, 0x20400022, 0x006904c0, 0x00000000 },
+ { 0x00400001, 0x20500022, 0x00690580, 0x00000000 },
+ { 0x00400001, 0x20600022, 0x006904e0, 0x00000000 },
+ { 0x00400001, 0x20700022, 0x00690590, 0x00000000 },
+ { 0x00800001, 0x20400232, 0x00b104c0, 0x00000000 },
+ { 0x00800001, 0x20500232, 0x00b104d0, 0x00000000 },
+ { 0x00800001, 0x20600232, 0x00b104e0, 0x00000000 },
+ { 0x00800001, 0x20700232, 0x00b104f0, 0x00000000 },
+ { 0x00200001, 0x210001a1, 0x004500a0, 0x00000000 },
+ { 0x00000001, 0x21080061, 0x00000000, 0x0003000f },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x05600031, 0x20000cc4, 0x00000020, 0x06094007 },
+ { 0x00200040, 0x210035a5, 0x004500a0, 0x00450074 },
+ { 0x0000000c, 0x21043ca5, 0x00000104, 0x00010001 },
+ { 0x00000001, 0x21080061, 0x00000000, 0x0001000f },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x04600031, 0x28000cc1, 0x00000020, 0x02198002 },
+ { 0x00200001, 0x210001a1, 0x004500a0, 0x00000000 },
+ { 0x0000000c, 0x21043ca5, 0x00000104, 0x00010001 },
+ { 0x00000001, 0x21080061, 0x00000000, 0x0001000f },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x00600001, 0x20400022, 0x008d0800, 0x00000000 },
+ { 0x05600031, 0x20000cc4, 0x00000020, 0x04094008 },
+ { 0x01000040, 0x20863dad, 0x00000086, 0xffffffff },
+ { 0x00000040, 0x20a03dad, 0x000000a0, 0x00100010 },
+ { 0x05000010, 0x200035ac, 0x020000a0, 0x00000084 },
+ { 0x00010220, 0x34001c00, 0x00001400, 0x00000008 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0xffffff68 },
+ { 0x00000001, 0x20a001ad, 0x0000008a, 0x00000000 },
+ { 0x00000040, 0x20a23dad, 0x000000a2, 0x00080008 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0xffffff62 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
+ { 0x07000031, 0x20001cc4, 0x000001e0, 0x82000010 },
+ { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
+ { 0x07000031, 0x20001cc4, 0x000001e0, 0x82000010 },
diff --git a/src/shaders/post_processing/nv12_load_save_nv12.asm b/src/shaders/post_processing/gen5_6/nv12_load_save_nv12.asm
similarity index 100%
rename from src/shaders/post_processing/nv12_load_save_nv12.asm
rename to src/shaders/post_processing/gen5_6/nv12_load_save_nv12.asm
diff --git a/src/shaders/post_processing/nv12_load_save_nv12.g4b.gen5 b/src/shaders/post_processing/gen5_6/nv12_load_save_nv12.g4b.gen5
similarity index 100%
rename from src/shaders/post_processing/nv12_load_save_nv12.g4b.gen5
rename to src/shaders/post_processing/gen5_6/nv12_load_save_nv12.g4b.gen5
diff --git a/src/shaders/post_processing/nv12_load_save_nv12.g6b b/src/shaders/post_processing/gen5_6/nv12_load_save_nv12.g6b
similarity index 100%
rename from src/shaders/post_processing/nv12_load_save_nv12.g6b
rename to src/shaders/post_processing/gen5_6/nv12_load_save_nv12.g6b
diff --git a/src/shaders/post_processing/gen5_6/nv12_load_save_pa.asm b/src/shaders/post_processing/gen5_6/nv12_load_save_pa.asm
new file mode 100755
index 0000000..3fa4494
--- /dev/null
+++ b/src/shaders/post_processing/gen5_6/nv12_load_save_pa.asm
@@ -0,0 +1,18 @@
+// Module name: NV12_LOAD_SAVE_PA -- kernel body is the include sequence below, in order
+.kernel NV12_LOAD_SAVE_PL1 // NOTE(review): name still reads PL1 although this file is nv12_load_save_pa.asm and saves via PL8x8_Save_PA.asm; is it just a label? confirm before renaming
+.code
+
+#include "SetupVPKernel.asm"
+#include "Multiple_Loop_Head.asm"
+#include "NV12_Load_8x5.asm"
+#include "PL8x5_PL8x8.asm"
+#include "PL8x8_Save_PA.asm"
+#include "Multiple_Loop.asm"
+
+END_THREAD // End of Thread
+
+.end_code
+
+.end_kernel
+
+// end of nv12_load_save_pa.asm
diff --git a/src/shaders/post_processing/gen5_6/nv12_load_save_pa.g4b.gen5 b/src/shaders/post_processing/gen5_6/nv12_load_save_pa.g4b.gen5
new file mode 100644
index 0000000..dad88db
--- /dev/null
+++ b/src/shaders/post_processing/gen5_6/nv12_load_save_pa.g4b.gen5
@@ -0,0 +1,117 @@
+ { 0x00600001, 0x21000021, 0x008d0000, 0x00000000 },
+ { 0x00000441, 0x20842e2d, 0x000000b7, 0x00100010 },
+ { 0x00000c01, 0x2086022d, 0x000000bb, 0x00000000 },
+ { 0x00000801, 0x208a01ad, 0x000000a0, 0x00000000 },
+ { 0x00200001, 0x209403bd, 0x006600a4, 0x00000000 },
+ { 0x00000040, 0x208435ad, 0x00000084, 0x000000a0 },
+ { 0x00200040, 0x210035a5, 0x004500a0, 0x00450074 },
+ { 0x00000001, 0x21080061, 0x00000000, 0x0007000f },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x01600031, 0x27000c01, 0x408d0000, 0x0248a001 },
+ { 0x0000000c, 0x21043ca5, 0x00000104, 0x00010001 },
+ { 0x00000001, 0x21080061, 0x00000000, 0x0004000f },
+ { 0x00600001, 0x20400022, 0x008d0100, 0x00000000 },
+ { 0x02600031, 0x28000c01, 0x408d0000, 0x0238a002 },
+ { 0x00800001, 0x22200229, 0x00b10770, 0x00000000 },
+ { 0x00800001, 0x22000229, 0x00b10760, 0x00000000 },
+ { 0x00800001, 0x21e00229, 0x00b10750, 0x00000000 },
+ { 0x00800001, 0x21c00229, 0x00b10740, 0x00000000 },
+ { 0x00800001, 0x21a00229, 0x00b10730, 0x00000000 },
+ { 0x00800001, 0x21800229, 0x00b10720, 0x00000000 },
+ { 0x00800001, 0x21600229, 0x00b10710, 0x00000000 },
+ { 0x00800001, 0x21400229, 0x00b10700, 0x00000000 },
+ { 0x00800001, 0x22800229, 0x00ae0840, 0x00000000 },
+ { 0x00800001, 0x23800229, 0x00ae0841, 0x00000000 },
+ { 0x00800001, 0x22600229, 0x00ae0820, 0x00000000 },
+ { 0x00800001, 0x23600229, 0x00ae0821, 0x00000000 },
+ { 0x00800001, 0x22400229, 0x00ae0800, 0x00000000 },
+ { 0x00800001, 0x23400229, 0x00ae0801, 0x00000000 },
+ { 0x80600042, 0x22b02529, 0x008d0270, 0x008d0280 },
+ { 0x80600042, 0x23b02529, 0x008d0370, 0x008d0380 },
+ { 0x00600001, 0x22a00129, 0x008d0270, 0x00000000 },
+ { 0x80600042, 0x22902529, 0x008d0260, 0x008d0270 },
+ { 0x00600001, 0x23a00129, 0x008d0370, 0x00000000 },
+ { 0x80600042, 0x23902529, 0x008d0360, 0x008d0370 },
+ { 0x00600001, 0x22800129, 0x008d0260, 0x00000000 },
+ { 0x80600042, 0x22702529, 0x008d0250, 0x008d0260 },
+ { 0x00600001, 0x23800129, 0x008d0360, 0x00000000 },
+ { 0x80600042, 0x23702529, 0x008d0350, 0x008d0360 },
+ { 0x00600001, 0x22600129, 0x008d0250, 0x00000000 },
+ { 0x80600042, 0x22502529, 0x008d0240, 0x008d0250 },
+ { 0x00600001, 0x23600129, 0x008d0350, 0x00000000 },
+ { 0x80600042, 0x23502529, 0x008d0340, 0x008d0350 },
+ { 0x00400040, 0x22083e28, 0x00690028, 0x07000700 },
+ { 0x00800001, 0xd0000231, 0x00d20140, 0x00000000 },
+ { 0x00800001, 0xd0200231, 0x00d20160, 0x00000000 },
+ { 0x00800001, 0xd0400231, 0x00d20180, 0x00000000 },
+ { 0x00800001, 0xd0600231, 0x00d201a0, 0x00000000 },
+ { 0x00800001, 0xd0800231, 0x00d201c0, 0x00000000 },
+ { 0x00800001, 0xd0a00231, 0x00d201e0, 0x00000000 },
+ { 0x00800001, 0xd0c00231, 0x00d20200, 0x00000000 },
+ { 0x00800001, 0xd0e00231, 0x00d20220, 0x00000000 },
+ { 0x00600001, 0xf4000231, 0x00ae0240, 0x00000000 },
+ { 0x00600001, 0xf8000231, 0x00ae0340, 0x00000000 },
+ { 0x00600001, 0xf4200231, 0x00ae0250, 0x00000000 },
+ { 0x00600001, 0xf8200231, 0x00ae0350, 0x00000000 },
+ { 0x00600001, 0xf4400231, 0x00ae0260, 0x00000000 },
+ { 0x00600001, 0xf8400231, 0x00ae0360, 0x00000000 },
+ { 0x00600001, 0xf4600231, 0x00ae0270, 0x00000000 },
+ { 0x00600001, 0xf8600231, 0x00ae0370, 0x00000000 },
+ { 0x00600001, 0xf4800231, 0x00ae0280, 0x00000000 },
+ { 0x00600001, 0xf8800231, 0x00ae0380, 0x00000000 },
+ { 0x00600001, 0xf4a00231, 0x00ae0290, 0x00000000 },
+ { 0x00600001, 0xf8a00231, 0x00ae0390, 0x00000000 },
+ { 0x00600001, 0xf4c00231, 0x00ae02a0, 0x00000000 },
+ { 0x00600001, 0xf8c00231, 0x00ae03a0, 0x00000000 },
+ { 0x00600001, 0xf4e00231, 0x00ae02b0, 0x00000000 },
+ { 0x00600001, 0xf8e00231, 0x00ae03b0, 0x00000000 },
+ { 0x00000409, 0x21003da5, 0x000000a0, 0x00010001 },
+ { 0x00000c01, 0x210401a5, 0x000000a2, 0x00000000 },
+ { 0x00000801, 0x21080061, 0x00000000, 0x0007001f },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x00000005, 0x24000c20, 0x000000b8, 0x00ffffff },
+ { 0x04000010, 0x20000c04, 0x00000400, 0x00ffffff },
+ { 0x00010220, 0x34001c00, 0x00001400, 0x0000002a },
+ { 0x01600031, 0x21400c01, 0x408d0000, 0x0288a007 },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x00000001, 0x26000228, 0x000000ba, 0x00000000 },
+ { 0x00610001, 0x24400129, 0x000000b8, 0x00000000 },
+ { 0x00710001, 0x24400169, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x00000440, 0x00000000 },
+ { 0x00910001, 0x27000129, 0x02b10140, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x00000442, 0x00000000 },
+ { 0x00910001, 0x27200129, 0x02b10160, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x00000444, 0x00000000 },
+ { 0x00910001, 0x27400129, 0x02b10180, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x00000446, 0x00000000 },
+ { 0x00910001, 0x27600129, 0x02b101a0, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x00000448, 0x00000000 },
+ { 0x00910001, 0x27800129, 0x02b101c0, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x0000044a, 0x00000000 },
+ { 0x00910001, 0x27a00129, 0x02b101e0, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x0000044c, 0x00000000 },
+ { 0x00910001, 0x27c00129, 0x02b10200, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x0000044e, 0x00000000 },
+ { 0x00910001, 0x27e00129, 0x02b10220, 0x00000000 },
+ { 0x00600001, 0x20400022, 0x008d0700, 0x00000000 },
+ { 0x00600001, 0x20600022, 0x008d0720, 0x00000000 },
+ { 0x00600001, 0x20800022, 0x008d0740, 0x00000000 },
+ { 0x00600001, 0x20a00022, 0x008d0760, 0x00000000 },
+ { 0x00600001, 0x20c00022, 0x008d0780, 0x00000000 },
+ { 0x00600001, 0x20e00022, 0x008d07a0, 0x00000000 },
+ { 0x00600001, 0x21000022, 0x008d07c0, 0x00000000 },
+ { 0x00600001, 0x21200022, 0x008d07e0, 0x00000000 },
+ { 0x01600031, 0x20000c04, 0x508d0000, 0x12082007 },
+ { 0x01000040, 0x20863dad, 0x00000086, 0xffffffff },
+ { 0x00000040, 0x20a03dad, 0x000000a0, 0x00100010 },
+ { 0x05000010, 0x200035ac, 0x020000a0, 0x00000084 },
+ { 0x00010220, 0x34001c00, 0x00001400, 0x00000008 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0xffffff32 },
+ { 0x00000001, 0x20a001ad, 0x0000008a, 0x00000000 },
+ { 0x00000040, 0x20a23dad, 0x000000a2, 0x00080008 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0xffffff2c },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
+ { 0x0f000031, 0x20000c04, 0x708d0000, 0x82000000 },
+ { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
+ { 0x0f000031, 0x20000c04, 0x708d0000, 0x82000000 },
diff --git a/src/shaders/post_processing/gen5_6/nv12_load_save_pa.g6b b/src/shaders/post_processing/gen5_6/nv12_load_save_pa.g6b
new file mode 100644
index 0000000..5de798e
--- /dev/null
+++ b/src/shaders/post_processing/gen5_6/nv12_load_save_pa.g6b
@@ -0,0 +1,190 @@
+ { 0x00600001, 0x20e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21600061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21a00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x22000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x22200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x22400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x22600061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x22800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x22a00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x22c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x22e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x23000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x23200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x23400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x23600061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x23800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x23a00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x23c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x23e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x24000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x24200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x24400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x24600061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x24800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x24a00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x24c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x24e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x25000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x25200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x25400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x25600061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x25800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x25a00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x25c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x25e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x26000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x26200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x26400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x26600061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x26800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x26a00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x26c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x26e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x27000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x27200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x27400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x27600061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x27800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x27a00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x27c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x27e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28600061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28a00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x29000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x29200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x29400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x29600061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x29800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x29a00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x29c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x29e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21000021, 0x008d0000, 0x00000000 },
+ { 0x00000441, 0x20842e2d, 0x000000b7, 0x00100010 },
+ { 0x00000c01, 0x2086022d, 0x000000bb, 0x00000000 },
+ { 0x00000801, 0x208a01ad, 0x000000a0, 0x00000000 },
+ { 0x00200001, 0x209403bd, 0x006600a4, 0x00000000 },
+ { 0x00000040, 0x208435ad, 0x00000084, 0x000000a0 },
+ { 0x00200040, 0x210035a5, 0x004500a0, 0x00450074 },
+ { 0x00000001, 0x21080061, 0x00000000, 0x0007000f },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x04600031, 0x27000cc1, 0x00000020, 0x02498001 },
+ { 0x0000000c, 0x21043ca5, 0x00000104, 0x00010001 },
+ { 0x00000001, 0x21080061, 0x00000000, 0x0004000f },
+ { 0x00600001, 0x20400022, 0x008d0100, 0x00000000 },
+ { 0x04600031, 0x28000cc1, 0x00000040, 0x02398002 },
+ { 0x00800001, 0x22200229, 0x00b10770, 0x00000000 },
+ { 0x00800001, 0x22000229, 0x00b10760, 0x00000000 },
+ { 0x00800001, 0x21e00229, 0x00b10750, 0x00000000 },
+ { 0x00800001, 0x21c00229, 0x00b10740, 0x00000000 },
+ { 0x00800001, 0x21a00229, 0x00b10730, 0x00000000 },
+ { 0x00800001, 0x21800229, 0x00b10720, 0x00000000 },
+ { 0x00800001, 0x21600229, 0x00b10710, 0x00000000 },
+ { 0x00800001, 0x21400229, 0x00b10700, 0x00000000 },
+ { 0x00800001, 0x22800229, 0x00ae0840, 0x00000000 },
+ { 0x00800001, 0x23800229, 0x00ae0841, 0x00000000 },
+ { 0x00800001, 0x22600229, 0x00ae0820, 0x00000000 },
+ { 0x00800001, 0x23600229, 0x00ae0821, 0x00000000 },
+ { 0x00800001, 0x22400229, 0x00ae0800, 0x00000000 },
+ { 0x00800001, 0x23400229, 0x00ae0801, 0x00000000 },
+ { 0x80600042, 0x22b02529, 0x008d0270, 0x008d0280 },
+ { 0x80600042, 0x23b02529, 0x008d0370, 0x008d0380 },
+ { 0x00600001, 0x22a00129, 0x008d0270, 0x00000000 },
+ { 0x80600042, 0x22902529, 0x008d0260, 0x008d0270 },
+ { 0x00600001, 0x23a00129, 0x008d0370, 0x00000000 },
+ { 0x80600042, 0x23902529, 0x008d0360, 0x008d0370 },
+ { 0x00600001, 0x22800129, 0x008d0260, 0x00000000 },
+ { 0x80600042, 0x22702529, 0x008d0250, 0x008d0260 },
+ { 0x00600001, 0x23800129, 0x008d0360, 0x00000000 },
+ { 0x80600042, 0x23702529, 0x008d0350, 0x008d0360 },
+ { 0x00600001, 0x22600129, 0x008d0250, 0x00000000 },
+ { 0x80600042, 0x22502529, 0x008d0240, 0x008d0250 },
+ { 0x00600001, 0x23600129, 0x008d0350, 0x00000000 },
+ { 0x80600042, 0x23502529, 0x008d0340, 0x008d0350 },
+ { 0x00400040, 0x22083e28, 0x00690028, 0x07000700 },
+ { 0x00800001, 0xd0000231, 0x00d20140, 0x00000000 },
+ { 0x00800001, 0xd0200231, 0x00d20160, 0x00000000 },
+ { 0x00800001, 0xd0400231, 0x00d20180, 0x00000000 },
+ { 0x00800001, 0xd0600231, 0x00d201a0, 0x00000000 },
+ { 0x00800001, 0xd0800231, 0x00d201c0, 0x00000000 },
+ { 0x00800001, 0xd0a00231, 0x00d201e0, 0x00000000 },
+ { 0x00800001, 0xd0c00231, 0x00d20200, 0x00000000 },
+ { 0x00800001, 0xd0e00231, 0x00d20220, 0x00000000 },
+ { 0x00600001, 0xf4000231, 0x00ae0240, 0x00000000 },
+ { 0x00600001, 0xf8000231, 0x00ae0340, 0x00000000 },
+ { 0x00600001, 0xf4200231, 0x00ae0250, 0x00000000 },
+ { 0x00600001, 0xf8200231, 0x00ae0350, 0x00000000 },
+ { 0x00600001, 0xf4400231, 0x00ae0260, 0x00000000 },
+ { 0x00600001, 0xf8400231, 0x00ae0360, 0x00000000 },
+ { 0x00600001, 0xf4600231, 0x00ae0270, 0x00000000 },
+ { 0x00600001, 0xf8600231, 0x00ae0370, 0x00000000 },
+ { 0x00600001, 0xf4800231, 0x00ae0280, 0x00000000 },
+ { 0x00600001, 0xf8800231, 0x00ae0380, 0x00000000 },
+ { 0x00600001, 0xf4a00231, 0x00ae0290, 0x00000000 },
+ { 0x00600001, 0xf8a00231, 0x00ae0390, 0x00000000 },
+ { 0x00600001, 0xf4c00231, 0x00ae02a0, 0x00000000 },
+ { 0x00600001, 0xf8c00231, 0x00ae03a0, 0x00000000 },
+ { 0x00600001, 0xf4e00231, 0x00ae02b0, 0x00000000 },
+ { 0x00600001, 0xf8e00231, 0x00ae03b0, 0x00000000 },
+ { 0x00000409, 0x21003da5, 0x000000a0, 0x00010001 },
+ { 0x00000c01, 0x210401a5, 0x000000a2, 0x00000000 },
+ { 0x00000801, 0x21080061, 0x00000000, 0x0007001f },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x00000005, 0x24000c20, 0x000000b8, 0x00ffffff },
+ { 0x04000010, 0x20000c04, 0x00000400, 0x00ffffff },
+ { 0x00010220, 0x34001c00, 0x00001400, 0x0000002a },
+ { 0x04600031, 0x21400cc1, 0x00000020, 0x02898007 },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x00000001, 0x26000228, 0x000000ba, 0x00000000 },
+ { 0x00610001, 0x24400129, 0x000000b8, 0x00000000 },
+ { 0x00710001, 0x24400169, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x00000440, 0x00000000 },
+ { 0x00910001, 0x27000129, 0x02b10140, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x00000442, 0x00000000 },
+ { 0x00910001, 0x27200129, 0x02b10160, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x00000444, 0x00000000 },
+ { 0x00910001, 0x27400129, 0x02b10180, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x00000446, 0x00000000 },
+ { 0x00910001, 0x27600129, 0x02b101a0, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x00000448, 0x00000000 },
+ { 0x00910001, 0x27800129, 0x02b101c0, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x0000044a, 0x00000000 },
+ { 0x00910001, 0x27a00129, 0x02b101e0, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x0000044c, 0x00000000 },
+ { 0x00910001, 0x27c00129, 0x02b10200, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x0000044e, 0x00000000 },
+ { 0x00910001, 0x27e00129, 0x02b10220, 0x00000000 },
+ { 0x00600001, 0x20400022, 0x008d0700, 0x00000000 },
+ { 0x00600001, 0x20600022, 0x008d0720, 0x00000000 },
+ { 0x00600001, 0x20800022, 0x008d0740, 0x00000000 },
+ { 0x00600001, 0x20a00022, 0x008d0760, 0x00000000 },
+ { 0x00600001, 0x20c00022, 0x008d0780, 0x00000000 },
+ { 0x00600001, 0x20e00022, 0x008d07a0, 0x00000000 },
+ { 0x00600001, 0x21000022, 0x008d07c0, 0x00000000 },
+ { 0x00600001, 0x21200022, 0x008d07e0, 0x00000000 },
+ { 0x05600031, 0x20000cc4, 0x00000020, 0x12094007 },
+ { 0x01000040, 0x20863dad, 0x00000086, 0xffffffff },
+ { 0x00000040, 0x20a03dad, 0x000000a0, 0x00100010 },
+ { 0x05000010, 0x200035ac, 0x020000a0, 0x00000084 },
+ { 0x00010220, 0x34001c00, 0x00001400, 0x00000008 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0xffffff32 },
+ { 0x00000001, 0x20a001ad, 0x0000008a, 0x00000000 },
+ { 0x00000040, 0x20a23dad, 0x000000a2, 0x00080008 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0xffffff2c },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
+ { 0x07000031, 0x20001cc4, 0x000001e0, 0x82000010 },
+ { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
+ { 0x07000031, 0x20001cc4, 0x000001e0, 0x82000010 },
diff --git a/src/shaders/post_processing/gen5_6/nv12_load_save_pl3.asm b/src/shaders/post_processing/gen5_6/nv12_load_save_pl3.asm
new file mode 100644
index 0000000..9fa44a1
--- /dev/null
+++ b/src/shaders/post_processing/gen5_6/nv12_load_save_pl3.asm
@@ -0,0 +1,17 @@
+// Module name: NV12_LOAD_SAVE_PL3
+.kernel NV12_LOAD_SAVE_PL3
+.code
+
+#include "SetupVPKernel.asm"
+#include "Multiple_Loop_Head.asm"
+#include "NV12_Load_8x4.asm"
+#include "PL8x4_Save_IMC3.asm"
+#include "Multiple_Loop.asm"
+
+END_THREAD // End of Thread
+
+.end_code
+
+.end_kernel
+
+// end of nv12_load_save_pl3.asm
diff --git a/src/shaders/post_processing/gen5_6/nv12_load_save_pl3.g4b.gen5 b/src/shaders/post_processing/gen5_6/nv12_load_save_pl3.g4b.gen5
new file mode 100644
index 0000000..9ca4063
--- /dev/null
+++ b/src/shaders/post_processing/gen5_6/nv12_load_save_pl3.g4b.gen5
@@ -0,0 +1,105 @@
+ { 0x00600001, 0x21000021, 0x008d0000, 0x00000000 },
+ { 0x00000441, 0x20842e2d, 0x000000b7, 0x00100010 },
+ { 0x00000c01, 0x2086022d, 0x000000bb, 0x00000000 },
+ { 0x00000801, 0x208a01ad, 0x000000a0, 0x00000000 },
+ { 0x00200001, 0x209403bd, 0x006600a4, 0x00000000 },
+ { 0x00000040, 0x208435ad, 0x00000084, 0x000000a0 },
+ { 0x00200040, 0x210035a5, 0x004500a0, 0x00450074 },
+ { 0x00000001, 0x21080061, 0x00000000, 0x0007000f },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x01600031, 0x27000c01, 0x408d0000, 0x0248a001 },
+ { 0x0000000c, 0x21043ca5, 0x00000104, 0x00010001 },
+ { 0x00000001, 0x21080061, 0x00000000, 0x0003000f },
+ { 0x00600001, 0x20400022, 0x008d0100, 0x00000000 },
+ { 0x02600031, 0x28000c01, 0x408d0000, 0x0228a002 },
+ { 0x00800001, 0x22200229, 0x00b10770, 0x00000000 },
+ { 0x00800001, 0x22000229, 0x00b10760, 0x00000000 },
+ { 0x00800001, 0x21e00229, 0x00b10750, 0x00000000 },
+ { 0x00800001, 0x21c00229, 0x00b10740, 0x00000000 },
+ { 0x00800001, 0x21a00229, 0x00b10730, 0x00000000 },
+ { 0x00800001, 0x21800229, 0x00b10720, 0x00000000 },
+ { 0x00800001, 0x21600229, 0x00b10710, 0x00000000 },
+ { 0x00800001, 0x21400229, 0x00b10700, 0x00000000 },
+ { 0x00800001, 0x22600229, 0x00d20820, 0x00000000 },
+ { 0x00800001, 0x23600229, 0x00d20821, 0x00000000 },
+ { 0x00800001, 0x22400229, 0x00d20800, 0x00000000 },
+ { 0x00800001, 0x23400229, 0x00d20801, 0x00000000 },
+ { 0x00000005, 0x24000c20, 0x000000b8, 0x00ffffff },
+ { 0x04000010, 0x20000c04, 0x00000400, 0x00ffffff },
+ { 0x00010220, 0x34001c00, 0x00001400, 0x00000052 },
+ { 0x00200001, 0x210001a5, 0x004500a0, 0x00000000 },
+ { 0x00000001, 0x21080061, 0x00000000, 0x0007000f },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x01600031, 0x27000c01, 0x408d0000, 0x0248a007 },
+ { 0x0020000c, 0x21003da5, 0x004500a0, 0x00010001 },
+ { 0x00000001, 0x21080061, 0x00000000, 0x00030007 },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x01600031, 0x28000c01, 0x408d0000, 0x0218a008 },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x01600031, 0x29000c01, 0x408d0000, 0x0218a009 },
+ { 0x00800001, 0x28200229, 0x008d0810, 0x00000000 },
+ { 0x00800001, 0x29200229, 0x008d0910, 0x00000000 },
+ { 0x00800001, 0x28000229, 0x008d0800, 0x00000000 },
+ { 0x00800001, 0x29000229, 0x008d0900, 0x00000000 },
+ { 0x00000001, 0x26000228, 0x000000ba, 0x00000000 },
+ { 0x00610001, 0x24400129, 0x000000b8, 0x00000000 },
+ { 0x00710001, 0x24400169, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x00000440, 0x00000000 },
+ { 0x00910001, 0x41400231, 0x02b10700, 0x00000000 },
+ { 0x00910001, 0x22400231, 0x028d0800, 0x00000000 },
+ { 0x00910001, 0x23400231, 0x028d0900, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x00000442, 0x00000000 },
+ { 0x00910001, 0x41410231, 0x02b10701, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x00000444, 0x00000000 },
+ { 0x00910001, 0x41800231, 0x02b10720, 0x00000000 },
+ { 0x00910001, 0x22500231, 0x028d0810, 0x00000000 },
+ { 0x00910001, 0x23500231, 0x028d0910, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x00000446, 0x00000000 },
+ { 0x00910001, 0x41810231, 0x02b10721, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x00000448, 0x00000000 },
+ { 0x00910001, 0x41c00231, 0x02b10740, 0x00000000 },
+ { 0x00910001, 0x22600231, 0x028d0820, 0x00000000 },
+ { 0x00910001, 0x23600231, 0x028d0920, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x0000044a, 0x00000000 },
+ { 0x00910001, 0x41c10231, 0x02b10741, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x0000044c, 0x00000000 },
+ { 0x00910001, 0x42000231, 0x02b10760, 0x00000000 },
+ { 0x00910001, 0x22700231, 0x028d0830, 0x00000000 },
+ { 0x00910001, 0x23700231, 0x028d0930, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x0000044e, 0x00000000 },
+ { 0x00910001, 0x42010231, 0x02b10761, 0x00000000 },
+ { 0x00200001, 0x210001a5, 0x004500a0, 0x00000000 },
+ { 0x00000001, 0x21080061, 0x00000000, 0x0007000f },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x00800001, 0x20400232, 0x00d20140, 0x00000000 },
+ { 0x00800001, 0x20500232, 0x00d20160, 0x00000000 },
+ { 0x00800001, 0x20600232, 0x00d20180, 0x00000000 },
+ { 0x00800001, 0x20700232, 0x00d201a0, 0x00000000 },
+ { 0x00800001, 0x20800232, 0x00d201c0, 0x00000000 },
+ { 0x00800001, 0x20900232, 0x00d201e0, 0x00000000 },
+ { 0x00800001, 0x20a00232, 0x00d20200, 0x00000000 },
+ { 0x00800001, 0x20b00232, 0x00d20220, 0x00000000 },
+ { 0x01600031, 0x20000c04, 0x508d0000, 0x0a082007 },
+ { 0x0020000c, 0x21003da5, 0x004500a0, 0x00010001 },
+ { 0x00000001, 0x21080061, 0x00000000, 0x00030007 },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x00800001, 0x20400232, 0x00d20240, 0x00000000 },
+ { 0x00800001, 0x20500232, 0x00d20260, 0x00000000 },
+ { 0x01600031, 0x20000c04, 0x508d0000, 0x04082008 },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x00800001, 0x20400232, 0x00d20340, 0x00000000 },
+ { 0x00800001, 0x20500232, 0x00d20360, 0x00000000 },
+ { 0x01600031, 0x20000c04, 0x508d0000, 0x04082009 },
+ { 0x01000040, 0x20863dad, 0x00000086, 0xffffffff },
+ { 0x00000040, 0x20a03dad, 0x000000a0, 0x00100010 },
+ { 0x05000010, 0x200035ac, 0x020000a0, 0x00000084 },
+ { 0x00010220, 0x34001c00, 0x00001400, 0x00000008 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0xffffff4a },
+ { 0x00000001, 0x20a001ad, 0x0000008a, 0x00000000 },
+ { 0x00000040, 0x20a23dad, 0x000000a2, 0x00080008 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0xffffff44 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
+ { 0x0f000031, 0x20000c04, 0x708d0000, 0x82000000 },
+ { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
+ { 0x0f000031, 0x20000c04, 0x708d0000, 0x82000000 },
diff --git a/src/shaders/post_processing/gen5_6/nv12_load_save_pl3.g6b b/src/shaders/post_processing/gen5_6/nv12_load_save_pl3.g6b
new file mode 100644
index 0000000..819280d
--- /dev/null
+++ b/src/shaders/post_processing/gen5_6/nv12_load_save_pl3.g6b
@@ -0,0 +1,178 @@
+ { 0x00600001, 0x20e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21600061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21a00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x22000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x22200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x22400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x22600061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x22800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x22a00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x22c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x22e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x23000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x23200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x23400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x23600061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x23800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x23a00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x23c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x23e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x24000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x24200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x24400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x24600061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x24800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x24a00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x24c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x24e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x25000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x25200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x25400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x25600061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x25800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x25a00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x25c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x25e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x26000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x26200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x26400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x26600061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x26800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x26a00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x26c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x26e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x27000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x27200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x27400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x27600061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x27800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x27a00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x27c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x27e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28600061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28a00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x29000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x29200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x29400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x29600061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x29800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x29a00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x29c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x29e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21000021, 0x008d0000, 0x00000000 },
+ { 0x00000441, 0x20842e2d, 0x000000b7, 0x00100010 },
+ { 0x00000c01, 0x2086022d, 0x000000bb, 0x00000000 },
+ { 0x00000801, 0x208a01ad, 0x000000a0, 0x00000000 },
+ { 0x00200001, 0x209403bd, 0x006600a4, 0x00000000 },
+ { 0x00000040, 0x208435ad, 0x00000084, 0x000000a0 },
+ { 0x00200040, 0x210035a5, 0x004500a0, 0x00450074 },
+ { 0x00000001, 0x21080061, 0x00000000, 0x0007000f },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x04600031, 0x27000cc1, 0x00000020, 0x02498001 },
+ { 0x0000000c, 0x21043ca5, 0x00000104, 0x00010001 },
+ { 0x00000001, 0x21080061, 0x00000000, 0x0003000f },
+ { 0x00600001, 0x20400022, 0x008d0100, 0x00000000 },
+ { 0x04600031, 0x28000cc1, 0x00000040, 0x02298002 },
+ { 0x00800001, 0x22200229, 0x00b10770, 0x00000000 },
+ { 0x00800001, 0x22000229, 0x00b10760, 0x00000000 },
+ { 0x00800001, 0x21e00229, 0x00b10750, 0x00000000 },
+ { 0x00800001, 0x21c00229, 0x00b10740, 0x00000000 },
+ { 0x00800001, 0x21a00229, 0x00b10730, 0x00000000 },
+ { 0x00800001, 0x21800229, 0x00b10720, 0x00000000 },
+ { 0x00800001, 0x21600229, 0x00b10710, 0x00000000 },
+ { 0x00800001, 0x21400229, 0x00b10700, 0x00000000 },
+ { 0x00800001, 0x22600229, 0x00d20820, 0x00000000 },
+ { 0x00800001, 0x23600229, 0x00d20821, 0x00000000 },
+ { 0x00800001, 0x22400229, 0x00d20800, 0x00000000 },
+ { 0x00800001, 0x23400229, 0x00d20801, 0x00000000 },
+ { 0x00000005, 0x24000c20, 0x000000b8, 0x00ffffff },
+ { 0x04000010, 0x20000c04, 0x00000400, 0x00ffffff },
+ { 0x00010220, 0x34001c00, 0x00001400, 0x00000052 },
+ { 0x00200001, 0x210001a5, 0x004500a0, 0x00000000 },
+ { 0x00000001, 0x21080061, 0x00000000, 0x0007000f },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x04600031, 0x27000cc1, 0x00000020, 0x02498007 },
+ { 0x0020000c, 0x21003da5, 0x004500a0, 0x00010001 },
+ { 0x00000001, 0x21080061, 0x00000000, 0x00030007 },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x04600031, 0x28000cc1, 0x00000020, 0x02198008 },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x04600031, 0x29000cc1, 0x00000020, 0x02198009 },
+ { 0x00800001, 0x28200229, 0x008d0810, 0x00000000 },
+ { 0x00800001, 0x29200229, 0x008d0910, 0x00000000 },
+ { 0x00800001, 0x28000229, 0x008d0800, 0x00000000 },
+ { 0x00800001, 0x29000229, 0x008d0900, 0x00000000 },
+ { 0x00000001, 0x26000228, 0x000000ba, 0x00000000 },
+ { 0x00610001, 0x24400129, 0x000000b8, 0x00000000 },
+ { 0x00710001, 0x24400169, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x00000440, 0x00000000 },
+ { 0x00910001, 0x41400231, 0x02b10700, 0x00000000 },
+ { 0x00910001, 0x22400231, 0x028d0800, 0x00000000 },
+ { 0x00910001, 0x23400231, 0x028d0900, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x00000442, 0x00000000 },
+ { 0x00910001, 0x41410231, 0x02b10701, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x00000444, 0x00000000 },
+ { 0x00910001, 0x41800231, 0x02b10720, 0x00000000 },
+ { 0x00910001, 0x22500231, 0x028d0810, 0x00000000 },
+ { 0x00910001, 0x23500231, 0x028d0910, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x00000446, 0x00000000 },
+ { 0x00910001, 0x41810231, 0x02b10721, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x00000448, 0x00000000 },
+ { 0x00910001, 0x41c00231, 0x02b10740, 0x00000000 },
+ { 0x00910001, 0x22600231, 0x028d0820, 0x00000000 },
+ { 0x00910001, 0x23600231, 0x028d0920, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x0000044a, 0x00000000 },
+ { 0x00910001, 0x41c10231, 0x02b10741, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x0000044c, 0x00000000 },
+ { 0x00910001, 0x42000231, 0x02b10760, 0x00000000 },
+ { 0x00910001, 0x22700231, 0x028d0830, 0x00000000 },
+ { 0x00910001, 0x23700231, 0x028d0930, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x0000044e, 0x00000000 },
+ { 0x00910001, 0x42010231, 0x02b10761, 0x00000000 },
+ { 0x00200001, 0x210001a5, 0x004500a0, 0x00000000 },
+ { 0x00000001, 0x21080061, 0x00000000, 0x0007000f },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x00800001, 0x20400232, 0x00d20140, 0x00000000 },
+ { 0x00800001, 0x20500232, 0x00d20160, 0x00000000 },
+ { 0x00800001, 0x20600232, 0x00d20180, 0x00000000 },
+ { 0x00800001, 0x20700232, 0x00d201a0, 0x00000000 },
+ { 0x00800001, 0x20800232, 0x00d201c0, 0x00000000 },
+ { 0x00800001, 0x20900232, 0x00d201e0, 0x00000000 },
+ { 0x00800001, 0x20a00232, 0x00d20200, 0x00000000 },
+ { 0x00800001, 0x20b00232, 0x00d20220, 0x00000000 },
+ { 0x05600031, 0x20000cc4, 0x00000020, 0x0a094007 },
+ { 0x0020000c, 0x21003da5, 0x004500a0, 0x00010001 },
+ { 0x00000001, 0x21080061, 0x00000000, 0x00030007 },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x00800001, 0x20400232, 0x00d20240, 0x00000000 },
+ { 0x00800001, 0x20500232, 0x00d20260, 0x00000000 },
+ { 0x05600031, 0x20000cc4, 0x00000020, 0x04094008 },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x00800001, 0x20400232, 0x00d20340, 0x00000000 },
+ { 0x00800001, 0x20500232, 0x00d20360, 0x00000000 },
+ { 0x05600031, 0x20000cc4, 0x00000020, 0x04094009 },
+ { 0x01000040, 0x20863dad, 0x00000086, 0xffffffff },
+ { 0x00000040, 0x20a03dad, 0x000000a0, 0x00100010 },
+ { 0x05000010, 0x200035ac, 0x020000a0, 0x00000084 },
+ { 0x00010220, 0x34001c00, 0x00001400, 0x00000008 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0xffffff4a },
+ { 0x00000001, 0x20a001ad, 0x0000008a, 0x00000000 },
+ { 0x00000040, 0x20a23dad, 0x000000a2, 0x00080008 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0xffffff44 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
+ { 0x07000031, 0x20001cc4, 0x000001e0, 0x82000010 },
+ { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
+ { 0x07000031, 0x20001cc4, 0x000001e0, 0x82000010 },
diff --git a/src/shaders/post_processing/nv12_scaling_nv12.asm b/src/shaders/post_processing/gen5_6/nv12_scaling_nv12.asm
similarity index 100%
rename from src/shaders/post_processing/nv12_scaling_nv12.asm
rename to src/shaders/post_processing/gen5_6/nv12_scaling_nv12.asm
diff --git a/src/shaders/post_processing/nv12_scaling_nv12.g4b.gen5 b/src/shaders/post_processing/gen5_6/nv12_scaling_nv12.g4b.gen5
similarity index 100%
rename from src/shaders/post_processing/nv12_scaling_nv12.g4b.gen5
rename to src/shaders/post_processing/gen5_6/nv12_scaling_nv12.g4b.gen5
diff --git a/src/shaders/post_processing/nv12_scaling_nv12.g6b b/src/shaders/post_processing/gen5_6/nv12_scaling_nv12.g6b
similarity index 100%
rename from src/shaders/post_processing/nv12_scaling_nv12.g6b
rename to src/shaders/post_processing/gen5_6/nv12_scaling_nv12.g6b
diff --git a/src/shaders/post_processing/gen5_6/pa_load_save_nv12.asm b/src/shaders/post_processing/gen5_6/pa_load_save_nv12.asm
new file mode 100755
index 0000000..bd68a92
--- /dev/null
+++ b/src/shaders/post_processing/gen5_6/pa_load_save_nv12.asm
@@ -0,0 +1,18 @@
+// Module name: PA_LOAD_SAVE_NV12
+.kernel PA_LOAD_SAVE_NV12
+.code
+
+#include "SetupVPKernel.asm"
+#include "Multiple_Loop_Head.asm"
+#include "PA_Load_8x8.asm"
+#include "PL8x8_PL8x4.asm"
+#include "PL8x4_Save_NV12.asm"
+#include "Multiple_Loop.asm"
+
+END_THREAD // End of Thread
+
+.end_code
+
+.end_kernel
+
+// end of pa_load_save_nv12.asm
diff --git a/src/shaders/post_processing/gen5_6/pa_load_save_nv12.g4b.gen5 b/src/shaders/post_processing/gen5_6/pa_load_save_nv12.g4b.gen5
new file mode 100644
index 0000000..af53ccd
--- /dev/null
+++ b/src/shaders/post_processing/gen5_6/pa_load_save_nv12.g4b.gen5
@@ -0,0 +1,120 @@
+ { 0x00600001, 0x21000021, 0x008d0000, 0x00000000 },
+ { 0x00000441, 0x20842e2d, 0x000000b7, 0x00100010 },
+ { 0x00000c01, 0x2086022d, 0x000000bb, 0x00000000 },
+ { 0x00000801, 0x208a01ad, 0x000000a0, 0x00000000 },
+ { 0x00200001, 0x209403bd, 0x006600a4, 0x00000000 },
+ { 0x00000040, 0x208435ad, 0x00000084, 0x000000a0 },
+ { 0x00200040, 0x210035a5, 0x004500a0, 0x00450074 },
+ { 0x00000009, 0x21003da5, 0x00000100, 0x00010001 },
+ { 0x00000001, 0x21080061, 0x00000000, 0x0007001f },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x01600031, 0x27000c01, 0x408d0000, 0x0288a001 },
+ { 0x00400040, 0x22083e28, 0x00690024, 0x07000700 },
+ { 0x00800001, 0x21400229, 0x00d29000, 0x00000000 },
+ { 0x00600001, 0x22400229, 0x00cf9400, 0x00000000 },
+ { 0x00600001, 0x23400229, 0x00cf9800, 0x00000000 },
+ { 0x00800001, 0x21600229, 0x00d29020, 0x00000000 },
+ { 0x00600001, 0x22500229, 0x00cf9420, 0x00000000 },
+ { 0x00600001, 0x23500229, 0x00cf9820, 0x00000000 },
+ { 0x00800001, 0x21800229, 0x00d29040, 0x00000000 },
+ { 0x00600001, 0x22600229, 0x00cf9440, 0x00000000 },
+ { 0x00600001, 0x23600229, 0x00cf9840, 0x00000000 },
+ { 0x00800001, 0x21a00229, 0x00d29060, 0x00000000 },
+ { 0x00600001, 0x22700229, 0x00cf9460, 0x00000000 },
+ { 0x00600001, 0x23700229, 0x00cf9860, 0x00000000 },
+ { 0x00800001, 0x21c00229, 0x00d29080, 0x00000000 },
+ { 0x00600001, 0x22800229, 0x00cf9480, 0x00000000 },
+ { 0x00600001, 0x23800229, 0x00cf9880, 0x00000000 },
+ { 0x00800001, 0x21e00229, 0x00d290a0, 0x00000000 },
+ { 0x00600001, 0x22900229, 0x00cf94a0, 0x00000000 },
+ { 0x00600001, 0x23900229, 0x00cf98a0, 0x00000000 },
+ { 0x00800001, 0x22000229, 0x00d290c0, 0x00000000 },
+ { 0x00600001, 0x22a00229, 0x00cf94c0, 0x00000000 },
+ { 0x00600001, 0x23a00229, 0x00cf98c0, 0x00000000 },
+ { 0x00800001, 0x22200229, 0x00d290e0, 0x00000000 },
+ { 0x00600001, 0x22b00229, 0x00cf94e0, 0x00000000 },
+ { 0x00600001, 0x23b00229, 0x00cf98e0, 0x00000000 },
+ { 0x00600001, 0x42500231, 0x00ae0260, 0x00000000 },
+ { 0x00800001, 0x42600231, 0x00ce0280, 0x00000000 },
+ { 0x00600001, 0x43500231, 0x00ae0360, 0x00000000 },
+ { 0x00800001, 0x43600231, 0x00ce0380, 0x00000000 },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x00200001, 0x202001a6, 0x004500a0, 0x00000000 },
+ { 0x00000001, 0x20280062, 0x00000000, 0x0007000f },
+ { 0x00000005, 0x24000c20, 0x000000b8, 0x00ffffff },
+ { 0x04000010, 0x20000c04, 0x00000400, 0x00ffffff },
+ { 0x00010220, 0x34001c00, 0x00001400, 0x00000056 },
+ { 0x01600031, 0x27000c01, 0x408d0000, 0x0248a007 },
+ { 0x0000040c, 0x21043da1, 0x000000a2, 0x00010001 },
+ { 0x00000801, 0x21080061, 0x00000000, 0x0003000f },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x01600031, 0x28000c01, 0x408d0000, 0x0228a008 },
+ { 0x00200001, 0x210001a1, 0x004500a0, 0x00000000 },
+ { 0x00000001, 0x21080061, 0x00000000, 0x0007000f },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x00000001, 0x26020228, 0x000000ba, 0x00000000 },
+ { 0x00610001, 0x24400129, 0x020000b8, 0x00000000 },
+ { 0x00710001, 0x24400169, 0x02000000, 0x00000000 },
+ { 0x00000001, 0x24600061, 0x00000000, 0x00040001 },
+ { 0x00000001, 0x24640061, 0x00000000, 0x00400010 },
+ { 0x00000001, 0x24680061, 0x00000000, 0x04000100 },
+ { 0x00000001, 0x246c0061, 0x00000000, 0x40001000 },
+ { 0x00000001, 0x26020128, 0x00000440, 0x00000000 },
+ { 0x00910001, 0x41400231, 0x02b10700, 0x00000000 },
+ { 0x02600005, 0x2000252c, 0x02000440, 0x008d0460 },
+ { 0x00710001, 0x42400231, 0x02ae0800, 0x00000000 },
+ { 0x00710001, 0x43400231, 0x02ae0801, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x00000442, 0x00000000 },
+ { 0x00910001, 0x41600231, 0x02b10710, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x00000444, 0x00000000 },
+ { 0x00910001, 0x41800231, 0x02b10720, 0x00000000 },
+ { 0x02600005, 0x2000252c, 0x02000444, 0x008d0460 },
+ { 0x00710001, 0x42500231, 0x02ae0810, 0x00000000 },
+ { 0x00710001, 0x43500231, 0x02ae0811, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x00000446, 0x00000000 },
+ { 0x00910001, 0x41a00231, 0x02b10730, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x00000448, 0x00000000 },
+ { 0x00910001, 0x41c00231, 0x02b10740, 0x00000000 },
+ { 0x02600005, 0x2000252c, 0x02000448, 0x008d0460 },
+ { 0x00710001, 0x42600231, 0x02ae0820, 0x00000000 },
+ { 0x00710001, 0x43600231, 0x02ae0821, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x0000044a, 0x00000000 },
+ { 0x00910001, 0x41e00231, 0x02b10750, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x0000044c, 0x00000000 },
+ { 0x00910001, 0x42000231, 0x02b10760, 0x00000000 },
+ { 0x02600005, 0x2000252c, 0x0200044c, 0x008d0460 },
+ { 0x00710001, 0x42700231, 0x02ae0830, 0x00000000 },
+ { 0x00710001, 0x43700231, 0x02ae0831, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x0000044e, 0x00000000 },
+ { 0x00910001, 0x42200231, 0x02b10770, 0x00000000 },
+ { 0x00800001, 0x20400232, 0x00d20140, 0x00000000 },
+ { 0x00800001, 0x20500232, 0x00d20160, 0x00000000 },
+ { 0x00800001, 0x20600232, 0x00d20180, 0x00000000 },
+ { 0x00800001, 0x20700232, 0x00d201a0, 0x00000000 },
+ { 0x00800001, 0x20800232, 0x00d201c0, 0x00000000 },
+ { 0x00800001, 0x20900232, 0x00d201e0, 0x00000000 },
+ { 0x00800001, 0x20a00232, 0x00d20200, 0x00000000 },
+ { 0x00800001, 0x20b00232, 0x00d20220, 0x00000000 },
+ { 0x01600031, 0x20000c04, 0x508d0000, 0x0a082007 },
+ { 0x00200001, 0x210001a5, 0x004500a0, 0x00000000 },
+ { 0x0000000c, 0x21043ca5, 0x00000104, 0x00010001 },
+ { 0x00000001, 0x21080061, 0x00000000, 0x0003000f },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x00800001, 0x40400232, 0x00d20240, 0x00000000 },
+ { 0x00800001, 0x40410232, 0x00d20340, 0x00000000 },
+ { 0x00800001, 0x40600232, 0x00d20260, 0x00000000 },
+ { 0x00800001, 0x40610232, 0x00d20360, 0x00000000 },
+ { 0x01600031, 0x20000c04, 0x508d0000, 0x06082008 },
+ { 0x01000040, 0x20863dad, 0x00000086, 0xffffffff },
+ { 0x00000040, 0x20a03dad, 0x000000a0, 0x00100010 },
+ { 0x05000010, 0x200035ac, 0x020000a0, 0x00000084 },
+ { 0x00010220, 0x34001c00, 0x00001400, 0x00000008 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0xffffff2c },
+ { 0x00000001, 0x20a001ad, 0x0000008a, 0x00000000 },
+ { 0x00000040, 0x20a23dad, 0x000000a2, 0x00080008 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0xffffff26 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
+ { 0x0f000031, 0x20000c04, 0x708d0000, 0x82000000 },
+ { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
+ { 0x0f000031, 0x20000c04, 0x708d0000, 0x82000000 },
diff --git a/src/shaders/post_processing/gen5_6/pa_load_save_nv12.g6b b/src/shaders/post_processing/gen5_6/pa_load_save_nv12.g6b
new file mode 100644
index 0000000..343bd1c
--- /dev/null
+++ b/src/shaders/post_processing/gen5_6/pa_load_save_nv12.g6b
@@ -0,0 +1,193 @@
+ { 0x00600001, 0x20e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21600061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21a00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x22000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x22200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x22400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x22600061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x22800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x22a00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x22c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x22e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x23000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x23200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x23400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x23600061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x23800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x23a00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x23c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x23e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x24000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x24200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x24400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x24600061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x24800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x24a00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x24c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x24e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x25000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x25200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x25400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x25600061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x25800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x25a00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x25c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x25e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x26000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x26200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x26400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x26600061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x26800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x26a00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x26c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x26e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x27000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x27200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x27400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x27600061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x27800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x27a00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x27c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x27e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28600061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28a00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x29000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x29200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x29400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x29600061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x29800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x29a00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x29c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x29e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21000021, 0x008d0000, 0x00000000 },
+ { 0x00000441, 0x20842e2d, 0x000000b7, 0x00100010 },
+ { 0x00000c01, 0x2086022d, 0x000000bb, 0x00000000 },
+ { 0x00000801, 0x208a01ad, 0x000000a0, 0x00000000 },
+ { 0x00200001, 0x209403bd, 0x006600a4, 0x00000000 },
+ { 0x00000040, 0x208435ad, 0x00000084, 0x000000a0 },
+ { 0x00200040, 0x210035a5, 0x004500a0, 0x00450074 },
+ { 0x00000009, 0x21003da5, 0x00000100, 0x00010001 },
+ { 0x00000001, 0x21080061, 0x00000000, 0x0007001f },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x04600031, 0x27000cc1, 0x00000020, 0x02898001 },
+ { 0x00400040, 0x22083e28, 0x00690024, 0x07000700 },
+ { 0x00800001, 0x21400229, 0x00d29000, 0x00000000 },
+ { 0x00600001, 0x22400229, 0x00cf9400, 0x00000000 },
+ { 0x00600001, 0x23400229, 0x00cf9800, 0x00000000 },
+ { 0x00800001, 0x21600229, 0x00d29020, 0x00000000 },
+ { 0x00600001, 0x22500229, 0x00cf9420, 0x00000000 },
+ { 0x00600001, 0x23500229, 0x00cf9820, 0x00000000 },
+ { 0x00800001, 0x21800229, 0x00d29040, 0x00000000 },
+ { 0x00600001, 0x22600229, 0x00cf9440, 0x00000000 },
+ { 0x00600001, 0x23600229, 0x00cf9840, 0x00000000 },
+ { 0x00800001, 0x21a00229, 0x00d29060, 0x00000000 },
+ { 0x00600001, 0x22700229, 0x00cf9460, 0x00000000 },
+ { 0x00600001, 0x23700229, 0x00cf9860, 0x00000000 },
+ { 0x00800001, 0x21c00229, 0x00d29080, 0x00000000 },
+ { 0x00600001, 0x22800229, 0x00cf9480, 0x00000000 },
+ { 0x00600001, 0x23800229, 0x00cf9880, 0x00000000 },
+ { 0x00800001, 0x21e00229, 0x00d290a0, 0x00000000 },
+ { 0x00600001, 0x22900229, 0x00cf94a0, 0x00000000 },
+ { 0x00600001, 0x23900229, 0x00cf98a0, 0x00000000 },
+ { 0x00800001, 0x22000229, 0x00d290c0, 0x00000000 },
+ { 0x00600001, 0x22a00229, 0x00cf94c0, 0x00000000 },
+ { 0x00600001, 0x23a00229, 0x00cf98c0, 0x00000000 },
+ { 0x00800001, 0x22200229, 0x00d290e0, 0x00000000 },
+ { 0x00600001, 0x22b00229, 0x00cf94e0, 0x00000000 },
+ { 0x00600001, 0x23b00229, 0x00cf98e0, 0x00000000 },
+ { 0x00600001, 0x42500231, 0x00ae0260, 0x00000000 },
+ { 0x00800001, 0x42600231, 0x00ce0280, 0x00000000 },
+ { 0x00600001, 0x43500231, 0x00ae0360, 0x00000000 },
+ { 0x00800001, 0x43600231, 0x00ce0380, 0x00000000 },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x00200001, 0x202001a6, 0x004500a0, 0x00000000 },
+ { 0x00000001, 0x20280062, 0x00000000, 0x0007000f },
+ { 0x00000005, 0x24000c20, 0x000000b8, 0x00ffffff },
+ { 0x04000010, 0x20000c04, 0x00000400, 0x00ffffff },
+ { 0x00010220, 0x34001c00, 0x00001400, 0x00000056 },
+ { 0x04600031, 0x27000cc1, 0x00000020, 0x02498007 },
+ { 0x0000040c, 0x21043da1, 0x000000a2, 0x00010001 },
+ { 0x00000801, 0x21080061, 0x00000000, 0x0003000f },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x04600031, 0x28000cc1, 0x00000020, 0x02298008 },
+ { 0x00200001, 0x210001a1, 0x004500a0, 0x00000000 },
+ { 0x00000001, 0x21080061, 0x00000000, 0x0007000f },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x00000001, 0x26020228, 0x000000ba, 0x00000000 },
+ { 0x00610001, 0x24400129, 0x020000b8, 0x00000000 },
+ { 0x00710001, 0x24400169, 0x02000000, 0x00000000 },
+ { 0x00000001, 0x24600061, 0x00000000, 0x00040001 },
+ { 0x00000001, 0x24640061, 0x00000000, 0x00400010 },
+ { 0x00000001, 0x24680061, 0x00000000, 0x04000100 },
+ { 0x00000001, 0x246c0061, 0x00000000, 0x40001000 },
+ { 0x00000001, 0x26020128, 0x00000440, 0x00000000 },
+ { 0x00910001, 0x41400231, 0x02b10700, 0x00000000 },
+ { 0x02600005, 0x2000252c, 0x02000440, 0x008d0460 },
+ { 0x00710001, 0x42400231, 0x02ae0800, 0x00000000 },
+ { 0x00710001, 0x43400231, 0x02ae0801, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x00000442, 0x00000000 },
+ { 0x00910001, 0x41600231, 0x02b10710, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x00000444, 0x00000000 },
+ { 0x00910001, 0x41800231, 0x02b10720, 0x00000000 },
+ { 0x02600005, 0x2000252c, 0x02000444, 0x008d0460 },
+ { 0x00710001, 0x42500231, 0x02ae0810, 0x00000000 },
+ { 0x00710001, 0x43500231, 0x02ae0811, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x00000446, 0x00000000 },
+ { 0x00910001, 0x41a00231, 0x02b10730, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x00000448, 0x00000000 },
+ { 0x00910001, 0x41c00231, 0x02b10740, 0x00000000 },
+ { 0x02600005, 0x2000252c, 0x02000448, 0x008d0460 },
+ { 0x00710001, 0x42600231, 0x02ae0820, 0x00000000 },
+ { 0x00710001, 0x43600231, 0x02ae0821, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x0000044a, 0x00000000 },
+ { 0x00910001, 0x41e00231, 0x02b10750, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x0000044c, 0x00000000 },
+ { 0x00910001, 0x42000231, 0x02b10760, 0x00000000 },
+ { 0x02600005, 0x2000252c, 0x0200044c, 0x008d0460 },
+ { 0x00710001, 0x42700231, 0x02ae0830, 0x00000000 },
+ { 0x00710001, 0x43700231, 0x02ae0831, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x0000044e, 0x00000000 },
+ { 0x00910001, 0x42200231, 0x02b10770, 0x00000000 },
+ { 0x00800001, 0x20400232, 0x00d20140, 0x00000000 },
+ { 0x00800001, 0x20500232, 0x00d20160, 0x00000000 },
+ { 0x00800001, 0x20600232, 0x00d20180, 0x00000000 },
+ { 0x00800001, 0x20700232, 0x00d201a0, 0x00000000 },
+ { 0x00800001, 0x20800232, 0x00d201c0, 0x00000000 },
+ { 0x00800001, 0x20900232, 0x00d201e0, 0x00000000 },
+ { 0x00800001, 0x20a00232, 0x00d20200, 0x00000000 },
+ { 0x00800001, 0x20b00232, 0x00d20220, 0x00000000 },
+ { 0x05600031, 0x20000cc4, 0x00000020, 0x0a094007 },
+ { 0x00200001, 0x210001a5, 0x004500a0, 0x00000000 },
+ { 0x0000000c, 0x21043ca5, 0x00000104, 0x00010001 },
+ { 0x00000001, 0x21080061, 0x00000000, 0x0003000f },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x00800001, 0x40400232, 0x00d20240, 0x00000000 },
+ { 0x00800001, 0x40410232, 0x00d20340, 0x00000000 },
+ { 0x00800001, 0x40600232, 0x00d20260, 0x00000000 },
+ { 0x00800001, 0x40610232, 0x00d20360, 0x00000000 },
+ { 0x05600031, 0x20000cc4, 0x00000020, 0x06094008 },
+ { 0x01000040, 0x20863dad, 0x00000086, 0xffffffff },
+ { 0x00000040, 0x20a03dad, 0x000000a0, 0x00100010 },
+ { 0x05000010, 0x200035ac, 0x020000a0, 0x00000084 },
+ { 0x00010220, 0x34001c00, 0x00001400, 0x00000008 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0xffffff2c },
+ { 0x00000001, 0x20a001ad, 0x0000008a, 0x00000000 },
+ { 0x00000040, 0x20a23dad, 0x000000a2, 0x00080008 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0xffffff26 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
+ { 0x07000031, 0x20001cc4, 0x000001e0, 0x82000010 },
+ { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
+ { 0x07000031, 0x20001cc4, 0x000001e0, 0x82000010 },
diff --git a/src/shaders/post_processing/gen5_6/pa_load_save_pl3.asm b/src/shaders/post_processing/gen5_6/pa_load_save_pl3.asm
new file mode 100755
index 0000000..9a79ac1
--- /dev/null
+++ b/src/shaders/post_processing/gen5_6/pa_load_save_pl3.asm
@@ -0,0 +1,18 @@
+// Module name: PA_LOAD_SAVE_PL3 -- kernel built only from the included stages below, in order: SetupVPKernel, Multiple_Loop_Head, PA_Load_8x8, PL8x8_PL8x4, PL8x4_Save_IMC3, Multiple_Loop. NOTE(review): going by the include names this presumably loads a packed (PA) surface and saves it as 3-plane IMC3 -- confirm against the included files.
+.kernel PA_LOAD_SAVE_PL3
+.code
+
+#include "SetupVPKernel.asm"
+#include "Multiple_Loop_Head.asm"
+#include "PA_Load_8x8.asm"
+#include "PL8x8_PL8x4.asm"
+#include "PL8x4_Save_IMC3.asm"
+#include "Multiple_Loop.asm"
+
+END_THREAD // End of Thread
+
+.end_code
+
+.end_kernel
+
+// end of pa_load_save_pl3.asm
diff --git a/src/shaders/post_processing/gen5_6/pa_load_save_pl3.g4b.gen5 b/src/shaders/post_processing/gen5_6/pa_load_save_pl3.g4b.gen5
new file mode 100755
index 0000000..21ff3d7
--- /dev/null
+++ b/src/shaders/post_processing/gen5_6/pa_load_save_pl3.g4b.gen5
@@ -0,0 +1,123 @@
+ { 0x00600001, 0x21000021, 0x008d0000, 0x00000000 },
+ { 0x00000441, 0x20842e2d, 0x000000b7, 0x00100010 },
+ { 0x00000c01, 0x2086022d, 0x000000bb, 0x00000000 },
+ { 0x00000801, 0x208a01ad, 0x000000a0, 0x00000000 },
+ { 0x00200001, 0x209403bd, 0x006600a4, 0x00000000 },
+ { 0x00000040, 0x208435ad, 0x00000084, 0x000000a0 },
+ { 0x00200040, 0x210035a5, 0x004500a0, 0x00450074 },
+ { 0x00000009, 0x21003da5, 0x00000100, 0x00010001 },
+ { 0x00000001, 0x21080061, 0x00000000, 0x0007001f },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x01600031, 0x27000c01, 0x408d0000, 0x0288a001 },
+ { 0x00400040, 0x22083e28, 0x00690024, 0x07000700 },
+ { 0x00800001, 0x21400229, 0x00d29000, 0x00000000 },
+ { 0x00600001, 0x22400229, 0x00cf9400, 0x00000000 },
+ { 0x00600001, 0x23400229, 0x00cf9800, 0x00000000 },
+ { 0x00800001, 0x21600229, 0x00d29020, 0x00000000 },
+ { 0x00600001, 0x22500229, 0x00cf9420, 0x00000000 },
+ { 0x00600001, 0x23500229, 0x00cf9820, 0x00000000 },
+ { 0x00800001, 0x21800229, 0x00d29040, 0x00000000 },
+ { 0x00600001, 0x22600229, 0x00cf9440, 0x00000000 },
+ { 0x00600001, 0x23600229, 0x00cf9840, 0x00000000 },
+ { 0x00800001, 0x21a00229, 0x00d29060, 0x00000000 },
+ { 0x00600001, 0x22700229, 0x00cf9460, 0x00000000 },
+ { 0x00600001, 0x23700229, 0x00cf9860, 0x00000000 },
+ { 0x00800001, 0x21c00229, 0x00d29080, 0x00000000 },
+ { 0x00600001, 0x22800229, 0x00cf9480, 0x00000000 },
+ { 0x00600001, 0x23800229, 0x00cf9880, 0x00000000 },
+ { 0x00800001, 0x21e00229, 0x00d290a0, 0x00000000 },
+ { 0x00600001, 0x22900229, 0x00cf94a0, 0x00000000 },
+ { 0x00600001, 0x23900229, 0x00cf98a0, 0x00000000 },
+ { 0x00800001, 0x22000229, 0x00d290c0, 0x00000000 },
+ { 0x00600001, 0x22a00229, 0x00cf94c0, 0x00000000 },
+ { 0x00600001, 0x23a00229, 0x00cf98c0, 0x00000000 },
+ { 0x00800001, 0x22200229, 0x00d290e0, 0x00000000 },
+ { 0x00600001, 0x22b00229, 0x00cf94e0, 0x00000000 },
+ { 0x00600001, 0x23b00229, 0x00cf98e0, 0x00000000 },
+ { 0x00600001, 0x42500231, 0x00ae0260, 0x00000000 },
+ { 0x00800001, 0x42600231, 0x00ce0280, 0x00000000 },
+ { 0x00600001, 0x43500231, 0x00ae0360, 0x00000000 },
+ { 0x00800001, 0x43600231, 0x00ce0380, 0x00000000 },
+ { 0x00000005, 0x24000c20, 0x000000b8, 0x00ffffff },
+ { 0x04000010, 0x20000c04, 0x00000400, 0x00ffffff },
+ { 0x00010220, 0x34001c00, 0x00001400, 0x0000005a },
+ { 0x00200001, 0x210001a5, 0x004500a0, 0x00000000 },
+ { 0x00000001, 0x21080061, 0x00000000, 0x0007000f },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x01600031, 0x27000c01, 0x408d0000, 0x0248a007 },
+ { 0x0020000c, 0x21003da5, 0x004500a0, 0x00010001 },
+ { 0x00000001, 0x21080061, 0x00000000, 0x00030007 },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x01600031, 0x28000c01, 0x408d0000, 0x0218a008 },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x01600031, 0x29000c01, 0x408d0000, 0x0218a009 },
+ { 0x00800001, 0x28600229, 0x008d0830, 0x00000000 },
+ { 0x00800001, 0x29600229, 0x008d0930, 0x00000000 },
+ { 0x00800001, 0x28400229, 0x008d0820, 0x00000000 },
+ { 0x00800001, 0x29400229, 0x008d0920, 0x00000000 },
+ { 0x00800001, 0x28200229, 0x008d0810, 0x00000000 },
+ { 0x00800001, 0x29200229, 0x008d0910, 0x00000000 },
+ { 0x00800001, 0x28000229, 0x008d0800, 0x00000000 },
+ { 0x00800001, 0x29000229, 0x008d0900, 0x00000000 },
+ { 0x00000001, 0x26000228, 0x000000ba, 0x00000000 },
+ { 0x00610001, 0x24400129, 0x000000b8, 0x00000000 },
+ { 0x00710001, 0x24400169, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x00000440, 0x00000000 },
+ { 0x00910001, 0x41400231, 0x02b10700, 0x00000000 },
+ { 0x00910001, 0x22400231, 0x028d0800, 0x00000000 },
+ { 0x00910001, 0x23400231, 0x028d0900, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x00000442, 0x00000000 },
+ { 0x00910001, 0x41410231, 0x02b10701, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x00000444, 0x00000000 },
+ { 0x00910001, 0x41800231, 0x02b10720, 0x00000000 },
+ { 0x00910001, 0x22500231, 0x028d0810, 0x00000000 },
+ { 0x00910001, 0x23500231, 0x028d0910, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x00000446, 0x00000000 },
+ { 0x00910001, 0x41810231, 0x02b10721, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x00000448, 0x00000000 },
+ { 0x00910001, 0x41c00231, 0x02b10740, 0x00000000 },
+ { 0x00910001, 0x22600231, 0x028d0820, 0x00000000 },
+ { 0x00910001, 0x23600231, 0x028d0920, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x0000044a, 0x00000000 },
+ { 0x00910001, 0x41c10231, 0x02b10741, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x0000044c, 0x00000000 },
+ { 0x00910001, 0x42000231, 0x02b10760, 0x00000000 },
+ { 0x00910001, 0x22700231, 0x028d0830, 0x00000000 },
+ { 0x00910001, 0x23700231, 0x028d0930, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x0000044e, 0x00000000 },
+ { 0x00910001, 0x42010231, 0x02b10761, 0x00000000 },
+ { 0x00200001, 0x210001a5, 0x004500a0, 0x00000000 },
+ { 0x00000001, 0x21080061, 0x00000000, 0x0007000f },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x00800001, 0x20400232, 0x00d20140, 0x00000000 },
+ { 0x00800001, 0x20500232, 0x00d20160, 0x00000000 },
+ { 0x00800001, 0x20600232, 0x00d20180, 0x00000000 },
+ { 0x00800001, 0x20700232, 0x00d201a0, 0x00000000 },
+ { 0x00800001, 0x20800232, 0x00d201c0, 0x00000000 },
+ { 0x00800001, 0x20900232, 0x00d201e0, 0x00000000 },
+ { 0x00800001, 0x20a00232, 0x00d20200, 0x00000000 },
+ { 0x00800001, 0x20b00232, 0x00d20220, 0x00000000 },
+ { 0x01600031, 0x20000c04, 0x508d0000, 0x0a082007 },
+ { 0x0020000c, 0x21003da5, 0x004500a0, 0x00010001 },
+ { 0x00000001, 0x21080061, 0x00000000, 0x00030007 },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x00800001, 0x20400232, 0x00d20240, 0x00000000 },
+ { 0x00800001, 0x20500232, 0x00d20260, 0x00000000 },
+ { 0x01600031, 0x20000c04, 0x508d0000, 0x04082008 },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x00800001, 0x20400232, 0x00d20340, 0x00000000 },
+ { 0x00800001, 0x20500232, 0x00d20360, 0x00000000 },
+ { 0x01600031, 0x20000c04, 0x508d0000, 0x04082009 },
+ { 0x01000040, 0x20863dad, 0x00000086, 0xffffffff },
+ { 0x00000040, 0x20a03dad, 0x000000a0, 0x00100010 },
+ { 0x05000010, 0x200035ac, 0x020000a0, 0x00000084 },
+ { 0x00010220, 0x34001c00, 0x00001400, 0x00000008 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0xffffff26 },
+ { 0x00000001, 0x20a001ad, 0x0000008a, 0x00000000 },
+ { 0x00000040, 0x20a23dad, 0x000000a2, 0x00080008 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0xffffff20 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
+ { 0x0f000031, 0x20000c04, 0x708d0000, 0x82000000 },
+ { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
+ { 0x0f000031, 0x20000c04, 0x708d0000, 0x82000000 },
diff --git a/src/shaders/post_processing/gen5_6/pa_load_save_pl3.g6b b/src/shaders/post_processing/gen5_6/pa_load_save_pl3.g6b
new file mode 100755
index 0000000..55c0fed
--- /dev/null
+++ b/src/shaders/post_processing/gen5_6/pa_load_save_pl3.g6b
@@ -0,0 +1,196 @@
+ { 0x00600001, 0x20e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21600061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21a00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x22000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x22200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x22400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x22600061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x22800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x22a00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x22c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x22e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x23000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x23200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x23400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x23600061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x23800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x23a00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x23c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x23e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x24000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x24200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x24400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x24600061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x24800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x24a00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x24c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x24e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x25000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x25200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x25400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x25600061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x25800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x25a00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x25c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x25e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x26000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x26200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x26400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x26600061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x26800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x26a00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x26c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x26e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x27000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x27200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x27400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x27600061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x27800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x27a00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x27c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x27e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28600061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28a00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x29000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x29200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x29400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x29600061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x29800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x29a00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x29c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x29e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21000021, 0x008d0000, 0x00000000 },
+ { 0x00000441, 0x20842e2d, 0x000000b7, 0x00100010 },
+ { 0x00000c01, 0x2086022d, 0x000000bb, 0x00000000 },
+ { 0x00000801, 0x208a01ad, 0x000000a0, 0x00000000 },
+ { 0x00200001, 0x209403bd, 0x006600a4, 0x00000000 },
+ { 0x00000040, 0x208435ad, 0x00000084, 0x000000a0 },
+ { 0x00200040, 0x210035a5, 0x004500a0, 0x00450074 },
+ { 0x00000009, 0x21003da5, 0x00000100, 0x00010001 },
+ { 0x00000001, 0x21080061, 0x00000000, 0x0007001f },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x04600031, 0x27000cc1, 0x00000020, 0x02898001 },
+ { 0x00400040, 0x22083e28, 0x00690024, 0x07000700 },
+ { 0x00800001, 0x21400229, 0x00d29000, 0x00000000 },
+ { 0x00600001, 0x22400229, 0x00cf9400, 0x00000000 },
+ { 0x00600001, 0x23400229, 0x00cf9800, 0x00000000 },
+ { 0x00800001, 0x21600229, 0x00d29020, 0x00000000 },
+ { 0x00600001, 0x22500229, 0x00cf9420, 0x00000000 },
+ { 0x00600001, 0x23500229, 0x00cf9820, 0x00000000 },
+ { 0x00800001, 0x21800229, 0x00d29040, 0x00000000 },
+ { 0x00600001, 0x22600229, 0x00cf9440, 0x00000000 },
+ { 0x00600001, 0x23600229, 0x00cf9840, 0x00000000 },
+ { 0x00800001, 0x21a00229, 0x00d29060, 0x00000000 },
+ { 0x00600001, 0x22700229, 0x00cf9460, 0x00000000 },
+ { 0x00600001, 0x23700229, 0x00cf9860, 0x00000000 },
+ { 0x00800001, 0x21c00229, 0x00d29080, 0x00000000 },
+ { 0x00600001, 0x22800229, 0x00cf9480, 0x00000000 },
+ { 0x00600001, 0x23800229, 0x00cf9880, 0x00000000 },
+ { 0x00800001, 0x21e00229, 0x00d290a0, 0x00000000 },
+ { 0x00600001, 0x22900229, 0x00cf94a0, 0x00000000 },
+ { 0x00600001, 0x23900229, 0x00cf98a0, 0x00000000 },
+ { 0x00800001, 0x22000229, 0x00d290c0, 0x00000000 },
+ { 0x00600001, 0x22a00229, 0x00cf94c0, 0x00000000 },
+ { 0x00600001, 0x23a00229, 0x00cf98c0, 0x00000000 },
+ { 0x00800001, 0x22200229, 0x00d290e0, 0x00000000 },
+ { 0x00600001, 0x22b00229, 0x00cf94e0, 0x00000000 },
+ { 0x00600001, 0x23b00229, 0x00cf98e0, 0x00000000 },
+ { 0x00600001, 0x42500231, 0x00ae0260, 0x00000000 },
+ { 0x00800001, 0x42600231, 0x00ce0280, 0x00000000 },
+ { 0x00600001, 0x43500231, 0x00ae0360, 0x00000000 },
+ { 0x00800001, 0x43600231, 0x00ce0380, 0x00000000 },
+ { 0x00000005, 0x24000c20, 0x000000b8, 0x00ffffff },
+ { 0x04000010, 0x20000c04, 0x00000400, 0x00ffffff },
+ { 0x00010220, 0x34001c00, 0x00001400, 0x0000005a },
+ { 0x00200001, 0x210001a5, 0x004500a0, 0x00000000 },
+ { 0x00000001, 0x21080061, 0x00000000, 0x0007000f },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x04600031, 0x27000cc1, 0x00000020, 0x02498007 },
+ { 0x0020000c, 0x21003da5, 0x004500a0, 0x00010001 },
+ { 0x00000001, 0x21080061, 0x00000000, 0x00030007 },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x04600031, 0x28000cc1, 0x00000020, 0x02198008 },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x04600031, 0x29000cc1, 0x00000020, 0x02198009 },
+ { 0x00800001, 0x28600229, 0x008d0830, 0x00000000 },
+ { 0x00800001, 0x29600229, 0x008d0930, 0x00000000 },
+ { 0x00800001, 0x28400229, 0x008d0820, 0x00000000 },
+ { 0x00800001, 0x29400229, 0x008d0920, 0x00000000 },
+ { 0x00800001, 0x28200229, 0x008d0810, 0x00000000 },
+ { 0x00800001, 0x29200229, 0x008d0910, 0x00000000 },
+ { 0x00800001, 0x28000229, 0x008d0800, 0x00000000 },
+ { 0x00800001, 0x29000229, 0x008d0900, 0x00000000 },
+ { 0x00000001, 0x26000228, 0x000000ba, 0x00000000 },
+ { 0x00610001, 0x24400129, 0x000000b8, 0x00000000 },
+ { 0x00710001, 0x24400169, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x00000440, 0x00000000 },
+ { 0x00910001, 0x41400231, 0x02b10700, 0x00000000 },
+ { 0x00910001, 0x22400231, 0x028d0800, 0x00000000 },
+ { 0x00910001, 0x23400231, 0x028d0900, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x00000442, 0x00000000 },
+ { 0x00910001, 0x41410231, 0x02b10701, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x00000444, 0x00000000 },
+ { 0x00910001, 0x41800231, 0x02b10720, 0x00000000 },
+ { 0x00910001, 0x22500231, 0x028d0810, 0x00000000 },
+ { 0x00910001, 0x23500231, 0x028d0910, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x00000446, 0x00000000 },
+ { 0x00910001, 0x41810231, 0x02b10721, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x00000448, 0x00000000 },
+ { 0x00910001, 0x41c00231, 0x02b10740, 0x00000000 },
+ { 0x00910001, 0x22600231, 0x028d0820, 0x00000000 },
+ { 0x00910001, 0x23600231, 0x028d0920, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x0000044a, 0x00000000 },
+ { 0x00910001, 0x41c10231, 0x02b10741, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x0000044c, 0x00000000 },
+ { 0x00910001, 0x42000231, 0x02b10760, 0x00000000 },
+ { 0x00910001, 0x22700231, 0x028d0830, 0x00000000 },
+ { 0x00910001, 0x23700231, 0x028d0930, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x0000044e, 0x00000000 },
+ { 0x00910001, 0x42010231, 0x02b10761, 0x00000000 },
+ { 0x00200001, 0x210001a5, 0x004500a0, 0x00000000 },
+ { 0x00000001, 0x21080061, 0x00000000, 0x0007000f },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x00800001, 0x20400232, 0x00d20140, 0x00000000 },
+ { 0x00800001, 0x20500232, 0x00d20160, 0x00000000 },
+ { 0x00800001, 0x20600232, 0x00d20180, 0x00000000 },
+ { 0x00800001, 0x20700232, 0x00d201a0, 0x00000000 },
+ { 0x00800001, 0x20800232, 0x00d201c0, 0x00000000 },
+ { 0x00800001, 0x20900232, 0x00d201e0, 0x00000000 },
+ { 0x00800001, 0x20a00232, 0x00d20200, 0x00000000 },
+ { 0x00800001, 0x20b00232, 0x00d20220, 0x00000000 },
+ { 0x05600031, 0x20000cc4, 0x00000020, 0x0a094007 },
+ { 0x0020000c, 0x21003da5, 0x004500a0, 0x00010001 },
+ { 0x00000001, 0x21080061, 0x00000000, 0x00030007 },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x00800001, 0x20400232, 0x00d20240, 0x00000000 },
+ { 0x00800001, 0x20500232, 0x00d20260, 0x00000000 },
+ { 0x05600031, 0x20000cc4, 0x00000020, 0x04094008 },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x00800001, 0x20400232, 0x00d20340, 0x00000000 },
+ { 0x00800001, 0x20500232, 0x00d20360, 0x00000000 },
+ { 0x05600031, 0x20000cc4, 0x00000020, 0x04094009 },
+ { 0x01000040, 0x20863dad, 0x00000086, 0xffffffff },
+ { 0x00000040, 0x20a03dad, 0x000000a0, 0x00100010 },
+ { 0x05000010, 0x200035ac, 0x020000a0, 0x00000084 },
+ { 0x00010220, 0x34001c00, 0x00001400, 0x00000008 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0xffffff26 },
+ { 0x00000001, 0x20a001ad, 0x0000008a, 0x00000000 },
+ { 0x00000040, 0x20a23dad, 0x000000a2, 0x00080008 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0xffffff20 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
+ { 0x07000031, 0x20001cc4, 0x000001e0, 0x82000010 },
+ { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
+ { 0x07000031, 0x20001cc4, 0x000001e0, 0x82000010 },
diff --git a/src/shaders/post_processing/gen5_6/pl3_load_save_nv12.asm b/src/shaders/post_processing/gen5_6/pl3_load_save_nv12.asm
new file mode 100644
index 0000000..cd1b5fe
--- /dev/null
+++ b/src/shaders/post_processing/gen5_6/pl3_load_save_nv12.asm
@@ -0,0 +1,17 @@
+// Module name: PL3_LOAD_SAVE_NV12 -- kernel built only from the included stages below, in order: SetupVPKernel, Multiple_Loop_Head, IMC3_Load_8x4, PL8x4_Save_NV12, Multiple_Loop. NOTE(review): going by the include names this presumably loads a 3-plane IMC3 surface and saves it as NV12 -- confirm against the included files.
+.kernel PL3_LOAD_SAVE_NV12
+.code
+
+#include "SetupVPKernel.asm"
+#include "Multiple_Loop_Head.asm"
+#include "IMC3_Load_8x4.asm"
+#include "PL8x4_Save_NV12.asm"
+#include "Multiple_Loop.asm"
+
+END_THREAD // End of Thread
+
+.end_code
+
+.end_kernel
+
+// end of pl3_load_save_nv12.asm
diff --git a/src/shaders/post_processing/gen5_6/pl3_load_save_nv12.g4b.gen5 b/src/shaders/post_processing/gen5_6/pl3_load_save_nv12.g4b.gen5
new file mode 100644
index 0000000..cf31c50
--- /dev/null
+++ b/src/shaders/post_processing/gen5_6/pl3_load_save_nv12.g4b.gen5
@@ -0,0 +1,108 @@
+ { 0x00600001, 0x21000021, 0x008d0000, 0x00000000 },
+ { 0x00000441, 0x20842e2d, 0x000000b7, 0x00100010 },
+ { 0x00000c01, 0x2086022d, 0x000000bb, 0x00000000 },
+ { 0x00000801, 0x208a01ad, 0x000000a0, 0x00000000 },
+ { 0x00200001, 0x209403bd, 0x006600a4, 0x00000000 },
+ { 0x00000040, 0x208435ad, 0x00000084, 0x000000a0 },
+ { 0x00200040, 0x210035a5, 0x004500a0, 0x00450074 },
+ { 0x00000001, 0x21080061, 0x00000000, 0x0007000f },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x01600031, 0x27000c01, 0x408d0000, 0x0248a001 },
+ { 0x0020000c, 0x21003ca5, 0x00450100, 0x00010001 },
+ { 0x00000001, 0x21080061, 0x00000000, 0x00030007 },
+ { 0x00600001, 0x20400022, 0x008d0100, 0x00000000 },
+ { 0x02600031, 0x28000c01, 0x408d0000, 0x0218a002 },
+ { 0x00600001, 0x20600022, 0x008d0100, 0x00000000 },
+ { 0x02600031, 0x29000c01, 0x408d0000, 0x0218a003 },
+ { 0x00800001, 0x22200229, 0x00b10770, 0x00000000 },
+ { 0x00800001, 0x22000229, 0x00b10760, 0x00000000 },
+ { 0x00800001, 0x21e00229, 0x00b10750, 0x00000000 },
+ { 0x00800001, 0x21c00229, 0x00b10740, 0x00000000 },
+ { 0x00800001, 0x21a00229, 0x00b10730, 0x00000000 },
+ { 0x00800001, 0x21800229, 0x00b10720, 0x00000000 },
+ { 0x00800001, 0x21600229, 0x00b10710, 0x00000000 },
+ { 0x00800001, 0x21400229, 0x00b10700, 0x00000000 },
+ { 0x00800001, 0x22600229, 0x008d0810, 0x00000000 },
+ { 0x00800001, 0x23600229, 0x008d0910, 0x00000000 },
+ { 0x00800001, 0x22400229, 0x008d0800, 0x00000000 },
+ { 0x00800001, 0x23400229, 0x008d0900, 0x00000000 },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x00200001, 0x202001a6, 0x004500a0, 0x00000000 },
+ { 0x00000001, 0x20280062, 0x00000000, 0x0007000f },
+ { 0x00000005, 0x24000c20, 0x000000b8, 0x00ffffff },
+ { 0x04000010, 0x20000c04, 0x00000400, 0x00ffffff },
+ { 0x00010220, 0x34001c00, 0x00001400, 0x00000056 },
+ { 0x01600031, 0x27000c01, 0x408d0000, 0x0248a007 },
+ { 0x0000040c, 0x21043da1, 0x000000a2, 0x00010001 },
+ { 0x00000801, 0x21080061, 0x00000000, 0x0003000f },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x01600031, 0x28000c01, 0x408d0000, 0x0228a008 },
+ { 0x00200001, 0x210001a1, 0x004500a0, 0x00000000 },
+ { 0x00000001, 0x21080061, 0x00000000, 0x0007000f },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x00000001, 0x26020228, 0x000000ba, 0x00000000 },
+ { 0x00610001, 0x24400129, 0x020000b8, 0x00000000 },
+ { 0x00710001, 0x24400169, 0x02000000, 0x00000000 },
+ { 0x00000001, 0x24600061, 0x00000000, 0x00040001 },
+ { 0x00000001, 0x24640061, 0x00000000, 0x00400010 },
+ { 0x00000001, 0x24680061, 0x00000000, 0x04000100 },
+ { 0x00000001, 0x246c0061, 0x00000000, 0x40001000 },
+ { 0x00000001, 0x26020128, 0x00000440, 0x00000000 },
+ { 0x00910001, 0x41400231, 0x02b10700, 0x00000000 },
+ { 0x02600005, 0x2000252c, 0x02000440, 0x008d0460 },
+ { 0x00710001, 0x42400231, 0x02ae0800, 0x00000000 },
+ { 0x00710001, 0x43400231, 0x02ae0801, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x00000442, 0x00000000 },
+ { 0x00910001, 0x41600231, 0x02b10710, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x00000444, 0x00000000 },
+ { 0x00910001, 0x41800231, 0x02b10720, 0x00000000 },
+ { 0x02600005, 0x2000252c, 0x02000444, 0x008d0460 },
+ { 0x00710001, 0x42500231, 0x02ae0810, 0x00000000 },
+ { 0x00710001, 0x43500231, 0x02ae0811, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x00000446, 0x00000000 },
+ { 0x00910001, 0x41a00231, 0x02b10730, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x00000448, 0x00000000 },
+ { 0x00910001, 0x41c00231, 0x02b10740, 0x00000000 },
+ { 0x02600005, 0x2000252c, 0x02000448, 0x008d0460 },
+ { 0x00710001, 0x42600231, 0x02ae0820, 0x00000000 },
+ { 0x00710001, 0x43600231, 0x02ae0821, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x0000044a, 0x00000000 },
+ { 0x00910001, 0x41e00231, 0x02b10750, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x0000044c, 0x00000000 },
+ { 0x00910001, 0x42000231, 0x02b10760, 0x00000000 },
+ { 0x02600005, 0x2000252c, 0x0200044c, 0x008d0460 },
+ { 0x00710001, 0x42700231, 0x02ae0830, 0x00000000 },
+ { 0x00710001, 0x43700231, 0x02ae0831, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x0000044e, 0x00000000 },
+ { 0x00910001, 0x42200231, 0x02b10770, 0x00000000 },
+ { 0x00800001, 0x20400232, 0x00d20140, 0x00000000 },
+ { 0x00800001, 0x20500232, 0x00d20160, 0x00000000 },
+ { 0x00800001, 0x20600232, 0x00d20180, 0x00000000 },
+ { 0x00800001, 0x20700232, 0x00d201a0, 0x00000000 },
+ { 0x00800001, 0x20800232, 0x00d201c0, 0x00000000 },
+ { 0x00800001, 0x20900232, 0x00d201e0, 0x00000000 },
+ { 0x00800001, 0x20a00232, 0x00d20200, 0x00000000 },
+ { 0x00800001, 0x20b00232, 0x00d20220, 0x00000000 },
+ { 0x01600031, 0x20000c04, 0x508d0000, 0x0a082007 },
+ { 0x00200001, 0x210001a5, 0x004500a0, 0x00000000 },
+ { 0x0000000c, 0x21043ca5, 0x00000104, 0x00010001 },
+ { 0x00000001, 0x21080061, 0x00000000, 0x0003000f },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x00800001, 0x40400232, 0x00d20240, 0x00000000 },
+ { 0x00800001, 0x40410232, 0x00d20340, 0x00000000 },
+ { 0x00800001, 0x40600232, 0x00d20260, 0x00000000 },
+ { 0x00800001, 0x40610232, 0x00d20360, 0x00000000 },
+ { 0x01600031, 0x20000c04, 0x508d0000, 0x06082008 },
+ { 0x01000040, 0x20863dad, 0x00000086, 0xffffffff },
+ { 0x00000040, 0x20a03dad, 0x000000a0, 0x00100010 },
+ { 0x05000010, 0x200035ac, 0x020000a0, 0x00000084 },
+ { 0x00010220, 0x34001c00, 0x00001400, 0x00000008 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0xffffff44 },
+ { 0x00000001, 0x20a001ad, 0x0000008a, 0x00000000 },
+ { 0x00000040, 0x20a23dad, 0x000000a2, 0x00080008 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0xffffff3e },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
+ { 0x0f000031, 0x20000c04, 0x708d0000, 0x82000000 },
+ { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
+ { 0x0f000031, 0x20000c04, 0x708d0000, 0x82000000 },
diff --git a/src/shaders/post_processing/gen5_6/pl3_load_save_nv12.g6b b/src/shaders/post_processing/gen5_6/pl3_load_save_nv12.g6b
new file mode 100644
index 0000000..437ba56
--- /dev/null
+++ b/src/shaders/post_processing/gen5_6/pl3_load_save_nv12.g6b
@@ -0,0 +1,181 @@
+ { 0x00600001, 0x20e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21600061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21a00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x22000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x22200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x22400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x22600061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x22800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x22a00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x22c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x22e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x23000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x23200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x23400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x23600061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x23800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x23a00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x23c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x23e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x24000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x24200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x24400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x24600061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x24800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x24a00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x24c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x24e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x25000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x25200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x25400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x25600061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x25800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x25a00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x25c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x25e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x26000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x26200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x26400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x26600061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x26800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x26a00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x26c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x26e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x27000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x27200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x27400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x27600061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x27800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x27a00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x27c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x27e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28600061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28a00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x29000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x29200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x29400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x29600061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x29800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x29a00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x29c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x29e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21000021, 0x008d0000, 0x00000000 },
+ { 0x00000441, 0x20842e2d, 0x000000b7, 0x00100010 },
+ { 0x00000c01, 0x2086022d, 0x000000bb, 0x00000000 },
+ { 0x00000801, 0x208a01ad, 0x000000a0, 0x00000000 },
+ { 0x00200001, 0x209403bd, 0x006600a4, 0x00000000 },
+ { 0x00000040, 0x208435ad, 0x00000084, 0x000000a0 },
+ { 0x00200040, 0x210035a5, 0x004500a0, 0x00450074 },
+ { 0x00000001, 0x21080061, 0x00000000, 0x0007000f },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x04600031, 0x27000cc1, 0x00000020, 0x02498001 },
+ { 0x0020000c, 0x21003ca5, 0x00450100, 0x00010001 },
+ { 0x00000001, 0x21080061, 0x00000000, 0x00030007 },
+ { 0x00600001, 0x20400022, 0x008d0100, 0x00000000 },
+ { 0x04600031, 0x28000cc1, 0x00000040, 0x02198002 },
+ { 0x00600001, 0x20600022, 0x008d0100, 0x00000000 },
+ { 0x04600031, 0x29000cc1, 0x00000040, 0x02198003 },
+ { 0x00800001, 0x22200229, 0x00b10770, 0x00000000 },
+ { 0x00800001, 0x22000229, 0x00b10760, 0x00000000 },
+ { 0x00800001, 0x21e00229, 0x00b10750, 0x00000000 },
+ { 0x00800001, 0x21c00229, 0x00b10740, 0x00000000 },
+ { 0x00800001, 0x21a00229, 0x00b10730, 0x00000000 },
+ { 0x00800001, 0x21800229, 0x00b10720, 0x00000000 },
+ { 0x00800001, 0x21600229, 0x00b10710, 0x00000000 },
+ { 0x00800001, 0x21400229, 0x00b10700, 0x00000000 },
+ { 0x00800001, 0x22600229, 0x008d0810, 0x00000000 },
+ { 0x00800001, 0x23600229, 0x008d0910, 0x00000000 },
+ { 0x00800001, 0x22400229, 0x008d0800, 0x00000000 },
+ { 0x00800001, 0x23400229, 0x008d0900, 0x00000000 },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x00200001, 0x202001a6, 0x004500a0, 0x00000000 },
+ { 0x00000001, 0x20280062, 0x00000000, 0x0007000f },
+ { 0x00000005, 0x24000c20, 0x000000b8, 0x00ffffff },
+ { 0x04000010, 0x20000c04, 0x00000400, 0x00ffffff },
+ { 0x00010220, 0x34001c00, 0x00001400, 0x00000056 },
+ { 0x04600031, 0x27000cc1, 0x00000020, 0x02498007 },
+ { 0x0000040c, 0x21043da1, 0x000000a2, 0x00010001 },
+ { 0x00000801, 0x21080061, 0x00000000, 0x0003000f },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x04600031, 0x28000cc1, 0x00000020, 0x02298008 },
+ { 0x00200001, 0x210001a1, 0x004500a0, 0x00000000 },
+ { 0x00000001, 0x21080061, 0x00000000, 0x0007000f },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x00000001, 0x26020228, 0x000000ba, 0x00000000 },
+ { 0x00610001, 0x24400129, 0x020000b8, 0x00000000 },
+ { 0x00710001, 0x24400169, 0x02000000, 0x00000000 },
+ { 0x00000001, 0x24600061, 0x00000000, 0x00040001 },
+ { 0x00000001, 0x24640061, 0x00000000, 0x00400010 },
+ { 0x00000001, 0x24680061, 0x00000000, 0x04000100 },
+ { 0x00000001, 0x246c0061, 0x00000000, 0x40001000 },
+ { 0x00000001, 0x26020128, 0x00000440, 0x00000000 },
+ { 0x00910001, 0x41400231, 0x02b10700, 0x00000000 },
+ { 0x02600005, 0x2000252c, 0x02000440, 0x008d0460 },
+ { 0x00710001, 0x42400231, 0x02ae0800, 0x00000000 },
+ { 0x00710001, 0x43400231, 0x02ae0801, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x00000442, 0x00000000 },
+ { 0x00910001, 0x41600231, 0x02b10710, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x00000444, 0x00000000 },
+ { 0x00910001, 0x41800231, 0x02b10720, 0x00000000 },
+ { 0x02600005, 0x2000252c, 0x02000444, 0x008d0460 },
+ { 0x00710001, 0x42500231, 0x02ae0810, 0x00000000 },
+ { 0x00710001, 0x43500231, 0x02ae0811, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x00000446, 0x00000000 },
+ { 0x00910001, 0x41a00231, 0x02b10730, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x00000448, 0x00000000 },
+ { 0x00910001, 0x41c00231, 0x02b10740, 0x00000000 },
+ { 0x02600005, 0x2000252c, 0x02000448, 0x008d0460 },
+ { 0x00710001, 0x42600231, 0x02ae0820, 0x00000000 },
+ { 0x00710001, 0x43600231, 0x02ae0821, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x0000044a, 0x00000000 },
+ { 0x00910001, 0x41e00231, 0x02b10750, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x0000044c, 0x00000000 },
+ { 0x00910001, 0x42000231, 0x02b10760, 0x00000000 },
+ { 0x02600005, 0x2000252c, 0x0200044c, 0x008d0460 },
+ { 0x00710001, 0x42700231, 0x02ae0830, 0x00000000 },
+ { 0x00710001, 0x43700231, 0x02ae0831, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x0000044e, 0x00000000 },
+ { 0x00910001, 0x42200231, 0x02b10770, 0x00000000 },
+ { 0x00800001, 0x20400232, 0x00d20140, 0x00000000 },
+ { 0x00800001, 0x20500232, 0x00d20160, 0x00000000 },
+ { 0x00800001, 0x20600232, 0x00d20180, 0x00000000 },
+ { 0x00800001, 0x20700232, 0x00d201a0, 0x00000000 },
+ { 0x00800001, 0x20800232, 0x00d201c0, 0x00000000 },
+ { 0x00800001, 0x20900232, 0x00d201e0, 0x00000000 },
+ { 0x00800001, 0x20a00232, 0x00d20200, 0x00000000 },
+ { 0x00800001, 0x20b00232, 0x00d20220, 0x00000000 },
+ { 0x05600031, 0x20000cc4, 0x00000020, 0x0a094007 },
+ { 0x00200001, 0x210001a5, 0x004500a0, 0x00000000 },
+ { 0x0000000c, 0x21043ca5, 0x00000104, 0x00010001 },
+ { 0x00000001, 0x21080061, 0x00000000, 0x0003000f },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x00800001, 0x40400232, 0x00d20240, 0x00000000 },
+ { 0x00800001, 0x40410232, 0x00d20340, 0x00000000 },
+ { 0x00800001, 0x40600232, 0x00d20260, 0x00000000 },
+ { 0x00800001, 0x40610232, 0x00d20360, 0x00000000 },
+ { 0x05600031, 0x20000cc4, 0x00000020, 0x06094008 },
+ { 0x01000040, 0x20863dad, 0x00000086, 0xffffffff },
+ { 0x00000040, 0x20a03dad, 0x000000a0, 0x00100010 },
+ { 0x05000010, 0x200035ac, 0x020000a0, 0x00000084 },
+ { 0x00010220, 0x34001c00, 0x00001400, 0x00000008 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0xffffff44 },
+ { 0x00000001, 0x20a001ad, 0x0000008a, 0x00000000 },
+ { 0x00000040, 0x20a23dad, 0x000000a2, 0x00080008 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0xffffff3e },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
+ { 0x07000031, 0x20001cc4, 0x000001e0, 0x82000010 },
+ { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
+ { 0x07000031, 0x20001cc4, 0x000001e0, 0x82000010 },
diff --git a/src/shaders/post_processing/gen5_6/pl3_load_save_pa.asm b/src/shaders/post_processing/gen5_6/pl3_load_save_pa.asm
new file mode 100755
index 0000000..11efe0a
--- /dev/null
+++ b/src/shaders/post_processing/gen5_6/pl3_load_save_pa.asm
@@ -0,0 +1,18 @@
+// Module name: PL3_LOAD_SAVE_pa -- this kernel has no instructions of its own; its body is the include files below.
+.kernel PL3_LOAD_SAVE_PA // kernel name; NOTE(review): the original author asked whether it is used for anything beyond naming -- confirm
+.code
+// The stages below are pasted in this textual order. NOTE(review): names suggest setup, loop head, IMC3 load, 8x5->8x8 step, PA save, loop tail -- confirm in each file.
+#include "SetupVPKernel.asm"
+#include "Multiple_Loop_Head.asm"
+#include "IMC3_Load_8x5.asm"
+#include "PL8x5_PL8x8.asm"
+#include "PL8x8_Save_PA.asm"
+#include "Multiple_Loop.asm"
+
+END_THREAD // End of Thread; END_THREAD is not defined in this file (presumably supplied by one of the includes -- verify)
+
+.end_code
+
+.end_kernel
+
+// end of pl3_load_save_pa.asm
diff --git a/src/shaders/post_processing/gen5_6/pl3_load_save_pa.g4b.gen5 b/src/shaders/post_processing/gen5_6/pl3_load_save_pa.g4b.gen5
new file mode 100644
index 0000000..5a58923
--- /dev/null
+++ b/src/shaders/post_processing/gen5_6/pl3_load_save_pa.g4b.gen5
@@ -0,0 +1,119 @@
+ { 0x00600001, 0x21000021, 0x008d0000, 0x00000000 },
+ { 0x00000441, 0x20842e2d, 0x000000b7, 0x00100010 },
+ { 0x00000c01, 0x2086022d, 0x000000bb, 0x00000000 },
+ { 0x00000801, 0x208a01ad, 0x000000a0, 0x00000000 },
+ { 0x00200001, 0x209403bd, 0x006600a4, 0x00000000 },
+ { 0x00000040, 0x208435ad, 0x00000084, 0x000000a0 },
+ { 0x00200040, 0x210035a5, 0x004500a0, 0x00450074 },
+ { 0x00000001, 0x21080061, 0x00000000, 0x0007000f },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x01600031, 0x27000c01, 0x408d0000, 0x0248a001 },
+ { 0x0020000c, 0x21003ca5, 0x00450100, 0x00010001 },
+ { 0x00000001, 0x21080061, 0x00000000, 0x00040007 },
+ { 0x00600001, 0x20400022, 0x008d0100, 0x00000000 },
+ { 0x02600031, 0x28000c01, 0x408d0000, 0x0228a002 },
+ { 0x00600001, 0x20600022, 0x008d0100, 0x00000000 },
+ { 0x02600031, 0x29000c01, 0x408d0000, 0x0228a003 },
+ { 0x00800001, 0x22200229, 0x00b10770, 0x00000000 },
+ { 0x00800001, 0x22000229, 0x00b10760, 0x00000000 },
+ { 0x00800001, 0x21e00229, 0x00b10750, 0x00000000 },
+ { 0x00800001, 0x21c00229, 0x00b10740, 0x00000000 },
+ { 0x00800001, 0x21a00229, 0x00b10730, 0x00000000 },
+ { 0x00800001, 0x21800229, 0x00b10720, 0x00000000 },
+ { 0x00800001, 0x21600229, 0x00b10710, 0x00000000 },
+ { 0x00800001, 0x21400229, 0x00b10700, 0x00000000 },
+ { 0x00800001, 0x22800229, 0x008d0820, 0x00000000 },
+ { 0x00800001, 0x23800229, 0x008d0920, 0x00000000 },
+ { 0x00800001, 0x22600229, 0x008d0810, 0x00000000 },
+ { 0x00800001, 0x23600229, 0x008d0910, 0x00000000 },
+ { 0x00800001, 0x22400229, 0x008d0800, 0x00000000 },
+ { 0x00800001, 0x23400229, 0x008d0900, 0x00000000 },
+ { 0x80600042, 0x22b02529, 0x008d0270, 0x008d0280 },
+ { 0x80600042, 0x23b02529, 0x008d0370, 0x008d0380 },
+ { 0x00600001, 0x22a00129, 0x008d0270, 0x00000000 },
+ { 0x80600042, 0x22902529, 0x008d0260, 0x008d0270 },
+ { 0x00600001, 0x23a00129, 0x008d0370, 0x00000000 },
+ { 0x80600042, 0x23902529, 0x008d0360, 0x008d0370 },
+ { 0x00600001, 0x22800129, 0x008d0260, 0x00000000 },
+ { 0x80600042, 0x22702529, 0x008d0250, 0x008d0260 },
+ { 0x00600001, 0x23800129, 0x008d0360, 0x00000000 },
+ { 0x80600042, 0x23702529, 0x008d0350, 0x008d0360 },
+ { 0x00600001, 0x22600129, 0x008d0250, 0x00000000 },
+ { 0x80600042, 0x22502529, 0x008d0240, 0x008d0250 },
+ { 0x00600001, 0x23600129, 0x008d0350, 0x00000000 },
+ { 0x80600042, 0x23502529, 0x008d0340, 0x008d0350 },
+ { 0x00400040, 0x22083e28, 0x00690028, 0x07000700 },
+ { 0x00800001, 0xd0000231, 0x00d20140, 0x00000000 },
+ { 0x00800001, 0xd0200231, 0x00d20160, 0x00000000 },
+ { 0x00800001, 0xd0400231, 0x00d20180, 0x00000000 },
+ { 0x00800001, 0xd0600231, 0x00d201a0, 0x00000000 },
+ { 0x00800001, 0xd0800231, 0x00d201c0, 0x00000000 },
+ { 0x00800001, 0xd0a00231, 0x00d201e0, 0x00000000 },
+ { 0x00800001, 0xd0c00231, 0x00d20200, 0x00000000 },
+ { 0x00800001, 0xd0e00231, 0x00d20220, 0x00000000 },
+ { 0x00600001, 0xf4000231, 0x00ae0240, 0x00000000 },
+ { 0x00600001, 0xf8000231, 0x00ae0340, 0x00000000 },
+ { 0x00600001, 0xf4200231, 0x00ae0250, 0x00000000 },
+ { 0x00600001, 0xf8200231, 0x00ae0350, 0x00000000 },
+ { 0x00600001, 0xf4400231, 0x00ae0260, 0x00000000 },
+ { 0x00600001, 0xf8400231, 0x00ae0360, 0x00000000 },
+ { 0x00600001, 0xf4600231, 0x00ae0270, 0x00000000 },
+ { 0x00600001, 0xf8600231, 0x00ae0370, 0x00000000 },
+ { 0x00600001, 0xf4800231, 0x00ae0280, 0x00000000 },
+ { 0x00600001, 0xf8800231, 0x00ae0380, 0x00000000 },
+ { 0x00600001, 0xf4a00231, 0x00ae0290, 0x00000000 },
+ { 0x00600001, 0xf8a00231, 0x00ae0390, 0x00000000 },
+ { 0x00600001, 0xf4c00231, 0x00ae02a0, 0x00000000 },
+ { 0x00600001, 0xf8c00231, 0x00ae03a0, 0x00000000 },
+ { 0x00600001, 0xf4e00231, 0x00ae02b0, 0x00000000 },
+ { 0x00600001, 0xf8e00231, 0x00ae03b0, 0x00000000 },
+ { 0x00000409, 0x21003da5, 0x000000a0, 0x00010001 },
+ { 0x00000c01, 0x210401a5, 0x000000a2, 0x00000000 },
+ { 0x00000801, 0x21080061, 0x00000000, 0x0007001f },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x00000005, 0x24000c20, 0x000000b8, 0x00ffffff },
+ { 0x04000010, 0x20000c04, 0x00000400, 0x00ffffff },
+ { 0x00010220, 0x34001c00, 0x00001400, 0x0000002a },
+ { 0x01600031, 0x21400c01, 0x408d0000, 0x0288a007 },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x00000001, 0x26000228, 0x000000ba, 0x00000000 },
+ { 0x00610001, 0x24400129, 0x000000b8, 0x00000000 },
+ { 0x00710001, 0x24400169, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x00000440, 0x00000000 },
+ { 0x00910001, 0x27000129, 0x02b10140, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x00000442, 0x00000000 },
+ { 0x00910001, 0x27200129, 0x02b10160, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x00000444, 0x00000000 },
+ { 0x00910001, 0x27400129, 0x02b10180, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x00000446, 0x00000000 },
+ { 0x00910001, 0x27600129, 0x02b101a0, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x00000448, 0x00000000 },
+ { 0x00910001, 0x27800129, 0x02b101c0, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x0000044a, 0x00000000 },
+ { 0x00910001, 0x27a00129, 0x02b101e0, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x0000044c, 0x00000000 },
+ { 0x00910001, 0x27c00129, 0x02b10200, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x0000044e, 0x00000000 },
+ { 0x00910001, 0x27e00129, 0x02b10220, 0x00000000 },
+ { 0x00600001, 0x20400022, 0x008d0700, 0x00000000 },
+ { 0x00600001, 0x20600022, 0x008d0720, 0x00000000 },
+ { 0x00600001, 0x20800022, 0x008d0740, 0x00000000 },
+ { 0x00600001, 0x20a00022, 0x008d0760, 0x00000000 },
+ { 0x00600001, 0x20c00022, 0x008d0780, 0x00000000 },
+ { 0x00600001, 0x20e00022, 0x008d07a0, 0x00000000 },
+ { 0x00600001, 0x21000022, 0x008d07c0, 0x00000000 },
+ { 0x00600001, 0x21200022, 0x008d07e0, 0x00000000 },
+ { 0x01600031, 0x20000c04, 0x508d0000, 0x12082007 },
+ { 0x01000040, 0x20863dad, 0x00000086, 0xffffffff },
+ { 0x00000040, 0x20a03dad, 0x000000a0, 0x00100010 },
+ { 0x05000010, 0x200035ac, 0x020000a0, 0x00000084 },
+ { 0x00010220, 0x34001c00, 0x00001400, 0x00000008 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0xffffff2e },
+ { 0x00000001, 0x20a001ad, 0x0000008a, 0x00000000 },
+ { 0x00000040, 0x20a23dad, 0x000000a2, 0x00080008 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0xffffff28 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
+ { 0x0f000031, 0x20000c04, 0x708d0000, 0x82000000 },
+ { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
+ { 0x0f000031, 0x20000c04, 0x708d0000, 0x82000000 },
diff --git a/src/shaders/post_processing/gen5_6/pl3_load_save_pa.g6b b/src/shaders/post_processing/gen5_6/pl3_load_save_pa.g6b
new file mode 100644
index 0000000..be37861
--- /dev/null
+++ b/src/shaders/post_processing/gen5_6/pl3_load_save_pa.g6b
@@ -0,0 +1,192 @@
+ { 0x00600001, 0x20e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21600061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21a00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x22000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x22200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x22400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x22600061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x22800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x22a00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x22c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x22e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x23000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x23200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x23400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x23600061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x23800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x23a00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x23c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x23e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x24000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x24200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x24400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x24600061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x24800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x24a00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x24c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x24e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x25000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x25200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x25400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x25600061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x25800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x25a00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x25c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x25e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x26000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x26200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x26400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x26600061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x26800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x26a00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x26c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x26e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x27000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x27200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x27400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x27600061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x27800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x27a00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x27c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x27e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28600061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28a00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x29000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x29200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x29400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x29600061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x29800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x29a00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x29c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x29e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21000021, 0x008d0000, 0x00000000 },
+ { 0x00000441, 0x20842e2d, 0x000000b7, 0x00100010 },
+ { 0x00000c01, 0x2086022d, 0x000000bb, 0x00000000 },
+ { 0x00000801, 0x208a01ad, 0x000000a0, 0x00000000 },
+ { 0x00200001, 0x209403bd, 0x006600a4, 0x00000000 },
+ { 0x00000040, 0x208435ad, 0x00000084, 0x000000a0 },
+ { 0x00200040, 0x210035a5, 0x004500a0, 0x00450074 },
+ { 0x00000001, 0x21080061, 0x00000000, 0x0007000f },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x04600031, 0x27000cc1, 0x00000020, 0x02498001 },
+ { 0x0020000c, 0x21003ca5, 0x00450100, 0x00010001 },
+ { 0x00000001, 0x21080061, 0x00000000, 0x00040007 },
+ { 0x00600001, 0x20400022, 0x008d0100, 0x00000000 },
+ { 0x04600031, 0x28000cc1, 0x00000040, 0x02298002 },
+ { 0x00600001, 0x20600022, 0x008d0100, 0x00000000 },
+ { 0x04600031, 0x29000cc1, 0x00000040, 0x02298003 },
+ { 0x00800001, 0x22200229, 0x00b10770, 0x00000000 },
+ { 0x00800001, 0x22000229, 0x00b10760, 0x00000000 },
+ { 0x00800001, 0x21e00229, 0x00b10750, 0x00000000 },
+ { 0x00800001, 0x21c00229, 0x00b10740, 0x00000000 },
+ { 0x00800001, 0x21a00229, 0x00b10730, 0x00000000 },
+ { 0x00800001, 0x21800229, 0x00b10720, 0x00000000 },
+ { 0x00800001, 0x21600229, 0x00b10710, 0x00000000 },
+ { 0x00800001, 0x21400229, 0x00b10700, 0x00000000 },
+ { 0x00800001, 0x22800229, 0x008d0820, 0x00000000 },
+ { 0x00800001, 0x23800229, 0x008d0920, 0x00000000 },
+ { 0x00800001, 0x22600229, 0x008d0810, 0x00000000 },
+ { 0x00800001, 0x23600229, 0x008d0910, 0x00000000 },
+ { 0x00800001, 0x22400229, 0x008d0800, 0x00000000 },
+ { 0x00800001, 0x23400229, 0x008d0900, 0x00000000 },
+ { 0x80600042, 0x22b02529, 0x008d0270, 0x008d0280 },
+ { 0x80600042, 0x23b02529, 0x008d0370, 0x008d0380 },
+ { 0x00600001, 0x22a00129, 0x008d0270, 0x00000000 },
+ { 0x80600042, 0x22902529, 0x008d0260, 0x008d0270 },
+ { 0x00600001, 0x23a00129, 0x008d0370, 0x00000000 },
+ { 0x80600042, 0x23902529, 0x008d0360, 0x008d0370 },
+ { 0x00600001, 0x22800129, 0x008d0260, 0x00000000 },
+ { 0x80600042, 0x22702529, 0x008d0250, 0x008d0260 },
+ { 0x00600001, 0x23800129, 0x008d0360, 0x00000000 },
+ { 0x80600042, 0x23702529, 0x008d0350, 0x008d0360 },
+ { 0x00600001, 0x22600129, 0x008d0250, 0x00000000 },
+ { 0x80600042, 0x22502529, 0x008d0240, 0x008d0250 },
+ { 0x00600001, 0x23600129, 0x008d0350, 0x00000000 },
+ { 0x80600042, 0x23502529, 0x008d0340, 0x008d0350 },
+ { 0x00400040, 0x22083e28, 0x00690028, 0x07000700 },
+ { 0x00800001, 0xd0000231, 0x00d20140, 0x00000000 },
+ { 0x00800001, 0xd0200231, 0x00d20160, 0x00000000 },
+ { 0x00800001, 0xd0400231, 0x00d20180, 0x00000000 },
+ { 0x00800001, 0xd0600231, 0x00d201a0, 0x00000000 },
+ { 0x00800001, 0xd0800231, 0x00d201c0, 0x00000000 },
+ { 0x00800001, 0xd0a00231, 0x00d201e0, 0x00000000 },
+ { 0x00800001, 0xd0c00231, 0x00d20200, 0x00000000 },
+ { 0x00800001, 0xd0e00231, 0x00d20220, 0x00000000 },
+ { 0x00600001, 0xf4000231, 0x00ae0240, 0x00000000 },
+ { 0x00600001, 0xf8000231, 0x00ae0340, 0x00000000 },
+ { 0x00600001, 0xf4200231, 0x00ae0250, 0x00000000 },
+ { 0x00600001, 0xf8200231, 0x00ae0350, 0x00000000 },
+ { 0x00600001, 0xf4400231, 0x00ae0260, 0x00000000 },
+ { 0x00600001, 0xf8400231, 0x00ae0360, 0x00000000 },
+ { 0x00600001, 0xf4600231, 0x00ae0270, 0x00000000 },
+ { 0x00600001, 0xf8600231, 0x00ae0370, 0x00000000 },
+ { 0x00600001, 0xf4800231, 0x00ae0280, 0x00000000 },
+ { 0x00600001, 0xf8800231, 0x00ae0380, 0x00000000 },
+ { 0x00600001, 0xf4a00231, 0x00ae0290, 0x00000000 },
+ { 0x00600001, 0xf8a00231, 0x00ae0390, 0x00000000 },
+ { 0x00600001, 0xf4c00231, 0x00ae02a0, 0x00000000 },
+ { 0x00600001, 0xf8c00231, 0x00ae03a0, 0x00000000 },
+ { 0x00600001, 0xf4e00231, 0x00ae02b0, 0x00000000 },
+ { 0x00600001, 0xf8e00231, 0x00ae03b0, 0x00000000 },
+ { 0x00000409, 0x21003da5, 0x000000a0, 0x00010001 },
+ { 0x00000c01, 0x210401a5, 0x000000a2, 0x00000000 },
+ { 0x00000801, 0x21080061, 0x00000000, 0x0007001f },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x00000005, 0x24000c20, 0x000000b8, 0x00ffffff },
+ { 0x04000010, 0x20000c04, 0x00000400, 0x00ffffff },
+ { 0x00010220, 0x34001c00, 0x00001400, 0x0000002a },
+ { 0x04600031, 0x21400cc1, 0x00000020, 0x02898007 },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x00000001, 0x26000228, 0x000000ba, 0x00000000 },
+ { 0x00610001, 0x24400129, 0x000000b8, 0x00000000 },
+ { 0x00710001, 0x24400169, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x00000440, 0x00000000 },
+ { 0x00910001, 0x27000129, 0x02b10140, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x00000442, 0x00000000 },
+ { 0x00910001, 0x27200129, 0x02b10160, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x00000444, 0x00000000 },
+ { 0x00910001, 0x27400129, 0x02b10180, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x00000446, 0x00000000 },
+ { 0x00910001, 0x27600129, 0x02b101a0, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x00000448, 0x00000000 },
+ { 0x00910001, 0x27800129, 0x02b101c0, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x0000044a, 0x00000000 },
+ { 0x00910001, 0x27a00129, 0x02b101e0, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x0000044c, 0x00000000 },
+ { 0x00910001, 0x27c00129, 0x02b10200, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x0000044e, 0x00000000 },
+ { 0x00910001, 0x27e00129, 0x02b10220, 0x00000000 },
+ { 0x00600001, 0x20400022, 0x008d0700, 0x00000000 },
+ { 0x00600001, 0x20600022, 0x008d0720, 0x00000000 },
+ { 0x00600001, 0x20800022, 0x008d0740, 0x00000000 },
+ { 0x00600001, 0x20a00022, 0x008d0760, 0x00000000 },
+ { 0x00600001, 0x20c00022, 0x008d0780, 0x00000000 },
+ { 0x00600001, 0x20e00022, 0x008d07a0, 0x00000000 },
+ { 0x00600001, 0x21000022, 0x008d07c0, 0x00000000 },
+ { 0x00600001, 0x21200022, 0x008d07e0, 0x00000000 },
+ { 0x05600031, 0x20000cc4, 0x00000020, 0x12094007 },
+ { 0x01000040, 0x20863dad, 0x00000086, 0xffffffff },
+ { 0x00000040, 0x20a03dad, 0x000000a0, 0x00100010 },
+ { 0x05000010, 0x200035ac, 0x020000a0, 0x00000084 },
+ { 0x00010220, 0x34001c00, 0x00001400, 0x00000008 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0xffffff2e },
+ { 0x00000001, 0x20a001ad, 0x0000008a, 0x00000000 },
+ { 0x00000040, 0x20a23dad, 0x000000a2, 0x00080008 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0xffffff28 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
+ { 0x07000031, 0x20001cc4, 0x000001e0, 0x82000010 },
+ { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
+ { 0x07000031, 0x20001cc4, 0x000001e0, 0x82000010 },
diff --git a/src/shaders/post_processing/gen5_6/pl3_load_save_pl3.asm b/src/shaders/post_processing/gen5_6/pl3_load_save_pl3.asm
new file mode 100644
index 0000000..f2e9406
--- /dev/null
+++ b/src/shaders/post_processing/gen5_6/pl3_load_save_pl3.asm
@@ -0,0 +1,17 @@
+// Module name: PL3_LOAD_SAVE_pl3 -- this kernel has no instructions of its own; its body is the include files below.
+.kernel PL3_LOAD_SAVE_PL3
+.code
+// The stages below are pasted in this textual order. NOTE(review): names suggest setup, loop head, IMC3 load (8x4), IMC3 save, loop tail -- confirm in each file.
+#include "SetupVPKernel.asm"
+#include "Multiple_Loop_Head.asm"
+#include "IMC3_Load_8x4.asm"
+#include "PL8x4_Save_IMC3.asm"
+#include "Multiple_Loop.asm"
+
+END_THREAD // End of Thread; END_THREAD is not defined in this file (presumably supplied by one of the includes -- verify)
+
+.end_code
+
+.end_kernel
+
+// end of pl3_load_save_pl3.asm
diff --git a/src/shaders/post_processing/gen5_6/pl3_load_save_pl3.g4b.gen5 b/src/shaders/post_processing/gen5_6/pl3_load_save_pl3.g4b.gen5
new file mode 100644
index 0000000..26fa256
--- /dev/null
+++ b/src/shaders/post_processing/gen5_6/pl3_load_save_pl3.g4b.gen5
@@ -0,0 +1,107 @@
+ { 0x00600001, 0x21000021, 0x008d0000, 0x00000000 },
+ { 0x00000441, 0x20842e2d, 0x000000b7, 0x00100010 },
+ { 0x00000c01, 0x2086022d, 0x000000bb, 0x00000000 },
+ { 0x00000801, 0x208a01ad, 0x000000a0, 0x00000000 },
+ { 0x00200001, 0x209403bd, 0x006600a4, 0x00000000 },
+ { 0x00000040, 0x208435ad, 0x00000084, 0x000000a0 },
+ { 0x00200040, 0x210035a5, 0x004500a0, 0x00450074 },
+ { 0x00000001, 0x21080061, 0x00000000, 0x0007000f },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x01600031, 0x27000c01, 0x408d0000, 0x0248a001 },
+ { 0x0020000c, 0x21003ca5, 0x00450100, 0x00010001 },
+ { 0x00000001, 0x21080061, 0x00000000, 0x00030007 },
+ { 0x00600001, 0x20400022, 0x008d0100, 0x00000000 },
+ { 0x02600031, 0x28000c01, 0x408d0000, 0x0218a002 },
+ { 0x00600001, 0x20600022, 0x008d0100, 0x00000000 },
+ { 0x02600031, 0x29000c01, 0x408d0000, 0x0218a003 },
+ { 0x00800001, 0x22200229, 0x00b10770, 0x00000000 },
+ { 0x00800001, 0x22000229, 0x00b10760, 0x00000000 },
+ { 0x00800001, 0x21e00229, 0x00b10750, 0x00000000 },
+ { 0x00800001, 0x21c00229, 0x00b10740, 0x00000000 },
+ { 0x00800001, 0x21a00229, 0x00b10730, 0x00000000 },
+ { 0x00800001, 0x21800229, 0x00b10720, 0x00000000 },
+ { 0x00800001, 0x21600229, 0x00b10710, 0x00000000 },
+ { 0x00800001, 0x21400229, 0x00b10700, 0x00000000 },
+ { 0x00800001, 0x22600229, 0x008d0810, 0x00000000 },
+ { 0x00800001, 0x23600229, 0x008d0910, 0x00000000 },
+ { 0x00800001, 0x22400229, 0x008d0800, 0x00000000 },
+ { 0x00800001, 0x23400229, 0x008d0900, 0x00000000 },
+ { 0x00000005, 0x24000c20, 0x000000b8, 0x00ffffff },
+ { 0x04000010, 0x20000c04, 0x00000400, 0x00ffffff },
+ { 0x00010220, 0x34001c00, 0x00001400, 0x00000052 },
+ { 0x00200001, 0x210001a5, 0x004500a0, 0x00000000 },
+ { 0x00000001, 0x21080061, 0x00000000, 0x0007000f },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x01600031, 0x27000c01, 0x408d0000, 0x0248a007 },
+ { 0x0020000c, 0x21003da5, 0x004500a0, 0x00010001 },
+ { 0x00000001, 0x21080061, 0x00000000, 0x00030007 },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x01600031, 0x28000c01, 0x408d0000, 0x0218a008 },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x01600031, 0x29000c01, 0x408d0000, 0x0218a009 },
+ { 0x00800001, 0x28200229, 0x008d0810, 0x00000000 },
+ { 0x00800001, 0x29200229, 0x008d0910, 0x00000000 },
+ { 0x00800001, 0x28000229, 0x008d0800, 0x00000000 },
+ { 0x00800001, 0x29000229, 0x008d0900, 0x00000000 },
+ { 0x00000001, 0x26000228, 0x000000ba, 0x00000000 },
+ { 0x00610001, 0x24400129, 0x000000b8, 0x00000000 },
+ { 0x00710001, 0x24400169, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x00000440, 0x00000000 },
+ { 0x00910001, 0x41400231, 0x02b10700, 0x00000000 },
+ { 0x00910001, 0x22400231, 0x028d0800, 0x00000000 },
+ { 0x00910001, 0x23400231, 0x028d0900, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x00000442, 0x00000000 },
+ { 0x00910001, 0x41410231, 0x02b10701, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x00000444, 0x00000000 },
+ { 0x00910001, 0x41800231, 0x02b10720, 0x00000000 },
+ { 0x00910001, 0x22500231, 0x028d0810, 0x00000000 },
+ { 0x00910001, 0x23500231, 0x028d0910, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x00000446, 0x00000000 },
+ { 0x00910001, 0x41810231, 0x02b10721, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x00000448, 0x00000000 },
+ { 0x00910001, 0x41c00231, 0x02b10740, 0x00000000 },
+ { 0x00910001, 0x22600231, 0x028d0820, 0x00000000 },
+ { 0x00910001, 0x23600231, 0x028d0920, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x0000044a, 0x00000000 },
+ { 0x00910001, 0x41c10231, 0x02b10741, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x0000044c, 0x00000000 },
+ { 0x00910001, 0x42000231, 0x02b10760, 0x00000000 },
+ { 0x00910001, 0x22700231, 0x028d0830, 0x00000000 },
+ { 0x00910001, 0x23700231, 0x028d0930, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x0000044e, 0x00000000 },
+ { 0x00910001, 0x42010231, 0x02b10761, 0x00000000 },
+ { 0x00200001, 0x210001a5, 0x004500a0, 0x00000000 },
+ { 0x00000001, 0x21080061, 0x00000000, 0x0007000f },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x00800001, 0x20400232, 0x00d20140, 0x00000000 },
+ { 0x00800001, 0x20500232, 0x00d20160, 0x00000000 },
+ { 0x00800001, 0x20600232, 0x00d20180, 0x00000000 },
+ { 0x00800001, 0x20700232, 0x00d201a0, 0x00000000 },
+ { 0x00800001, 0x20800232, 0x00d201c0, 0x00000000 },
+ { 0x00800001, 0x20900232, 0x00d201e0, 0x00000000 },
+ { 0x00800001, 0x20a00232, 0x00d20200, 0x00000000 },
+ { 0x00800001, 0x20b00232, 0x00d20220, 0x00000000 },
+ { 0x01600031, 0x20000c04, 0x508d0000, 0x0a082007 },
+ { 0x0020000c, 0x21003da5, 0x004500a0, 0x00010001 },
+ { 0x00000001, 0x21080061, 0x00000000, 0x00030007 },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x00800001, 0x20400232, 0x00d20240, 0x00000000 },
+ { 0x00800001, 0x20500232, 0x00d20260, 0x00000000 },
+ { 0x01600031, 0x20000c04, 0x508d0000, 0x04082008 },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x00800001, 0x20400232, 0x00d20340, 0x00000000 },
+ { 0x00800001, 0x20500232, 0x00d20360, 0x00000000 },
+ { 0x01600031, 0x20000c04, 0x508d0000, 0x04082009 },
+ { 0x01000040, 0x20863dad, 0x00000086, 0xffffffff },
+ { 0x00000040, 0x20a03dad, 0x000000a0, 0x00100010 },
+ { 0x05000010, 0x200035ac, 0x020000a0, 0x00000084 },
+ { 0x00010220, 0x34001c00, 0x00001400, 0x00000008 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0xffffff46 },
+ { 0x00000001, 0x20a001ad, 0x0000008a, 0x00000000 },
+ { 0x00000040, 0x20a23dad, 0x000000a2, 0x00080008 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0xffffff40 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
+ { 0x0f000031, 0x20000c04, 0x708d0000, 0x82000000 },
+ { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
+ { 0x0f000031, 0x20000c04, 0x708d0000, 0x82000000 },
diff --git a/src/shaders/post_processing/gen5_6/pl3_load_save_pl3.g6b b/src/shaders/post_processing/gen5_6/pl3_load_save_pl3.g6b
new file mode 100644
index 0000000..c9ee1a1
--- /dev/null
+++ b/src/shaders/post_processing/gen5_6/pl3_load_save_pl3.g6b
@@ -0,0 +1,180 @@
+ { 0x00600001, 0x20e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21600061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21a00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x22000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x22200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x22400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x22600061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x22800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x22a00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x22c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x22e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x23000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x23200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x23400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x23600061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x23800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x23a00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x23c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x23e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x24000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x24200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x24400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x24600061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x24800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x24a00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x24c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x24e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x25000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x25200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x25400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x25600061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x25800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x25a00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x25c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x25e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x26000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x26200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x26400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x26600061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x26800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x26a00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x26c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x26e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x27000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x27200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x27400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x27600061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x27800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x27a00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x27c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x27e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28600061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28a00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x29000061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x29200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x29400061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x29600061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x29800061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x29a00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x29c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x29e00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21000021, 0x008d0000, 0x00000000 },
+ { 0x00000441, 0x20842e2d, 0x000000b7, 0x00100010 },
+ { 0x00000c01, 0x2086022d, 0x000000bb, 0x00000000 },
+ { 0x00000801, 0x208a01ad, 0x000000a0, 0x00000000 },
+ { 0x00200001, 0x209403bd, 0x006600a4, 0x00000000 },
+ { 0x00000040, 0x208435ad, 0x00000084, 0x000000a0 },
+ { 0x00200040, 0x210035a5, 0x004500a0, 0x00450074 },
+ { 0x00000001, 0x21080061, 0x00000000, 0x0007000f },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x04600031, 0x27000cc1, 0x00000020, 0x02498001 },
+ { 0x0020000c, 0x21003ca5, 0x00450100, 0x00010001 },
+ { 0x00000001, 0x21080061, 0x00000000, 0x00030007 },
+ { 0x00600001, 0x20400022, 0x008d0100, 0x00000000 },
+ { 0x04600031, 0x28000cc1, 0x00000040, 0x02198002 },
+ { 0x00600001, 0x20600022, 0x008d0100, 0x00000000 },
+ { 0x04600031, 0x29000cc1, 0x00000040, 0x02198003 },
+ { 0x00800001, 0x22200229, 0x00b10770, 0x00000000 },
+ { 0x00800001, 0x22000229, 0x00b10760, 0x00000000 },
+ { 0x00800001, 0x21e00229, 0x00b10750, 0x00000000 },
+ { 0x00800001, 0x21c00229, 0x00b10740, 0x00000000 },
+ { 0x00800001, 0x21a00229, 0x00b10730, 0x00000000 },
+ { 0x00800001, 0x21800229, 0x00b10720, 0x00000000 },
+ { 0x00800001, 0x21600229, 0x00b10710, 0x00000000 },
+ { 0x00800001, 0x21400229, 0x00b10700, 0x00000000 },
+ { 0x00800001, 0x22600229, 0x008d0810, 0x00000000 },
+ { 0x00800001, 0x23600229, 0x008d0910, 0x00000000 },
+ { 0x00800001, 0x22400229, 0x008d0800, 0x00000000 },
+ { 0x00800001, 0x23400229, 0x008d0900, 0x00000000 },
+ { 0x00000005, 0x24000c20, 0x000000b8, 0x00ffffff },
+ { 0x04000010, 0x20000c04, 0x00000400, 0x00ffffff },
+ { 0x00010220, 0x34001c00, 0x00001400, 0x00000052 },
+ { 0x00200001, 0x210001a5, 0x004500a0, 0x00000000 },
+ { 0x00000001, 0x21080061, 0x00000000, 0x0007000f },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x04600031, 0x27000cc1, 0x00000020, 0x02498007 },
+ { 0x0020000c, 0x21003da5, 0x004500a0, 0x00010001 },
+ { 0x00000001, 0x21080061, 0x00000000, 0x00030007 },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x04600031, 0x28000cc1, 0x00000020, 0x02198008 },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x04600031, 0x29000cc1, 0x00000020, 0x02198009 },
+ { 0x00800001, 0x28200229, 0x008d0810, 0x00000000 },
+ { 0x00800001, 0x29200229, 0x008d0910, 0x00000000 },
+ { 0x00800001, 0x28000229, 0x008d0800, 0x00000000 },
+ { 0x00800001, 0x29000229, 0x008d0900, 0x00000000 },
+ { 0x00000001, 0x26000228, 0x000000ba, 0x00000000 },
+ { 0x00610001, 0x24400129, 0x000000b8, 0x00000000 },
+ { 0x00710001, 0x24400169, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x00000440, 0x00000000 },
+ { 0x00910001, 0x41400231, 0x02b10700, 0x00000000 },
+ { 0x00910001, 0x22400231, 0x028d0800, 0x00000000 },
+ { 0x00910001, 0x23400231, 0x028d0900, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x00000442, 0x00000000 },
+ { 0x00910001, 0x41410231, 0x02b10701, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x00000444, 0x00000000 },
+ { 0x00910001, 0x41800231, 0x02b10720, 0x00000000 },
+ { 0x00910001, 0x22500231, 0x028d0810, 0x00000000 },
+ { 0x00910001, 0x23500231, 0x028d0910, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x00000446, 0x00000000 },
+ { 0x00910001, 0x41810231, 0x02b10721, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x00000448, 0x00000000 },
+ { 0x00910001, 0x41c00231, 0x02b10740, 0x00000000 },
+ { 0x00910001, 0x22600231, 0x028d0820, 0x00000000 },
+ { 0x00910001, 0x23600231, 0x028d0920, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x0000044a, 0x00000000 },
+ { 0x00910001, 0x41c10231, 0x02b10741, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x0000044c, 0x00000000 },
+ { 0x00910001, 0x42000231, 0x02b10760, 0x00000000 },
+ { 0x00910001, 0x22700231, 0x028d0830, 0x00000000 },
+ { 0x00910001, 0x23700231, 0x028d0930, 0x00000000 },
+ { 0x00000001, 0x26020128, 0x0000044e, 0x00000000 },
+ { 0x00910001, 0x42010231, 0x02b10761, 0x00000000 },
+ { 0x00200001, 0x210001a5, 0x004500a0, 0x00000000 },
+ { 0x00000001, 0x21080061, 0x00000000, 0x0007000f },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x00800001, 0x20400232, 0x00d20140, 0x00000000 },
+ { 0x00800001, 0x20500232, 0x00d20160, 0x00000000 },
+ { 0x00800001, 0x20600232, 0x00d20180, 0x00000000 },
+ { 0x00800001, 0x20700232, 0x00d201a0, 0x00000000 },
+ { 0x00800001, 0x20800232, 0x00d201c0, 0x00000000 },
+ { 0x00800001, 0x20900232, 0x00d201e0, 0x00000000 },
+ { 0x00800001, 0x20a00232, 0x00d20200, 0x00000000 },
+ { 0x00800001, 0x20b00232, 0x00d20220, 0x00000000 },
+ { 0x05600031, 0x20000cc4, 0x00000020, 0x0a094007 },
+ { 0x0020000c, 0x21003da5, 0x004500a0, 0x00010001 },
+ { 0x00000001, 0x21080061, 0x00000000, 0x00030007 },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x00800001, 0x20400232, 0x00d20240, 0x00000000 },
+ { 0x00800001, 0x20500232, 0x00d20260, 0x00000000 },
+ { 0x05600031, 0x20000cc4, 0x00000020, 0x04094008 },
+ { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
+ { 0x00800001, 0x20400232, 0x00d20340, 0x00000000 },
+ { 0x00800001, 0x20500232, 0x00d20360, 0x00000000 },
+ { 0x05600031, 0x20000cc4, 0x00000020, 0x04094009 },
+ { 0x01000040, 0x20863dad, 0x00000086, 0xffffffff },
+ { 0x00000040, 0x20a03dad, 0x000000a0, 0x00100010 },
+ { 0x05000010, 0x200035ac, 0x020000a0, 0x00000084 },
+ { 0x00010220, 0x34001c00, 0x00001400, 0x00000008 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0xffffff46 },
+ { 0x00000001, 0x20a001ad, 0x0000008a, 0x00000000 },
+ { 0x00000040, 0x20a23dad, 0x000000a2, 0x00080008 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0xffffff40 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
+ { 0x07000031, 0x20001cc4, 0x000001e0, 0x82000010 },
+ { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
+ { 0x07000031, 0x20001cc4, 0x000001e0, 0x82000010 },
diff --git a/src/shaders/post_processing/gen7/DI_Core.g4a b/src/shaders/post_processing/gen7/DI_Core.g4a
new file mode 100644
index 0000000..952e1d4
--- /dev/null
+++ b/src/shaders/post_processing/gen7/DI_Core.g4a
@@ -0,0 +1,309 @@
+/*
+ * Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// 22 // Total instruction count
+// 1 // Total kernel count
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16)
+.default_register_type :ub
+
+.reg_count_total 128
+.reg_count_payload 7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+//Color Pipe (IECP) parameters
+
+
+//r4
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later)
+// 0000,0000,0000
+// 0001(Spawn a root thread),0001 (Root thread spawn thread)
+// = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later)
+// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+// 0000,0000 (Binding table index, added later)
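+// = 0x0C098700 (value of the bit fields listed above; the 0x02000011 previously shown here is the thread-spawn descriptor value)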
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+ // Message Descriptors
+ // = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+ // 1 (header present 1) 0 1010 (media block write) 000000
+ // 00000000 (binding table index - set later)
+ // = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+ // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT
+
+
+.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// End of common.inc
+
+// FileName: DI.asm
+// Author: Vivek Kumar
+// Description: Tasks for DI only case (16x4 block)
+
+
+// End of common.inc
+
+
+// FileName: DNDI.inc
+// Author: Vivek Kumar
+// Description: Include file for DN, DI and DNDI
+// Inputs: DI_ENABLE, DN_ENABLE, DN_PLANAR, DN_PACKED
+
+
+// End of common.inc
+
+
+//Interface:
+//Static Parameters:
+//r1
+
+
+//====================== Binding table (Explicit To DNDI)=========================================
+
+
+.declare mudMSGHDR_DNDI Base=r18 ElementSize=4 Type=ud
+.declare mdMSGHDR_DNDI Base=r18 ElementSize=4 Type=d
+.declare mwMSGHDR_DNDI Base=r18 ElementSize=2 Type=w
+
+
+.declare mudMSGHDR_STMM Base=r20 ElementSize=4 Type=ud
+
+
+.declare mudMSGHDR_HIST Base=r22 ElementSize=4 Type=ud
+
+
+.declare mudMSGHDR_ENC_STATS Base=r24 ElementSize=4 Type=ud
+.declare muwMSGHDR_ENC_STATS Base=r24 ElementSize=2 Type=uw
+.declare mubMSGHDR_ENC_STATS Base=r24 ElementSize=1 Type=ub
+
+
+.declare mudMSGHDR_DN_OUT Base=r31.0 ElementSize=4 Type=ud
+.declare mdMSGHDR_DN_OUT Base=r31.0 ElementSize=4 Type=d
+.declare mubMSGHDR_DN_OUT Base=r31.0 ElementSize=1 Type=ub
+
+
+.declare mudMSGHDR_UVCOPY Base=r36 ElementSize=4 Type=ud
+.declare mdMSGHDR_UVCOPY Base=r36 ElementSize=4 Type=d
+.declare mudMSGHDR_UCOPY Base=r36 ElementSize=4 Type=ud
+.declare mudMSGHDR_VCOPY Base=r38 ElementSize=4 Type=ud
+
+
+.declare mudMSGHDR_DI_OUT1 Base=r18.0 ElementSize=4 Type=ud
+.declare mubMSGHDR_DI_OUT1 Base=r18.0 ElementSize=1 Type=ub
+
+
+.declare mudMSGHDR_DI_OUT2 Base=r23.0 ElementSize=4 Type=ud
+.declare mubMSGHDR_DI_OUT2 Base=r23.0 ElementSize=1 Type=ub
+
+//r45
+//Use r45 as message header, so no need to "mov" the data.
+
+.declare mudDN_Y_OUT Base=r45.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+// Message response (Denoised & DI-ed pixels & statistics); Use buffer 5
+.declare udDNDI_RESP Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare uwDNDI_RESP Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare ubDNDI_RESP Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+// Message response (UV Copy); Use buffer 5
+.declare udDNDI_UV_RESP Base=r58.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare ubDNDI_UV_RESP Base=r58.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+//Temp GRFs: For 42X to 422 Conversion
+.declare uwDNDI_UVCOPY_TEMP Base=r10.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw //8 GRFs
+.declare ubDNDI_UVCOPY_TEMP Base=r10.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub //8 GRFs
+//---------------------------------------------------------------------------
+// Message descriptors
+//---------------------------------------------------------------------------
+// Extended message descriptor
+ // Message descriptor for sampler read
+ // = 000 0010 (message len 2) 00000 (resp len - set later, 12 or 5 or 11)
+ // 1 (header present 1) 0 11 (SIMD32/64 mode)
+ // 1000 (message type) 0000 (DI state index)
+ // 00000000 (binding table index - set later)
+ // = 0x040b8000
+
+
+// Attention: The Message Length is The Number of GRFs with Data Only, without the Header
+
+
+//---------------------------------------------------------------------------
+// VDI Return Data format
+//---------------------------------------------------------------------------
+// Defines for DI enabled
+
+
+// Defines for DI disabled
+
+
+// FileName: DNDI_Command.asm
+// Author: Vivek Kumar
+// Description: Sends a message to the VDI to process one DN (16x8) or DNDI (16x4) block
+
+// Prepare the DNDI send command: header copied from r0, block origin taken from r7.0/r7.1
+mov (8) mudMSGHDR_DNDI(0)<1> r0.0<8;8,1>:ud // message header (mudMSGHDR_DNDI is based at r18)
+mov (1) mwMSGHDR_DNDI(1,4)<1> r7.0<0;1,0>:w { NoDDClr } // horizontal origin // Do we need to add offset here? -vK
+mov (1) mwMSGHDR_DNDI(1,12)<1> r7.1<0;1,0>:w { NoDDChk } // vertical origin // Can these 2 be combined? - vK
+
+send (8) udDNDI_RESP(0)<1> r18 0x2 0x4AE8003:ud // message starts at r18; the response is written to udDNDI_RESP (based at r46)
+
+// On Gen6, with VDI walker, use the XY pair returned rather than programmed above
+// VDI_RETURNED_XY is ordered XY when the walker is enabled, and is the same as programmed above when the walker is disabled
+mov (2) r7.0<1>:w uwDNDI_RESP(9,14)<2;2,1> // horizontal/vertical origin in W.14 and W.15 of response register 9, copied back to r7.0/r7.1
+
+
+// FileName: DI_STMM_Save.asm
+// Author: Vivek Kumar
+// Description: Saves DI STMM Data to statistics surface in case of DI enabled (for 16x4 block)
+
+// Write STMM to memory: header at mudMSGHDR_STMM(0) (based at r20), STMM data in the register after it
+mov (8) mudMSGHDR_STMM(0)<1> r0.0<8;8,1>:ud // message header (copy of r0)
+mov (8) mudMSGHDR_STMM(1)<1> udDNDI_RESP(8,0) // Move STMM (response register 8) into the payload register that follows the header
+
+shr (1) mudMSGHDR_STMM(0,0)<1> r7.0<0;1,0>:w 1:w { NoDDClr } // X origin / 2
+mov (1) mudMSGHDR_STMM(0,1)<1> r7.1<0;1,0>:w { NoDDClr, NoDDChk } // Y origin
+mov (1) mudMSGHDR_STMM(0,2)<1> 0x30007:ud { NoDDChk } // block width and height (8x4); 0x30007 = (4-1)<<16 | (8-1)
+
+send (8) null<1>:d r20 0x5 0x40A8021:ud // send header + payload starting at r20; no response is kept (null destination)
+
+
+// FileName: DNDI_Enc_Stats_Save.asm
+// Author: Vivek Kumar
+// Description: Saves Encoder Statistics data to statistics surface in case of DI enabled (for 16x4 block)
+
+// Write encoder statistics to memory: header at mudMSGHDR_ENC_STATS(0) (based at r24), statistics in the register after it
+//Currently enable this only on Gen6 validation
+mov (8) mudMSGHDR_ENC_STATS(1)<1> 0x0:ud // Zero the payload register before the partial writes below
+mov (8) mudMSGHDR_ENC_STATS(0)<1> r0.0<8;8,1>:ud // message header (copy of r0)
+
+shr (1) mudMSGHDR_ENC_STATS(0,0)<1> r7.0<0;1,0>:w 1:w { NoDDClr } // X origin / 2 (old note: "enable the flag after testing on si { NoDDClr }"; the flag is present on this instruction)
+mul (1) acc0.1<1>:ud r7.1<0;1,0>:w 3:w // Y origin * 3
+shr (1) mudMSGHDR_ENC_STATS(0,1)<1> acc0.1<0;1,0>:ud 2:w { NoDDClr, NoDDChk } // Y origin * 3/4 (old note: "enable the flag after testing on si { NoDDClr, NoDDChk }"; the flags are present on this instruction)
+mov (1) mudMSGHDR_ENC_STATS(0,2)<1> 0x20007:ud { NoDDChk } // block width and height (8x3); 0x20007 = (3-1)<<16 | (8-1) (old note: "enable the flag after testing on si { NoDDChk }"; the flag is present on this instruction)
+add (2) mudMSGHDR_ENC_STATS(0,0)<1> mudMSGHDR_ENC_STATS(0,0)<2;2,1> r1.12<2;2,1>:uw // Add pitch to X,Y origin (the two words at r1.12/r1.13 are added to header dwords 0 and 1)
+
+
+ //Data block for Encoder Statistics
+ //----------------------------------------------------
+ //| 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | Bytes
+ //----------------------------------------------------
+ //| BNE | MCNT | FCNT | TCNT | X | X | X | X |
+ //----------------------------------------------------
+ //| DcTpT | SVCM | DcBpT | DcTpB |
+ //----------------------------------------------------
+ //| SHCM | STAD | DcTcB | DcBpB |
+ //----------------------------------------------------
+ mov (1) mudMSGHDR_ENC_STATS(1,0)<1> udDNDI_RESP(9,1)<0;1,0> { NoDDClr } // Response dword (9,1) -> payload dword 0
+ mov (2) mudMSGHDR_ENC_STATS(1,3)<2> udDNDI_RESP(9,3)<2;2,1> { NoDDClr, NoDDChk } // Response dwords (9,3),(9,4) -> payload dwords 3 and 5 (destination stride 2)
+ mov (2) mudMSGHDR_ENC_STATS(1,2)<2> udDNDI_RESP(9,5)<2;2,1> { NoDDChk } // Response dwords (9,5),(9,6) -> payload dwords 2 and 4 (destination stride 2)
+
+
+send (8) null<1>:d r24 0x5 0x40A8021:ud // send header + payload starting at r24; no response is kept (null destination)
+
+
diff --git a/src/shaders/post_processing/gen7/DI_Save_NV12_16x4.g4a b/src/shaders/post_processing/gen7/DI_Save_NV12_16x4.g4a
new file mode 100644
index 0000000..68ef504
--- /dev/null
+++ b/src/shaders/post_processing/gen7/DI_Save_NV12_16x4.g4a
@@ -0,0 +1,279 @@
+/*
+ * Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// 20 // Total instruction count
+// 1 // Total kernel count
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16)
+.default_register_type :ub
+
+.reg_count_total 128
+.reg_count_payload 7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+//Color Pipe (IECP) parameters
+
+
+//r4
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later)
+// 0000,0000,0000
+// 0001(Spawn a root thread),0001 (Root thread spawn thread)
+// = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later)
+// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+// 0000,0000 (Binding table index, added later)
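+// = 0x0C098700 (value of the bit fields listed above; the 0x02000011 previously shown here is the thread-spawn descriptor value)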
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+ // Message Descriptors
+ // = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+ // 1 (header present 1) 0 1010 (media block write) 000000
+ // 00000000 (binding table index - set later)
+ // = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+ // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT
+
+
+.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// End of common.inc
+
+// FileName: DI_Save_NV12_16x4.asm
+// Author: Vivek Kumar
+// Description: Save two 16x4 blocks of DI output in NV12 format
+
+
+// FileName: DNDI.inc
+// Author: Vivek Kumar
+// Description: Include file for DN, DI and DNDI
+// Inputs: DI_ENABLE, DN_ENABLE, DN_PLANAR, DN_PACKED
+
+
+// End of common.inc
+
+
+//Interface:
+//Static Parameters:
+//r1
+
+
+//====================== Binding table (Explicit To DNDI)=========================================
+
+
+.declare mudMSGHDR_DNDI Base=r18 ElementSize=4 Type=ud
+.declare mdMSGHDR_DNDI Base=r18 ElementSize=4 Type=d
+.declare mwMSGHDR_DNDI Base=r18 ElementSize=2 Type=w
+
+
+.declare mudMSGHDR_STMM Base=r20 ElementSize=4 Type=ud
+
+
+.declare mudMSGHDR_HIST Base=r22 ElementSize=4 Type=ud
+
+
+.declare mudMSGHDR_ENC_STATS Base=r24 ElementSize=4 Type=ud
+.declare muwMSGHDR_ENC_STATS Base=r24 ElementSize=2 Type=uw
+.declare mubMSGHDR_ENC_STATS Base=r24 ElementSize=1 Type=ub
+
+
+.declare mudMSGHDR_DN_OUT Base=r31.0 ElementSize=4 Type=ud
+.declare mdMSGHDR_DN_OUT Base=r31.0 ElementSize=4 Type=d
+.declare mubMSGHDR_DN_OUT Base=r31.0 ElementSize=1 Type=ub
+
+
+.declare mudMSGHDR_UVCOPY Base=r36 ElementSize=4 Type=ud
+.declare mdMSGHDR_UVCOPY Base=r36 ElementSize=4 Type=d
+.declare mudMSGHDR_UCOPY Base=r36 ElementSize=4 Type=ud
+.declare mudMSGHDR_VCOPY Base=r38 ElementSize=4 Type=ud
+
+
+.declare mudMSGHDR_DI_OUT1 Base=r18.0 ElementSize=4 Type=ud
+.declare mubMSGHDR_DI_OUT1 Base=r18.0 ElementSize=1 Type=ub
+
+
+.declare mudMSGHDR_DI_OUT2 Base=r23.0 ElementSize=4 Type=ud
+.declare mubMSGHDR_DI_OUT2 Base=r23.0 ElementSize=1 Type=ub
+
+//r45
+//Use r45 as message header, so no need to "mov" the data.
+
+.declare mudDN_Y_OUT Base=r45.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+// Message response (Denoised & DI-ed pixels & statistics); Use buffer 5
+.declare udDNDI_RESP Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare uwDNDI_RESP Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare ubDNDI_RESP Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+// Message response (UV Copy); Use buffer 5
+.declare udDNDI_UV_RESP Base=r58.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare ubDNDI_UV_RESP Base=r58.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+//Temp GRFs: For 42X to 422 Conversion
+.declare uwDNDI_UVCOPY_TEMP Base=r10.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw //8 GRFs
+.declare ubDNDI_UVCOPY_TEMP Base=r10.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub //8 GRFs
+//---------------------------------------------------------------------------
+// Message descriptors
+//---------------------------------------------------------------------------
+// Extended message descriptor
+ // Message descriptor for sampler read
+ // = 000 0010 (message len 2) 00000 (resp len - set later, 12 or 5 or 11)
+ // 1 (header present 1) 0 11 (SIMD32/64 mode)
+ // 1000 (message type) 0000 (DI state index)
+ // 00000000 (binding table index - set later)
+ // = 0x040b8000
+
+
+// Attention: The Message Length is The Number of GRFs with Data Only, without the Header
+
+
+//---------------------------------------------------------------------------
+// VDI Return Data format
+//---------------------------------------------------------------------------
+// Defines for DI enabled
+
+
+// Defines for DI disabled
+
+
+mov (2) r27.0<1>:d r7.0<2;2,1>:w { NoDDClr } // Block origin: the two words at r7.0/r7.1 widened into dwords r27.0/r27.1
+mov (1) r27.2<1>:ud 0x3000F:ud { NoDDChk } // Block width and height (16x4)
+
+//Bottom field Y
+ mov (8) mudMSGHDR_DI_OUT1(1)<1> udDNDI_RESP(0,0)
+ mov (8) mudMSGHDR_DI_OUT1(2)<1> udDNDI_RESP(0,8)
+// Top field Y
+ mov (8) mudMSGHDR_DI_OUT2(1)<1> udDNDI_RESP(4,0)
+ mov (8) mudMSGHDR_DI_OUT2(2)<1> udDNDI_RESP(4,8)
+
+//copy the message descriptor (r27) into the two Y message headers (r18 and r23)
+mov (8) r18.0<1>:ud r27<8;8,1>:ud
+mov (8) r23.0<1>:ud r27<8;8,1>:ud
+
+//Change origin to U/V block
+asr (1) r27.1<1>:d r27.1<0;1,0>:d 1:w { NoDDClr } // U/V block origin should be half of Y's
+mov (1) r27.2<1>:ud 0x1000F:ud { NoDDChk } // Block width and height (16x2)
+
+// Bottom field U/V
+mov (16) r21.0<2>:ub ubDNDI_RESP(2, 1)<32;8,2> { NoDDClr }
+mov (16) r21.1<2>:ub ubDNDI_RESP(2, 0)<32;8,2> { NoDDChk }
+
+// Top field U/V
+mov (16) r26.0<2>:ub ubDNDI_RESP(6, 1)<32;8,2> { NoDDClr }
+mov (16) r26.1<2>:ub ubDNDI_RESP(6, 0)<32;8,2> { NoDDChk }
+
+//copy the message descriptor (r27) into the two U/V message headers (r21 and r26) -- NOTE(review): these moves overwrite r21/r26, which the U/V byte moves above have just filled; confirm the intended U/V payload registers
+mov (8) r21<1>:ud r27<8;8,1>:ud
+mov (8) r26<1>:ud r27<8;8,1>:ud
+
+//Send out Y component on previous frame to surface
+send (8) null<1>:d r18.0 0x5 0x60A801B:ud
+//Send out Y component on current frame to surface
+send (8) null<1>:d r23.0 0x5 0x60A801E:ud
+//Send out U/V component on previous frame to surface
+send (8) null<1>:d r21 0x5 0x40A801C:ud
+//Send out U/V component on current frame to surface
+send (8) null<1>:d r26 0x5 0x40A801F:ud
diff --git a/src/shaders/post_processing/gen7/DI_Save_PA_16x4.g4a b/src/shaders/post_processing/gen7/DI_Save_PA_16x4.g4a
new file mode 100644
index 0000000..a59054d
--- /dev/null
+++ b/src/shaders/post_processing/gen7/DI_Save_PA_16x4.g4a
@@ -0,0 +1,289 @@
+/*
+ * Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// 33 // Total instruction count
+// 1 // Total kernel count
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16)
+.default_register_type :ub
+
+.reg_count_total 128
+.reg_count_payload 7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+//Color Pipe (IECP) parameters
+
+
+//r4
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later)
+// 0000,0000,0000
+// 0001(Spawn a root thread),0001 (Root thread spawn thread)
+// = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later)
+// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+// 0000,0000 (Binding table index, added later)
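+// = 0x0C098700 (value of the bit fields listed above; the 0x02000011 previously shown here is the thread-spawn descriptor value)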
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+ // Message Descriptors
+ // = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+ // 1 (header present 1) 0 1010 (media block write) 000000
+ // 00000000 (binding table index - set later)
+ // = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+ // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT
+
+
+.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// End of common.inc
+
+// FileName: DI_Save_PA_16x4.asm
+// Author: Vivek Kumar
+// Description: Save two 16x4 blocks of DI output in Packed format
+
+
+// FileName: DNDI.inc
+// Author: Vivek Kumar
+// Description: Include file for DN, DI and DNDI
+// Inputs: DI_ENABLE, DN_ENABLE, DN_PLANAR, DN_PACKED
+
+
+// End of common.inc
+
+
+//Interface:
+//Static Parameters:
+//r1
+
+
+//====================== Binding table (Explicit To DNDI)=========================================
+
+
+.declare mudMSGHDR_DNDI Base=r18 ElementSize=4 Type=ud
+.declare mdMSGHDR_DNDI Base=r18 ElementSize=4 Type=d
+.declare mwMSGHDR_DNDI Base=r18 ElementSize=2 Type=w
+
+
+.declare mudMSGHDR_STMM Base=r20 ElementSize=4 Type=ud
+
+
+.declare mudMSGHDR_HIST Base=r22 ElementSize=4 Type=ud
+
+
+.declare mudMSGHDR_ENC_STATS Base=r24 ElementSize=4 Type=ud
+.declare muwMSGHDR_ENC_STATS Base=r24 ElementSize=2 Type=uw
+.declare mubMSGHDR_ENC_STATS Base=r24 ElementSize=1 Type=ub
+
+
+.declare mudMSGHDR_DN_OUT Base=r31.0 ElementSize=4 Type=ud
+.declare mdMSGHDR_DN_OUT Base=r31.0 ElementSize=4 Type=d
+.declare mubMSGHDR_DN_OUT Base=r31.0 ElementSize=1 Type=ub
+
+
+.declare mudMSGHDR_UVCOPY Base=r36 ElementSize=4 Type=ud
+.declare mdMSGHDR_UVCOPY Base=r36 ElementSize=4 Type=d
+.declare mudMSGHDR_UCOPY Base=r36 ElementSize=4 Type=ud
+.declare mudMSGHDR_VCOPY Base=r38 ElementSize=4 Type=ud
+
+
+.declare mudMSGHDR_DI_OUT1 Base=r18.0 ElementSize=4 Type=ud
+.declare mubMSGHDR_DI_OUT1 Base=r18.0 ElementSize=1 Type=ub
+
+
+.declare mudMSGHDR_DI_OUT2 Base=r23.0 ElementSize=4 Type=ud
+.declare mubMSGHDR_DI_OUT2 Base=r23.0 ElementSize=1 Type=ub
+
+//r45
+//Use r45 as message header, so no need to "mov" the data.
+
+.declare mudDN_Y_OUT Base=r45.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+// Message response (Denoised & DI-ed pixels & statistics); Use buffer 5
+.declare udDNDI_RESP Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare uwDNDI_RESP Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare ubDNDI_RESP Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+// Message response (UV Copy); Use buffer 5
+.declare udDNDI_UV_RESP Base=r58.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare ubDNDI_UV_RESP Base=r58.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+//Temp GRFs: For 42X to 422 Conversion
+.declare uwDNDI_UVCOPY_TEMP Base=r10.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw //8 GRFs
+.declare ubDNDI_UVCOPY_TEMP Base=r10.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub //8 GRFs
+//---------------------------------------------------------------------------
+// Message descriptors
+//---------------------------------------------------------------------------
+// Extended message descriptor
+ // Message descriptor for sampler read
+ // = 000 0010 (message len 2) 00000 (resp len - set later, 12 or 5 or 11)
+ // 1 (header present 1) 0 11 (SIMD32/64 mode)
+ // 1000 (message type) 0000 (DI state index)
+ // 00000000 (binding table index - set later)
+ // = 0x040b8000
+
+
+// Attention: The Message Length is The Number of GRFs with Data Only, without the Header
+
+
+//---------------------------------------------------------------------------
+// VDI Return Data format
+//---------------------------------------------------------------------------
+// Defines for DI enabled
+
+
+// Defines for DI disabled
+
+
+add (4) a0.4<1>:uw r2.28<4;4,1>:ub 608:w // Initial Y,U,V byte offsets (from r2.28 onward) in the YUV422 block, plus 608 (= 19*32; presumably the register after the r18 header -- the old "it starts at m20" note looks stale, TODO confirm)
+
+mov (8) r27.0<1>:ud r0.0<8;8,1>:ud
+shl (1) r27.0<1>:d r7.0<0;1,0>:w 1:w { NoDDClr } // Horizontal block origin needs to be doubled
+mov (1) r27.1<1>:d r7.1<0;1,0>:w { NoDDClr, NoDDChk } // Vertical block origin
+mov (1) r27.2<1>:ud 0x3001F:ud { NoDDChk } // Block width and height (32x4); 0x3001F = (4-1)<<16 | (32-1)
+
+//prepare the message headers (r18 and r23 both start as copies of r27)
+mov (8) r18.0<1>:ud r27<8;8,1>:ud
+mov (8) r23.0<1>:ud r27<8;8,1>:ud
+
+// Pack 2nd field Y
+ mov (16) r[a0.4, 0]<2> ubDNDI_RESP(0,0) { NoDDClr }
+ mov (16) r[a0.4, 32]<2> ubDNDI_RESP(0,16) { NoDDClr }
+ mov (16) r[a0.4, 64]<2> ubDNDI_RESP(0,32) { NoDDClr }
+ mov (16) r[a0.4, 96]<2> ubDNDI_RESP(0,48) { NoDDClr }
+// Pack 2nd field U
+ mov (8) r[a0.5, 0]<4> ubDNDI_RESP(2,1)<16;8,2> { NoDDClr, NoDDChk } //U pixels
+ mov (8) r[a0.5, 32]<4> ubDNDI_RESP(2,17)<16;8,2> { NoDDClr, NoDDChk } //U pixels
+ mov (8) r[a0.5, 64]<4> ubDNDI_RESP(2,33)<16;8,2> { NoDDClr, NoDDChk } //U pixels
+ mov (8) r[a0.5, 96]<4> ubDNDI_RESP(2,49)<16;8,2> { NoDDClr, NoDDChk } //U pixels
+// Pack 2nd field V
+ mov (8) r[a0.6, 0]<4> ubDNDI_RESP(2,0)<16;8,2> { NoDDChk } //V pixels
+ mov (8) r[a0.6, 32]<4> ubDNDI_RESP(2,16)<16;8,2> { NoDDChk } //V pixels
+ mov (8) r[a0.6, 64]<4> ubDNDI_RESP(2,32)<16;8,2> { NoDDChk } //V pixels
+ mov (8) r[a0.6, 96]<4> ubDNDI_RESP(2,48)<16;8,2> { NoDDChk } //V pixels
+
+// Pack 1st field Y
+ mov (16) r[a0.4, 160]<2> ubDNDI_RESP(4,0) { NoDDClr }
+ mov (16) r[a0.4, 192]<2> ubDNDI_RESP(4,16) { NoDDClr }
+ mov (16) r[a0.4, 224]<2> ubDNDI_RESP(4,32) { NoDDClr }
+ mov (16) r[a0.4, 256]<2> ubDNDI_RESP(4,48) { NoDDClr }
+// Pack 1st field U
+ mov (8) r[a0.5, 160]<4> ubDNDI_RESP(6,1)<16;8,2> { NoDDClr, NoDDChk } //U pixels
+ mov (8) r[a0.5, 192]<4> ubDNDI_RESP(6,17)<16;8,2> { NoDDClr, NoDDChk } //U pixels
+ mov (8) r[a0.5, 224]<4> ubDNDI_RESP(6,33)<16;8,2> { NoDDClr, NoDDChk } //U pixels
+ mov (8) r[a0.5, 256]<4> ubDNDI_RESP(6,49)<16;8,2> { NoDDClr, NoDDChk } //U pixels
+// Pack 1st field V
+ mov (8) r[a0.6, 160]<4> ubDNDI_RESP(6,0)<16;8,2> { NoDDChk } //V pixels
+ mov (8) r[a0.6, 192]<4> ubDNDI_RESP(6,16)<16;8,2> { NoDDChk } //V pixels
+ mov (8) r[a0.6, 224]<4> ubDNDI_RESP(6,32)<16;8,2> { NoDDChk } //V pixels
+ mov (8) r[a0.6, 256]<4> ubDNDI_RESP(6,48)<16;8,2> { NoDDChk } //V pixels
+
+//save the previous frame (message starts at the r18 header)
+send (8) null<1>:d r18.0 0x5 0xA0A801B:ud
+
+//save the current frame (message starts at the r23 header)
+send (8) null<1>:d r23.0 0x5 0xA0A801E:ud
diff --git a/src/shaders/post_processing/gen7/EOT.g4a b/src/shaders/post_processing/gen7/EOT.g4a
new file mode 100644
index 0000000..72c3da3
--- /dev/null
+++ b/src/shaders/post_processing/gen7/EOT.g4a
@@ -0,0 +1,166 @@
+/*
+ * Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// 2 // Total instruction count
+// 1 // Total kernel count
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16)
+.default_register_type :ub
+
+.reg_count_total 128
+.reg_count_payload 7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later)
+// 0000,0000,0000
+// 0001(Spawn a root thread),0001 (Root thread spawn thread)
+// = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later)
+// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+// 0000,0000 (Binding table index, added later)
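+// = 0x0C098700 (value of the bit fields listed above; the 0x02000011 previously shown here is the thread-spawn descriptor value)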
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+ // Message Descriptors
+ // = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+ // 1 (header present 1) 0 1010 (media block write) 000000
+ // 00000000 (binding table index - set later)
+ // = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+ // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT
+
+
+.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+
+.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+
+.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+
+
+// End of common.inc
+
+
+//End of Thread message: copy r0 into r127 and send it as the final message of the kernel
+
+mov (8) r127<1>:ud r0.0<8;8,1>:ud // r127 = copy of r0, used as the message header
+ send (1) null<1>:d r127 0x27 0x02000010 // no response is kept (null destination)
diff --git a/src/shaders/post_processing/gen7/Makefile.am b/src/shaders/post_processing/gen7/Makefile.am
new file mode 100644
index 0000000..587f266
--- /dev/null
+++ b/src/shaders/post_processing/gen7/Makefile.am
@@ -0,0 +1,97 @@
+INTEL_PP_G7B = \
+ avs.g7b \
+ dndi.g7b \
+ nv12_dn_nv12.g7b \
+ pa_to_pl2.g7b \
+ pa_to_pl3.g7b \
+ pl2_to_pa.g7b \
+ pl2_to_pl2.g7b \
+ pl2_to_pl3.g7b \
+ pl3_to_pa.g7b \
+ pl3_to_pl2.g7b \
+ pl3_to_pl3.g7b \
+ $(NULL)
+
+INTEL_PP_G4A = \
+ DI_Core.g4a \
+ DI_Save_NV12_16x4.g4a \
+ DI_Save_PA_16x4.g4a \
+ EOT.g4a \
+ NV12_DI_NV12.g4a \
+ NV12_DNDI_422CP.g4a \
+ NV12_DNDI_PA.g4a \
+ NV12_DNUV_NV12.g4a \
+ NV12_DN_422CP.g4a \
+ NV12_DN_NV12.g4a \
+ PA_AVS_Buf_0.g4a \
+ PA_AVS_Buf_1.g4a \
+ PA_AVS_Buf_2.g4a \
+ PA_AVS_Buf_3.g4a \
+ PA_DI_422CP.g4a \
+ PA_DI_PA.g4a \
+ PA_DNDI_422CP.g4a \
+ PA_DNDI_PA.g4a \
+ PA_DNUV_PA.g4a \
+ PA_DN_422CP.g4a \
+ PA_DN_PA.g4a \
+ PL2_AVS_Buf_0.g4a \
+ PL2_AVS_Buf_1.g4a \
+ PL2_AVS_Buf_2.g4a \
+ PL2_AVS_Buf_3.g4a \
+ PL3_AVS_Buf_0.g4a \
+ PL3_AVS_Buf_1.g4a \
+ PL3_AVS_Buf_2.g4a \
+ PL3_AVS_Buf_3.g4a \
+ PL3_DNDI_422CP.g4a \
+ PL3_DNDI_PA.g4a \
+ PL3_DNUV_PL3.g4a \
+ PL3_DN_422CP.g4a \
+ PL3_DN_PL3.g4a \
+ PL_DI_422CP.g4a \
+ PL_DI_PA.g4a \
+ Save_AVS_PA.g4a \
+ Save_AVS_PL3.g4a \
+ Save_AVS_NV12.g4a \
+ Save_AVS_RGB.g4a \
+ Set_AVS_Buf_0123_BGRA.g4a \
+ Set_AVS_Buf_0123_PL2.g4a \
+ Set_AVS_Buf_0123_PL3.g4a \
+ Set_AVS_Buf_0123_VUYA.g4a \
+ Set_AVS_Buf_0123_VYUA.g4a \
+ Set_Layer_0.g4a \
+ VP_Setup.g4a \
+ $(NULL)
+
+INTEL_PP_ASM = $(INTEL_PP_G7B:%.g7b=%.asm)
+INTEL_PP_GEN7_ASM = $(INTEL_PP_G7B:%.g7b=%.g7s)
+# .g75b variants: same base names as the .g7b list, with a .g75b suffix
+INTEL_PP_G75B = $(INTEL_PP_G7B:%.g7b=%.g75b)
+# The shader binaries are only added to the build when HAVE_GEN4ASM is set
+TARGETS =
+if HAVE_GEN4ASM
+TARGETS += $(INTEL_PP_G7B) $(INTEL_PP_G75B)
+endif
+
+all-local: $(TARGETS)
+
+SUFFIXES = .g7b .g75b .g7s .asm
+# .asm -> .g7s: run cpp, then gpp.py; a failure in either step fails the rule, and the temp file is always removed
+$(INTEL_PP_GEN7_ASM): $(INTEL_PP_ASM) $(INTEL_PP_G4A)
+.asm.g7s:
+ $(AM_V_GEN)cpp $< > _pp0.$@ && \
+ $(srcdir)/../../gpp.py _pp0.$@ $@; \
+ rc=$$?; rm -f _pp0.$@; exit $$rc
+.g7s.g7b:
+ $(AM_V_GEN)intel-gen4asm -a -o $@ -g 7 $<
+
+.g7s.g75b:
+ $(AM_V_GEN)intel-gen4asm -a -o $@ -g 7.5 $<
+
+CLEANFILES = $(INTEL_PP_GEN7_ASM)
+# The assembled .g7b/.g75b binaries are listed for distribution
+EXTRA_DIST = \
+ $(INTEL_PP_G7B) \
+ $(INTEL_PP_G75B)
+
+# Extra clean files so that maintainer-clean removes *everything*
+MAINTAINERCLEANFILES = Makefile.in
diff --git a/src/shaders/post_processing/gen7/NV12_DI_NV12.g4a b/src/shaders/post_processing/gen7/NV12_DI_NV12.g4a
new file mode 100644
index 0000000..668b61f
--- /dev/null
+++ b/src/shaders/post_processing/gen7/NV12_DI_NV12.g4a
@@ -0,0 +1,392 @@
+/*
+ * Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// 57 // Total instruction count
+// 1 // Total kernel count
+
+.kernel NV12_DI_NV12
+.code
+
+
+
+// FileName: DI.asm
+// Author: Vivek Kumar
+// Description: Tasks for DI only case (16x4 block)
+
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16)
+.default_register_type :ub
+
+.reg_count_total 128
+.reg_count_payload 7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later)
+// 0000,0000,0000
+// 0001(Spawn a root thread),0001 (Root thread spawn thread)
+// = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later)
+// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+// 0000,0000 (Binding table index, added later)
+// = 0x02000011
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+ // Message Descriptors
+ // = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+ // 1 (header present 1) 0 1010 (media block write) 000000
+ // 00000000 (binding table index - set later)
+ // = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+ // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT
+
+// Byte/word/dword/float views of the message payload area; all four start at r30
+.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Temp space for general use
+
+
+// Common Buffers
+
+
+// temp space for rotation
+// Typed views of the rotation scratch area; all five start at r9.0
+.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+
+.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+
+
+// End of common.inc
+
+
+// FileName: DNDI.inc
+// Author: Vivek Kumar
+// Description: Include file for DN, DI and DNDI
+// Inputs: DI_ENABLE, DN_ENABLE, DN_PLANAR, DN_PACKED
+
+
+
+
+// End of common.inc
+
+
+//Interface:
+//Static Parameters:
+//r1
+
+
+//====================== Binding table (Explicit To DNDI)=========================================
+
+// MSGHDR_DNDI: ud/d/w views that all start at r18
+.declare mudMSGHDR_DNDI Base=r18 ElementSize=4 Type=ud
+.declare mdMSGHDR_DNDI Base=r18 ElementSize=4 Type=d
+.declare mwMSGHDR_DNDI Base=r18 ElementSize=2 Type=w
+
+// MSGHDR_STMM: dword view starting at r20
+.declare mudMSGHDR_STMM Base=r20 ElementSize=4 Type=ud
+
+// MSGHDR_HIST: dword view starting at r22
+.declare mudMSGHDR_HIST Base=r22 ElementSize=4 Type=ud
+
+// MSGHDR_ENC_STATS: ud/uw/ub views that all start at r24
+.declare mudMSGHDR_ENC_STATS Base=r24 ElementSize=4 Type=ud
+.declare muwMSGHDR_ENC_STATS Base=r24 ElementSize=2 Type=uw
+.declare mubMSGHDR_ENC_STATS Base=r24 ElementSize=1 Type=ub
+
+// MSGHDR_DN_OUT: ud/d/ub views that all start at r31.0
+.declare mudMSGHDR_DN_OUT Base=r31.0 ElementSize=4 Type=ud
+.declare mdMSGHDR_DN_OUT Base=r31.0 ElementSize=4 Type=d
+.declare mubMSGHDR_DN_OUT Base=r31.0 ElementSize=1 Type=ub
+
+// MSGHDR_UVCOPY and MSGHDR_UCOPY both start at r36; MSGHDR_VCOPY starts at r38
+.declare mudMSGHDR_UVCOPY Base=r36 ElementSize=4 Type=ud
+.declare mdMSGHDR_UVCOPY Base=r36 ElementSize=4 Type=d
+.declare mudMSGHDR_UCOPY Base=r36 ElementSize=4 Type=ud
+.declare mudMSGHDR_VCOPY Base=r38 ElementSize=4 Type=ud
+
+// MSGHDR_DI_OUT1 starts at r18.0, the same base register as MSGHDR_DNDI
+.declare mudMSGHDR_DI_OUT1 Base=r18.0 ElementSize=4 Type=ud
+.declare mubMSGHDR_DI_OUT1 Base=r18.0 ElementSize=1 Type=ub
+
+// MSGHDR_DI_OUT2 starts at r23.0
+.declare mudMSGHDR_DI_OUT2 Base=r23.0 ElementSize=4 Type=ud
+.declare mubMSGHDR_DI_OUT2 Base=r23.0 ElementSize=1 Type=ub
+
+//r45
+//Use r45 as message header, so no need to "mov" the data.
+
+.declare mudDN_Y_OUT Base=r45.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+// Message response (Denoised & DI-ed pixels & statistics); Use buffer 5 (views start at r46.0)
+.declare udDNDI_RESP Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare uwDNDI_RESP Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare ubDNDI_RESP Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+// Message response (UV Copy); Use buffer 5 (views start at r58.0)
+.declare udDNDI_UV_RESP Base=r58.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare ubDNDI_UV_RESP Base=r58.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+//Temp GRFs: For 42X to 422 Conversion (views start at r10.0, inside the r9-r17 temp space)
+.declare uwDNDI_UVCOPY_TEMP Base=r10.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw //8 GRFs
+.declare ubDNDI_UVCOPY_TEMP Base=r10.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub //8 GRFs
+//---------------------------------------------------------------------------
+// Message descriptors
+//---------------------------------------------------------------------------
+// Extended message descriptor
+ // Message descriptor for sampler read
+ // = 000 0010 (message len 2) 00000 (resp len - set later, 12 or 5 or 11)
+ // 1 (header present 1) 0 11 (SIMD32/64 mode)
+ // 1000 (message type) 0000 (DI state index)
+ // 00000000 (binding table index - set later)
+ // = 0x040b8000
+
+
+// Attention: The Message Length is The Number of GRFs with Data Only, without the Header
+
+
+//---------------------------------------------------------------------------
+// VDI Return Data format
+//---------------------------------------------------------------------------
+// Defines for DI enabled
+
+
+// Defines for DI disabled
+
+
+
+// FileName: DNDI_Command.asm
+// Author: Vivek Kumar
+// Description: Sends a message to the VDI to process one DN (16x8) or DNDI (16x4) block
+
+// Prepare the DNDI send command: copy r0 as the header, then take the X/Y origin from r7.0/r7.1
+mov (8) mudMSGHDR_DNDI(0)<1> r0.0<8;8,1>:ud // message header
+mov (1) mwMSGHDR_DNDI(1,4)<1> r7.0<0;1,0>:w { NoDDClr } // horizontal origin // Do we need to add offset here? -vK
+mov (1) mwMSGHDR_DNDI(1,12)<1> r7.1<0;1,0>:w { NoDDChk } // vertical origin // Can these 2 be combined? - vK
+// Request starts at r18 (mudMSGHDR_DNDI); the reply is written to udDNDI_RESP. Descriptor fields: message length 2, response length 10
+send (8) udDNDI_RESP(0)<1> r18 0x2 0x4AE8003:ud
+
+// On Gen6, with VDI walker, use the XY pair returned rather than programmed above
+// VDI_RETURNED_XY is ordered XY when the walker is enabled, and is the same as programmed when the walker is disabled
+mov (2) r7.0<1>:w uwDNDI_RESP(9,14)<2;2,1> // horizontal/vertical origin in W.14 and W.15
+
+
+
+// FileName: DI_STMM_Save.asm
+// Author: Vivek Kumar
+// Description: Saves DI STMM Data to statistics surface in case of DI enabled (for 16x4 block)
+
+// Write STMM to memory
+mov (8) mudMSGHDR_STMM(0)<1> r0.0<8;8,1>:ud // message header
+mov (8) mudMSGHDR_STMM(1)<1> udDNDI_RESP(8,0) // Move STMM (DNDI response register 8) into the payload register that follows the header
+
+shr (1) mudMSGHDR_STMM(0,0)<1> r7.0<0;1,0>:w 1:w { NoDDClr } // X origin / 2
+mov (1) mudMSGHDR_STMM(0,1)<1> r7.1<0;1,0>:w { NoDDClr, NoDDChk } // Y origin
+mov (1) mudMSGHDR_STMM(0,2)<1> 0x30007:ud { NoDDChk } // block width and height (8x4)
+// Message starts at r20 (mudMSGHDR_STMM): header plus one payload register; destination is null, so no reply is kept
+send (8) null<1>:d r20 0x5 0x40A8021:ud
+
+
+
+// FileName: DNDI_Enc_Stats_Save.asm
+// Author: Vivek Kumar
+// Description: Saves Encoder Statistics data to statistics surface in case of DI enabled (for 16x4 block)
+
+// Write encoder statistics to memory
+//Currently enable this only on Gen6 validation
+mov (8) mudMSGHDR_ENC_STATS(1)<1> 0x0:ud // Zero the payload register before the statistics are filled in
+mov (8) mudMSGHDR_ENC_STATS(0)<1> r0.0<8;8,1>:ud // message header
+
+shr (1) mudMSGHDR_ENC_STATS(0,0)<1> r7.0<0;1,0>:w 1:w { NoDDClr } // X origin / 2 (older note: enable the flag after testing on si)
+mul (1) acc0.1<1>:ud r7.1<0;1,0>:w 3:w // Y origin * 3
+shr (1) mudMSGHDR_ENC_STATS(0,1)<1> acc0.1<0;1,0>:ud 2:w { NoDDClr, NoDDChk } // Y origin * 3/4 (older note: enable the flag after testing on si)
+mov (1) mudMSGHDR_ENC_STATS(0,2)<1> 0x20007:ud { NoDDChk } // block width and height (8x3) (older note: enable the flag after testing on si)
+add (2) mudMSGHDR_ENC_STATS(0,0)<1> mudMSGHDR_ENC_STATS(0,0)<2;2,1> r1.12<2;2,1>:uw // Add pitch to X,Y origin
+
+
+ //Data block for Encoder Statistics
+ //----------------------------------------------------
+ //| 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | Bytes
+ //----------------------------------------------------
+ //| BNE | MCNT | FCNT | TCNT | X | X | X | X |
+ //----------------------------------------------------
+ //| DcTpT | SVCM | DcBpT | DcTpB |
+ //----------------------------------------------------
+ //| SHCM | STAD | DcTcB | DcBpB |
+ //----------------------------------------------------
+ mov (1) mudMSGHDR_ENC_STATS(1,0)<1> udDNDI_RESP(9,1)<0;1,0> { NoDDClr } // Move encoder statistics to the payload: response element 1 -> payload element 0
+ mov (2) mudMSGHDR_ENC_STATS(1,3)<2> udDNDI_RESP(9,3)<2;2,1> { NoDDClr, NoDDChk } // Response elements 3,4 -> payload elements 3,5 (destination stride 2)
+ mov (2) mudMSGHDR_ENC_STATS(1,2)<2> udDNDI_RESP(9,5)<2;2,1> { NoDDChk } // Response elements 5,6 -> payload elements 2,4 (destination stride 2)
+
+// Message starts at r24 (mudMSGHDR_ENC_STATS); destination is null, so no reply is kept
+send (8) null<1>:d r24 0x5 0x40A8021:ud
+
+
+
+// FileName: DI_Save_NV12_16x4.asm
+// Author: Vivek Kumar
+// Description: Save two 16x4 blocks of DI output as NV12 (Y rows plus interleaved U/V rows, written by separate messages)
+
+// Registers filled below: r18 = Y header, r19-r20 = Y data, r21 = U/V header, r22 = U/V data; r23-r27 repeat that pattern for the second block
+// add (4) a0.4<1>:uw r2.28<4;4,1>:ub 608:w // Initial Y,U,V offset in YUV422 block; it starts at m20
+
+mov (8) r28.0<1>:ud r0.0<8;8,1>:ud
+mov (1) r28.0<1>:d r7.0<0;1,0>:w { NoDDClr } // Horizontal block origin (copied unchanged; nothing is doubled here)
+mov (1) r28.1<1>:d r7.1<0;1,0>:w { NoDDClr, NoDDChk } // Vertical block origin
+mov (1) r28.2<1>:ud 0x3000F:ud { NoDDChk } // Block width and height (16x4)
+
+//prepare the message headers
+mov (8) r18.0<1>:ud r28<8;8,1>:ud
+mov (8) r23.0<1>:ud r28<8;8,1>:ud
+
+//Bottom field Y
+mov (8) mudMSGHDR_DI_OUT1(1)<1> udDNDI_RESP(0,0)
+mov (8) mudMSGHDR_DI_OUT1(2)<1> udDNDI_RESP(0,8)
+// Top field Y
+mov (8) mudMSGHDR_DI_OUT2(1)<1> udDNDI_RESP(4,0)
+mov (8) mudMSGHDR_DI_OUT2(2)<1> udDNDI_RESP(4,8)
+
+//Change origin to U/V block
+asr (1) r28.1<1>:d r28.1<0;1,0>:d 1:w { NoDDClr } // U/V block origin should be half of Y's
+mov (1) r28.2<1>:ud 0x1000F:ud { NoDDChk } // Block width and height (16x2)
+
+// Bottom field U/V: even destination bytes take the odd source bytes and odd take even, so each byte pair is swapped
+mov (16) r22.0<2>:ub ubDNDI_RESP(2, 1)<32;8,2> { NoDDClr }
+mov (16) r22.1<2>:ub ubDNDI_RESP(2, 0)<32;8,2> { NoDDChk }
+
+// Top field U/V: same byte-pair swap as above
+mov (16) r27.0<2>:ub ubDNDI_RESP(6, 1)<32;8,2> { NoDDClr }
+mov (16) r27.1<2>:ub ubDNDI_RESP(6, 0)<32;8,2> { NoDDChk }
+
+//copy the U/V message header (r28, with the halved Y origin and 16x2 size) into r21 and r26
+mov (8) r21<1>:ud r28<8;8,1>:ud
+mov (8) r26<1>:ud r28<8;8,1>:ud
+
+//Send out Y component on previous frame to surface
+send (8) null<1>:d r18 0x5 0x60A801B:ud
+//Send out Y component on current frame to surface
+send (8) null<1>:d r23 0x5 0x60A801E:ud
+//Send out U/V component on previous frame to surface
+send (8) null<1>:d r21 0x5 0x40A801C:ud
+//Send out U/V component on current frame to surface
+send (8) null<1>:d r26 0x5 0x40A801F:ud
+
+
+//End of Thread message
+// Copy r0 into r127 and send it as the last message of the kernel
+mov (8) r127<1>:ud r0.0<8;8,1>:ud
+ send (1) null<1>:d r127 0x27 0x02000010
+
+
+.end_code
+.end_kernel
diff --git a/src/shaders/post_processing/gen7/NV12_DNDI_422CP.g4a b/src/shaders/post_processing/gen7/NV12_DNDI_422CP.g4a
new file mode 100644
index 0000000..0c0002f
--- /dev/null
+++ b/src/shaders/post_processing/gen7/NV12_DNDI_422CP.g4a
@@ -0,0 +1,557 @@
+/*
+ * Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// 116 // Total instruction count
+// 1 // Total kernel count
+
+.kernel NV12_DNDI_422CP
+.code
+
+
+
+// FileName: DNDI_PL_Core.asm
+// Author: Tatiya, Rupesh
+
+
+
+// FileName: DNDI_Core.asm
+// Author: Vivek Kumar
+// Description: Tasks for DN+DI case (16x4 block)
+
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16)
+.default_register_type :ub
+
+.reg_count_total 128
+.reg_count_payload 7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later)
+// 0000,0000,0000
+// 0001(Spawn a root thread),0001 (Root thread spawn thread)
+// = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later)
+// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+// 0000,0000 (Binding table index, added later)
+// = 0x02000011
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+ // Message Descriptors
+ // = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+ // 1 (header present 1) 0 1010 (media block write) 000000
+ // 00000000 (binding table index - set later)
+ // = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+ // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT
+
+// Byte/word/dword/float views of the message payload area; all four start at r30
+.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Temp space for general use
+
+
+// Common Buffers
+
+
+// temp space for rotation
+// Typed views of the rotation scratch area; all five start at r9.0
+.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+
+.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+
+
+// End of common.inc
+
+
+// FileName: DNDI.inc
+// Author: Vivek Kumar
+// Description: Include file for DN, DI and DNDI
+// Inputs: DI_ENABLE, DN_ENABLE, DN_PLANAR, DN_PACKED
+
+
+
+
+// End of common.inc
+
+
+//Interface:
+//Static Parameters:
+//r1
+
+
+//====================== Binding table (Explicit To DNDI)=========================================
+
+// MSGHDR_DNDI: ud/d/w views that all start at r18
+.declare mudMSGHDR_DNDI Base=r18 ElementSize=4 Type=ud
+.declare mdMSGHDR_DNDI Base=r18 ElementSize=4 Type=d
+.declare mwMSGHDR_DNDI Base=r18 ElementSize=2 Type=w
+
+// MSGHDR_STMM: dword view starting at r20
+.declare mudMSGHDR_STMM Base=r20 ElementSize=4 Type=ud
+
+// MSGHDR_HIST: dword view starting at r22
+.declare mudMSGHDR_HIST Base=r22 ElementSize=4 Type=ud
+
+// MSGHDR_ENC_STATS: ud/uw/ub views that all start at r24
+.declare mudMSGHDR_ENC_STATS Base=r24 ElementSize=4 Type=ud
+.declare muwMSGHDR_ENC_STATS Base=r24 ElementSize=2 Type=uw
+.declare mubMSGHDR_ENC_STATS Base=r24 ElementSize=1 Type=ub
+
+// MSGHDR_DN_OUT: ud/d/ub views that all start at r31.0
+.declare mudMSGHDR_DN_OUT Base=r31.0 ElementSize=4 Type=ud
+.declare mdMSGHDR_DN_OUT Base=r31.0 ElementSize=4 Type=d
+.declare mubMSGHDR_DN_OUT Base=r31.0 ElementSize=1 Type=ub
+
+// MSGHDR_UVCOPY and MSGHDR_UCOPY both start at r36; MSGHDR_VCOPY starts at r38
+.declare mudMSGHDR_UVCOPY Base=r36 ElementSize=4 Type=ud
+.declare mdMSGHDR_UVCOPY Base=r36 ElementSize=4 Type=d
+.declare mudMSGHDR_UCOPY Base=r36 ElementSize=4 Type=ud
+.declare mudMSGHDR_VCOPY Base=r38 ElementSize=4 Type=ud
+
+// MSGHDR_DI_OUT1 starts at r18.0, the same base register as MSGHDR_DNDI
+.declare mudMSGHDR_DI_OUT1 Base=r18.0 ElementSize=4 Type=ud
+.declare mubMSGHDR_DI_OUT1 Base=r18.0 ElementSize=1 Type=ub
+
+// MSGHDR_DI_OUT2 starts at r23.0
+.declare mudMSGHDR_DI_OUT2 Base=r23.0 ElementSize=4 Type=ud
+.declare mubMSGHDR_DI_OUT2 Base=r23.0 ElementSize=1 Type=ub
+
+//r45
+//Use r45 as message header, so no need to "mov" the data.
+
+.declare mudDN_Y_OUT Base=r45.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+// Message response (Denoised & DI-ed pixels & statistics); Use buffer 5 (views start at r46.0)
+.declare udDNDI_RESP Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare uwDNDI_RESP Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare ubDNDI_RESP Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+// Message response (UV Copy); Use buffer 5 (views start at r58.0)
+.declare udDNDI_UV_RESP Base=r58.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare ubDNDI_UV_RESP Base=r58.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+//Temp GRFs: For 42X to 422 Conversion (views start at r10.0, inside the r9-r17 temp space)
+.declare uwDNDI_UVCOPY_TEMP Base=r10.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw //8 GRFs
+.declare ubDNDI_UVCOPY_TEMP Base=r10.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub //8 GRFs
+//---------------------------------------------------------------------------
+// Message descriptors
+//---------------------------------------------------------------------------
+// Extended message descriptor
+ // Message descriptor for sampler read
+ // = 000 0010 (message len 2) 00000 (resp len - set later, 12 or 5 or 11)
+ // 1 (header present 1) 0 11 (SIMD32/64 mode)
+ // 1000 (message type) 0000 (DI state index)
+ // 00000000 (binding table index - set later)
+ // = 0x040b8000
+
+
+// Attention: The Message Length is The Number of GRFs with Data Only, without the Header
+
+
+//---------------------------------------------------------------------------
+// VDI Return Data format
+//---------------------------------------------------------------------------
+// Defines for DI enabled
+
+
+// Defines for DI disabled
+
+
+
+// FileName: DNDI_Command.asm
+// Author: Vivek Kumar
+// Description: Sends a message to the VDI to process one DN (16x8) or DNDI (16x4) block
+
+// Prepare the DNDI send command: copy r0 as the header, then take the X/Y origin from r7.0/r7.1
+mov (8) mudMSGHDR_DNDI(0)<1> r0.0<8;8,1>:ud // message header
+mov (1) mwMSGHDR_DNDI(1,4)<1> r7.0<0;1,0>:w { NoDDClr } // horizontal origin // Do we need to add offset here? -vK
+mov (1) mwMSGHDR_DNDI(1,12)<1> r7.1<0;1,0>:w { NoDDChk } // vertical origin // Can these 2 be combined? - vK
+// Request starts at r18 (mudMSGHDR_DNDI); the reply is written to udDNDI_RESP. Descriptor fields: message length 2, response length 11
+send (8) udDNDI_RESP(0)<1> r18 0x2 0x4BE8003:ud
+
+// On Gen6, with VDI walker, use the XY pair returned rather than programmed above
+// VDI_RETURNED_XY is ordered XY when the walker is enabled, and is the same as programmed when the walker is disabled
+mov (2) r7.0<1>:w uwDNDI_RESP(9,14)<2;2,1> // horizontal/vertical origin in W.14 and W.15
+
+
+
+// FileName: DI_STMM_Save.asm
+// Author: Vivek Kumar
+// Description: Saves DI STMM Data to statistics surface in case of DI enabled (for 16x4 block)
+
+// Write STMM to memory
+mov (8) mudMSGHDR_STMM(0)<1> r0.0<8;8,1>:ud // message header
+mov (8) mudMSGHDR_STMM(1)<1> udDNDI_RESP(8,0) // Move STMM (DNDI response register 8) into the payload register that follows the header
+
+shr (1) mudMSGHDR_STMM(0,0)<1> r7.0<0;1,0>:w 1:w { NoDDClr } // X origin / 2
+mov (1) mudMSGHDR_STMM(0,1)<1> r7.1<0;1,0>:w { NoDDClr, NoDDChk } // Y origin
+mov (1) mudMSGHDR_STMM(0,2)<1> 0x30007:ud { NoDDChk } // block width and height (8x4)
+// Message starts at r20 (mudMSGHDR_STMM): header plus one payload register; destination is null, so no reply is kept
+send (8) null<1>:d r20 0x5 0x40A8021:ud
+
+
+
+// FileName: DN_Hist_Save.asm
+// Author: Vivek Kumar
+// Description: Saves DN history data to statistics surface
+
+// Write denoise history to memory; the header is assembled in r27 and copied to mudMSGHDR_HIST(0) just before the send
+mov (8) r27<1>:ud r0.0<8;8,1>:ud // message header
+
+ mov (1) mudMSGHDR_HIST(1)<1> udDNDI_RESP(9,0)<0;1,0> // Move denoise history (first dword of response register 9) to the payload (4x1)
+
+
+shr (2) r27.0<1>:ud r7.0<2;2,1>:w 2:w // X,Y origin / 4
+add (1) r27.0<1>:ud r27.0<0;1,0>:ud r1.12<0;1,0>:uw { NoDDClr } // Add pitch to X origin
+mov (1) r27.2<1>:ud 0x3:ud { NoDDChk } // block width and height (4x1)
+
+mov (8) mudMSGHDR_HIST(0)<1> r27.0<8;8,1>:ud
+send (8) null<1>:d r22 0x5 0x40A8021:ud
+
+
+
+// FileName: DNDI_Enc_Stats_Save.asm
+// Author: Vivek Kumar
+// Description: Saves Encoder Statistics data to statistics surface in case of DI enabled (for 16x4 block)
+
+// Write encoder statistics to memory
+//Currently enable this only on Gen6 validation
+mov (8) mudMSGHDR_ENC_STATS(1)<1> 0x0:ud // Zero the payload register before the statistics are filled in
+mov (8) mudMSGHDR_ENC_STATS(0)<1> r0.0<8;8,1>:ud // message header
+
+shr (1) mudMSGHDR_ENC_STATS(0,0)<1> r7.0<0;1,0>:w 1:w { NoDDClr } // X origin / 2 (older note: enable the flag after testing on si)
+mul (1) acc0.1<1>:ud r7.1<0;1,0>:w 3:w // Y origin * 3
+shr (1) mudMSGHDR_ENC_STATS(0,1)<1> acc0.1<0;1,0>:ud 2:w { NoDDClr, NoDDChk } // Y origin * 3/4 (older note: enable the flag after testing on si)
+mov (1) mudMSGHDR_ENC_STATS(0,2)<1> 0x20007:ud { NoDDChk } // block width and height (8x3) (older note: enable the flag after testing on si)
+add (2) mudMSGHDR_ENC_STATS(0,0)<1> mudMSGHDR_ENC_STATS(0,0)<2;2,1> r1.12<2;2,1>:uw // Add pitch to X,Y origin
+
+
+ //Data block for Encoder Statistics
+ //----------------------------------------------------
+ //| 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | Bytes
+ //----------------------------------------------------
+ //| BNE | MCNT | FCNT | TCNT | X | X | X | X |
+ //----------------------------------------------------
+ //| DcTpT | SVCM | DcBpT | DcTpB |
+ //----------------------------------------------------
+ //| SHCM | STAD | DcTcB | DcBpB |
+ //----------------------------------------------------
+ mov (1) mudMSGHDR_ENC_STATS(1,0)<1> udDNDI_RESP(9,1)<0;1,0> { NoDDClr } // Move encoder statistics to the payload: response element 1 -> payload element 0
+ mov (2) mudMSGHDR_ENC_STATS(1,3)<2> udDNDI_RESP(9,3)<2;2,1> { NoDDClr, NoDDChk } // Response elements 3,4 -> payload elements 3,5 (destination stride 2)
+ mov (2) mudMSGHDR_ENC_STATS(1,2)<2> udDNDI_RESP(9,5)<2;2,1> { NoDDChk } // Response elements 5,6 -> payload elements 2,4 (destination stride 2)
+
+// Message starts at r24 (mudMSGHDR_ENC_STATS); destination is null, so no reply is kept
+send (8) null<1>:d r24 0x5 0x40A8021:ud
+
+
+
+// FileName: DN_Load_UV_NV12_16x4.asm
+// Author: Vivek Kumar
+// Description: Read UV for 16x4 block through DATAPORT
+
+
+
+// FileName: UVCopy_Load_16x4.asm
+// Author: Vivek Kumar
+// Description: Read UV for 16x4 block through DATAPORT
+
+// The header is assembled in r27, copied to mudMSGHDR_UVCOPY (r36), and the reply is written to udDNDI_UV_RESP
+ add (2) r27.0<1>:d r7.0<2;2,1>:w r4.4<2;2,1>:w // Source Y Block origin
+ asr (1) r27.1<1>:d r27.1<0;1,0>:d 1:w { NoDDClr } // U/V block origin should be half of Y's
+ mov (1) r27.2<1>:ud 0x1000F:ud { NoDDChk } // U/V block width and height (16 bytes x 2 rows)
+ mov (8) mudMSGHDR_UVCOPY(0)<1> r27.0<8;8,1>:ud
+ send (8) udDNDI_UV_RESP(0)<1> r36 0x4 0x2190001:ud
+
+
+
+// FileName: DN_Save_Y_16x4.asm
+// Author: Vivek Kumar
+// Description: Save one 16x4 block of Y channel of DN output for reference
+
+
+ // Check top/bottom field first: f0.0 is set when byte r1.28 equals 1, which selects the TOP_FIELD_FIRST path below
+cmp.e.f0.0 (1) null<1>:w r1.28<0;1,0>:ub 1:w
+
+mov (8) mudMSGHDR_DN_OUT(0)<1> r0<8;8,1>:ud // message header
+mov (2) mdMSGHDR_DN_OUT(0,0)<1> r7.0<2;2,1>:w { NoDDClr } // X,Y origin (copied unchanged; nothing is doubled here)
+mov (1) mudMSGHDR_DN_OUT(0,2)<1> 0x3000F:ud { NoDDChk } // block width and height (16x4)
+
+(f0.0) jmpi (1) TOP_FIELD_FIRST
+
+BOTTOM_FIELD_FIRST:
+
+ mov (4) mudMSGHDR_DN_OUT(1,0)<1> udDNDI_RESP(10,0)<4;4,1> { NoDDClr } // 2nd field luma from current frame (line 0,2)
+ mov (4) mudMSGHDR_DN_OUT(1,4)<1> udDNDI_RESP(4,4)<4;4,1> { NoDDChk } // 1st field luma from current frame (line 1,3)
+ mov (4) mudMSGHDR_DN_OUT(2,0)<1> udDNDI_RESP(10,4)<4;4,1> { NoDDClr } // 2nd field luma from current frame (line 0,2)
+ mov (4) mudMSGHDR_DN_OUT(2,4)<1> udDNDI_RESP(5,4)<4;4,1> { NoDDChk } // 1st field luma from current frame (line 1,3)
+
+ jmpi (1) SAVE_DN_CURR
+
+TOP_FIELD_FIRST:
+ mov (4) mudMSGHDR_DN_OUT(1,0)<1> udDNDI_RESP(4,0)<4;4,1> { NoDDClr } // 2nd field luma from current frame (line 0,2)
+ mov (4) mudMSGHDR_DN_OUT(1,4)<1> udDNDI_RESP(10,0)<4;4,1> { NoDDChk } // 1st field luma from current frame (line 1,3)
+ mov (4) mudMSGHDR_DN_OUT(2,0)<1> udDNDI_RESP(5,0)<4;4,1> { NoDDClr } // 2nd field luma from current frame (line 0,2)
+ mov (4) mudMSGHDR_DN_OUT(2,4)<1> udDNDI_RESP(10,4)<4;4,1> { NoDDChk } // 1st field luma from current frame (line 1,3)
+
+SAVE_DN_CURR:
+//send out data through data port: header at r31 (mudMSGHDR_DN_OUT) followed by the two payload registers filled above
+send (8) null<1>:d r31.0 0x5 0x60A8018:ud
+
+
+
+// FileName: DI_Save_422CP_16x4.asm
+// Author: Vivek Kumar
+// Description: Save two 16x4 blocks of DI output in 422 format to Color Pipe (IECP)
+
+
+.declare mubMSGHDR_DI_OUT1_1 Base=r18.0 ElementSize=1 Type=ub
+
+
+.declare mubMSGHDR_DI_OUT1_2 Base=r21.0 ElementSize=1 Type=ub
+
+
+.declare mubMSGHDR_DI_OUT2_1 Base=r24.0 ElementSize=1 Type=ub
+
+
+.declare mubMSGHDR_DI_OUT2_2 Base=r27.0 ElementSize=1 Type=ub
+
+
+mov (8) r27.0<1>:ud r0.0<8;8,1>:ud
+shl (1) r27.0<1>:ud r7.0<0;1,0>:w 1:w { NoDDClr } // H. block origin need to be doubled
+mov (1) r27.1<1>:ud r7.1<0;1,0>:w { NoDDClr, NoDDChk } // Block origin
+mov (1) r27.2<1>:ud 0x3000F:ud { NoDDClr, NoDDChk } // Block width and height (16x8)
+
+//M0.3 - 0 - CP Enable, 1 - Area of Interest, 3:2 Message Format(TBD), 4:3 - Ignored, 31:5 CP state pointer
+//Compose area-of-interest bit + color pipe state pointer
+or (1) r27.3<1>:ud r2.4<0;1,0>:ud r7.26<0;1,0>:b { NoDDChk }
+
+//prepare the message headers
+mov (8) r18.0<1>:ud r27<8;8,1>:ud
+mov (8) r24.0<1>:ud r27<8;8,1>:ud
+
+
+// Pack 2nd field Y; First 8x4 block
+ mov (8) mubMSGHDR_DI_OUT1_1(1)<2> ubDNDI_RESP(0,0)<8;8,1> { NoDDClr } // copy line of Y directly to memory as optimization
+ mov (8) mubMSGHDR_DI_OUT1_1(1,16)<2> ubDNDI_RESP(0,16)<8;8,1> { NoDDClr, NoDDChk } // copy line of Y directly to memory as optimization
+ mov (8) mubMSGHDR_DI_OUT1_1(2)<2> ubDNDI_RESP(0,32)<8;8,1> { NoDDClr } // copy line of Y directly to memory as optimization
+ mov (8) mubMSGHDR_DI_OUT1_1(2,16)<2> ubDNDI_RESP(0,48)<8;8,1> { NoDDClr, NoDDChk } // copy line of Y directly to memory as optimization
+
+// Pack 2nd field U, V; First 8x4 block
+ mov (4) mubMSGHDR_DI_OUT1_1(1,1)<4> ubDNDI_RESP(2,1)<8;4,2> { NoDDClr, NoDDChk } // copy line of U directly to memory as optimization
+ mov (4) mubMSGHDR_DI_OUT1_1(1,17)<4> ubDNDI_RESP(2,17)<8;4,2> { NoDDClr, NoDDChk } // copy line of U directly to memory as optimization
+
+ mov (4) mubMSGHDR_DI_OUT1_1(1,3)<4> ubDNDI_RESP(2,0)<8;4,2> { NoDDChk } // copy line of V directly to memory as optimization
+ mov (4) mubMSGHDR_DI_OUT1_1(1,19)<4> ubDNDI_RESP(2,16)<8;4,2> { NoDDChk } // copy line of V directly to memory as optimization
+ mov (4) mubMSGHDR_DI_OUT1_1(2,1)<4> ubDNDI_RESP(2,33)<8;4,2> { NoDDClr, NoDDChk } // copy line of U directly to memory as optimization
+ mov (4) mubMSGHDR_DI_OUT1_1(2,17)<4> ubDNDI_RESP(2,49)<8;4,2> { NoDDClr, NoDDChk } // copy line of U directly to memory as optimization
+
+ mov (4) mubMSGHDR_DI_OUT1_1(2,3)<4> ubDNDI_RESP(2,32)<8;4,2> { NoDDChk } // copy line of V directly to memory as optimization
+ mov (4) mubMSGHDR_DI_OUT1_1(2,19)<4> ubDNDI_RESP(2,48)<8;4,2> { NoDDChk } // copy line of V directly to memory as optimization
+
+
+// Pack 2nd field Y; Second 8x4 block
+mov (8) r21.0<1>:ud r18.0<8;8,1>:ud
+add (1) r21.0<1>:ud r21.0<0;1,0>:w 0x10:w
+
+ mov (8) mubMSGHDR_DI_OUT1_2(1)<2> ubDNDI_RESP(0,8)<8;8,1> { NoDDClr } // copy line of Y directly to memory as optimization
+ mov (8) mubMSGHDR_DI_OUT1_2(1,16)<2> ubDNDI_RESP(0,24)<8;8,1> { NoDDClr, NoDDChk } // copy line of Y directly to memory as optimization
+ mov (8) mubMSGHDR_DI_OUT1_2(2)<2> ubDNDI_RESP(0,40)<8;8,1> { NoDDClr } // copy line of Y directly to memory as optimization
+ mov (8) mubMSGHDR_DI_OUT1_2(2,16)<2> ubDNDI_RESP(0,56)<8;8,1> { NoDDClr, NoDDChk } // copy line of Y directly to memory as optimization
+
+// Pack 2nd field U, V; Second 8x4 block
+ mov (4) mubMSGHDR_DI_OUT1_2(1,1)<4> ubDNDI_RESP(2,9)<8;4,2> { NoDDClr, NoDDChk } // copy line of U directly to memory as optimization
+ mov (4) mubMSGHDR_DI_OUT1_2(1,17)<4> ubDNDI_RESP(2,25)<8;4,2> { NoDDClr, NoDDChk } // copy line of U directly to memory as optimization
+
+ mov (4) mubMSGHDR_DI_OUT1_2(1,3)<4> ubDNDI_RESP(2,8)<8;4,2> { NoDDChk } // copy line of V directly to memory as optimization
+ mov (4) mubMSGHDR_DI_OUT1_2(1,19)<4> ubDNDI_RESP(2,24)<8;4,2> { NoDDChk } // copy line of V directly to memory as optimization
+ mov (4) mubMSGHDR_DI_OUT1_2(2,1)<4> ubDNDI_RESP(2,41)<8;4,2> { NoDDClr, NoDDChk } // copy line of U directly to memory as optimization
+ mov (4) mubMSGHDR_DI_OUT1_2(2,17)<4> ubDNDI_RESP(2,57)<8;4,2> { NoDDClr, NoDDChk } // copy line of U directly to memory as optimization
+
+ mov (4) mubMSGHDR_DI_OUT1_2(2,3)<4> ubDNDI_RESP(2,40)<8;4,2> { NoDDChk } // copy line of V directly to memory as optimization
+ mov (4) mubMSGHDR_DI_OUT1_2(2,19)<4> ubDNDI_RESP(2,56)<8;4,2> { NoDDChk } // copy line of V directly to memory as optimization
+
+send (8) null<1>:d r18.0 0x5 0x60A801B:ud
+send (8) null<1>:d r21.0 0x5 0x60A801B:ud
+
+// Pack 1st field Y; 1st 8x4 block
+ mov (8) mubMSGHDR_DI_OUT2_1(1)<2> ubDNDI_RESP(4,0)<8;8,1> { NoDDClr } // copy line of Y directly to memory as optimization
+ mov (8) mubMSGHDR_DI_OUT2_1(1,16)<2> ubDNDI_RESP(4,16)<8;8,1> { NoDDClr, NoDDChk } // copy line of Y directly to memory as optimization
+ mov (8) mubMSGHDR_DI_OUT2_1(2)<2> ubDNDI_RESP(4,32)<8;8,1> { NoDDClr } // copy line of Y directly to memory as optimization
+ mov (8) mubMSGHDR_DI_OUT2_1(2,16)<2> ubDNDI_RESP(4,48)<8;8,1> { NoDDClr, NoDDChk } // copy line of Y directly to memory as optimization
+
+// Pack 1st field U,V; 1st 8x4 block
+ mov (4) mubMSGHDR_DI_OUT2_1(1,1)<4> ubDNDI_RESP(6,1)<8;4,2> { NoDDClr, NoDDChk } // copy line of U directly to memory as optimization
+ mov (4) mubMSGHDR_DI_OUT2_1(1,17)<4> ubDNDI_RESP(6,17)<8;4,2> { NoDDClr, NoDDChk } // copy line of U directly to memory as optimization
+
+ mov (4) mubMSGHDR_DI_OUT2_1(1,3)<4> ubDNDI_RESP(6,0)<8;4,2> { NoDDChk } // copy line of V directly to memory as optimization
+ mov (4) mubMSGHDR_DI_OUT2_1(1,19)<4> ubDNDI_RESP(6,16)<8;4,2> { NoDDChk } // copy line of V directly to memory as optimization
+ mov (4) mubMSGHDR_DI_OUT2_1(2,1)<4> ubDNDI_RESP(6,33)<8;4,2> { NoDDClr, NoDDChk } // copy line of U directly to memory as optimization
+ mov (4) mubMSGHDR_DI_OUT2_1(2,17)<4> ubDNDI_RESP(6,49)<8;4,2> { NoDDClr, NoDDChk } // copy line of U directly to memory as optimization
+
+ mov (4) mubMSGHDR_DI_OUT2_1(2,3)<4> ubDNDI_RESP(6,32)<8;4,2> { NoDDChk } // copy line of V directly to memory as optimization
+ mov (4) mubMSGHDR_DI_OUT2_1(2,19)<4> ubDNDI_RESP(6,48)<8;4,2> { NoDDChk } // copy line of V directly to memory as optimization
+
+// Pack 1st field Y; 2nd 8x4 block
+mov (8) r27.0<1>:ud r24.0<8;8,1>:ud
+add (1) r27.0<1>:ud r27.0<0;1,0>:w 0x10:w
+
+ mov (8) mubMSGHDR_DI_OUT2_2(1)<2> ubDNDI_RESP(4,8)<8;8,1> { NoDDClr } // copy line of Y directly to memory as optimization
+ mov (8) mubMSGHDR_DI_OUT2_2(1,16)<2> ubDNDI_RESP(4,24)<8;8,1> { NoDDClr, NoDDChk } // copy line of Y directly to memory as optimization
+ mov (8) mubMSGHDR_DI_OUT2_2(2)<2> ubDNDI_RESP(4,40)<8;8,1> { NoDDClr } // copy line of Y directly to memory as optimization
+ mov (8) mubMSGHDR_DI_OUT2_2(2,16)<2> ubDNDI_RESP(4,56)<8;8,1> { NoDDClr, NoDDChk } // copy line of Y directly to memory as optimization
+
+// Pack 1st field U, V; 2nd 8x4 block
+ mov (4) mubMSGHDR_DI_OUT2_2(1,1)<4> ubDNDI_RESP(6,9)<8;4,2> { NoDDClr, NoDDChk } // copy line of U directly to memory as optimization
+ mov (4) mubMSGHDR_DI_OUT2_2(1,17)<4> ubDNDI_RESP(6,25)<8;4,2> { NoDDClr, NoDDChk } // copy line of U directly to memory as optimization
+
+ mov (4) mubMSGHDR_DI_OUT2_2(1,3)<4> ubDNDI_RESP(6,8)<8;4,2> { NoDDChk } // copy line of V directly to memory as optimization
+ mov (4) mubMSGHDR_DI_OUT2_2(1,19)<4> ubDNDI_RESP(6,24)<8;4,2> { NoDDChk } // copy line of V directly to memory as optimization
+ mov (4) mubMSGHDR_DI_OUT2_2(2,1)<4> ubDNDI_RESP(6,41)<8;4,2> { NoDDClr, NoDDChk } // copy line of U directly to memory as optimization
+ mov (4) mubMSGHDR_DI_OUT2_2(2,17)<4> ubDNDI_RESP(6,57)<8;4,2> { NoDDClr, NoDDChk } // copy line of U directly to memory as optimization
+
+ mov (4) mubMSGHDR_DI_OUT2_2(2,3)<4> ubDNDI_RESP(6,40)<8;4,2> { NoDDChk } // copy line of V directly to memory as optimization
+ mov (4) mubMSGHDR_DI_OUT2_2(2,19)<4> ubDNDI_RESP(6,56)<8;4,2> { NoDDChk } // copy line of V directly to memory as optimization
+
+send (8) null<1>:d r24.0 0x5 0x60A801E:ud
+send (8) null<1>:d r27.0 0x5 0x60A801E:ud
+
+
+
+// FileName: DN_Save_UV_NV12_16x4.asm
+// Author: Vivek Kumar
+// Description: Save UV for 16x4 block through DATAPORT
+
+
+
+// FileName: UVCopy_Save_16x4.asm
+// Author: Vivek Kumar
+// Description: Save UV for 16x4 block through DATAPORT
+
+
+//Reuse the header from Load component
+
+ mov (8) mudMSGHDR_UVCOPY(1)<1> udDNDI_UV_RESP(0)<8;8,1>
+ send (8) null<1>:d r36 0x5 0x40A8019:ud
+
+
+
+//End of Thread message
+
+mov (8) r127<1>:ud r0.0<8;8,1>:ud
+ send (1) null<1>:d r127 0x27 0x02000010
+
+
+.end_code
+.end_kernel
diff --git a/src/shaders/post_processing/gen7/NV12_DNDI_PA.g4a b/src/shaders/post_processing/gen7/NV12_DNDI_PA.g4a
new file mode 100644
index 0000000..7fd55b0
--- /dev/null
+++ b/src/shaders/post_processing/gen7/NV12_DNDI_PA.g4a
@@ -0,0 +1,495 @@
+/*
+ * Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// 86 // Total instruction count
+// 1 // Total kernel count
+
+.kernel NV12_DNDI_PA
+.code
+
+
+
+// FileName: DNDI_PL_Core.asm
+// Author: Tatiya, Rupesh
+
+
+
+// FileName: DNDI_Core.asm
+// Author: Vivek Kumar
+// Description: Tasks for DN+DI case (16x4 block)
+
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16)
+.default_register_type :ub
+
+.reg_count_total 128
+.reg_count_payload 7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later)
+// 0000,0000,0000
+// 0001(Spawn a root thread),0001 (Root thread spawn thread)
+// = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later)
+// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+// 0000,0000 (Binding table index, added later)
+// = 0x0C098700 (from the bit fields listed above; 0x02000011 is the thread-spawner value)
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+ // Message Descriptors
+ // = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+ // 1 (header present 1) 0 1010 (media block write) 000000
+ // 00000000 (binding table index - set later)
+ // = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+ // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT
+
+
+.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub // r30 viewed as unsigned bytes
+.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw // r30 viewed as unsigned words
+.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud // r30 viewed as unsigned dwords
+.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f // r30 viewed as floats; all four names alias the same register
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+
+.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+// The five ROBUF names are typed views of the same storage starting at r9.0
+.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+// Byte view with a default stride of 4 on both source and destination
+.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+
+
+// End of common.inc
+
+
+// FileName: DNDI.inc
+// Author: Vivek Kumar
+// Description: Include file for DN, DI and DNDI
+// Inputs: DI_ENABLE, DN_ENABLE, DN_PLANAR, DN_PACKED
+
+
+
+
+// End of common.inc
+
+
+//Interface:
+//Static Parameters:
+//r1
+
+
+//====================== Binding table (Explicit To DNDI)=========================================
+
+
+.declare mudMSGHDR_DNDI Base=r18 ElementSize=4 Type=ud
+.declare mdMSGHDR_DNDI Base=r18 ElementSize=4 Type=d
+.declare mwMSGHDR_DNDI Base=r18 ElementSize=2 Type=w
+
+// STMM write message: header in r20, payload in r21
+.declare mudMSGHDR_STMM Base=r20 ElementSize=4 Type=ud
+
+// Denoise-history write message: header in r22, payload in r23
+.declare mudMSGHDR_HIST Base=r22 ElementSize=4 Type=ud
+
+// Encoder-statistics write message: header in r24, payload in r25
+.declare mudMSGHDR_ENC_STATS Base=r24 ElementSize=4 Type=ud
+.declare muwMSGHDR_ENC_STATS Base=r24 ElementSize=2 Type=uw
+.declare mubMSGHDR_ENC_STATS Base=r24 ElementSize=1 Type=ub
+
+// Denoised-Y write message: header in r31, payload in r32-r33
+.declare mudMSGHDR_DN_OUT Base=r31.0 ElementSize=4 Type=ud
+.declare mdMSGHDR_DN_OUT Base=r31.0 ElementSize=4 Type=d
+.declare mubMSGHDR_DN_OUT Base=r31.0 ElementSize=1 Type=ub
+
+// UV copy message: r36 is the header for both the read and the write, r37 carries the write payload
+.declare mudMSGHDR_UVCOPY Base=r36 ElementSize=4 Type=ud
+.declare mdMSGHDR_UVCOPY Base=r36 ElementSize=4 Type=d
+.declare mudMSGHDR_UCOPY Base=r36 ElementSize=4 Type=ud
+.declare mudMSGHDR_VCOPY Base=r38 ElementSize=4 Type=ud
+
+// DI output 1 shares r18 with mudMSGHDR_DNDI, so the DNDI request header is overwritten once DI saving starts
+.declare mudMSGHDR_DI_OUT1 Base=r18.0 ElementSize=4 Type=ud
+.declare mubMSGHDR_DI_OUT1 Base=r18.0 ElementSize=1 Type=ub
+
+// DI output 2 starts at r23, i.e. it overlaps the HIST payload (r23) and the ENC_STATS registers (r24-r25)
+.declare mudMSGHDR_DI_OUT2 Base=r23.0 ElementSize=4 Type=ud
+.declare mubMSGHDR_DI_OUT2 Base=r23.0 ElementSize=1 Type=ub
+
+//r45
+//Use r45 as message header, so no need to "mov" the data.
+// r45 is the GRF immediately before the DNDI response buffer (r46)
+.declare mudDN_Y_OUT Base=r45.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+// Message response (Denoised & DI-ed pixels & statistics); Use buffer 5
+.declare udDNDI_RESP Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare uwDNDI_RESP Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare ubDNDI_RESP Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+// Message response (UV Copy); Use buffer 5
+.declare udDNDI_UV_RESP Base=r58.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare ubDNDI_UV_RESP Base=r58.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+//Temp GRFs: For 42X to 422 Conversion
+.declare uwDNDI_UVCOPY_TEMP Base=r10.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw //8 GRFs
+.declare ubDNDI_UVCOPY_TEMP Base=r10.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub //8 GRFs
+//---------------------------------------------------------------------------
+// Message descriptors
+//---------------------------------------------------------------------------
+// Extended message descriptor
+ // Message descriptor for sampler read
+ // = 000 0010 (message len 2) 00000 (resp len - set later, 12 or 5 or 11)
+ // 1 (header present 1) 0 11 (SIMD32/64 mode)
+ // 1000 (message type) 0000 (DI state index)
+ // 00000000 (binding table index - set later)
+ // = 0x040b8000
+
+
+// Attention: The Message Length is The Number of GRFs with Data Only, without the Header
+
+
+//---------------------------------------------------------------------------
+// VDI Return Data format
+//---------------------------------------------------------------------------
+// Defines for DI enabled
+
+
+// Defines for DI disabled
+
+
+
+// FileName: DNDI_Command.asm
+// Author: Vivek Kumar
+// Description: Sends a message to the VDI to process one DN (16x8) or DNDI (16x4) block
+
+// Build the DNDI request in r18-r19 (mudMSGHDR_DNDI) and send it; the response is written to udDNDI_RESP (r46 onward)
+mov (8) mudMSGHDR_DNDI(0)<1> r0.0<8;8,1>:ud // message header: start from a copy of r0
+mov (1) mwMSGHDR_DNDI(1,4)<1> r7.0<0;1,0>:w { NoDDClr } // horizontal origin // Do we need to add offset here? -vK
+mov (1) mwMSGHDR_DNDI(1,12)<1> r7.1<0;1,0>:w { NoDDChk } // vertical origin // Can these 2 be combined? - vK
+// Descriptor 0x4BE8003: message length 2 (r18-r19), response length 11, binding table index 3
+send (8) udDNDI_RESP(0)<1> r18 0x2 0x4BE8003:ud
+
+// On Gen6, with the VDI walker, use the XY pair returned rather than the one programmed above.
+// VDI_RETURNED_XY is ordered X,Y when the walker is enabled and equals the programmed values when the walker is disabled.
+mov (2) r7.0<1>:w uwDNDI_RESP(9,14)<2;2,1> // horizontal/vertical origin in W.14 and W.15 of response GRF 9; overwrites r7.0/r7.1
+
+
+
+// FileName: DI_STMM_Save.asm
+// Author: Vivek Kumar
+// Description: Saves DI STMM Data to statistics surface in case of DI enabled (for 16x4 block)
+
+// Write STMM to memory: header in r20, one payload GRF in r21
+mov (8) mudMSGHDR_STMM(0)<1> r0.0<8;8,1>:ud // message header: start from a copy of r0
+mov (8) mudMSGHDR_STMM(1)<1> udDNDI_RESP(8,0) // copy response GRF 8 (STMM) into the payload register r21
+// Header dwords 0..2 = X origin, Y origin, block size
+shr (1) mudMSGHDR_STMM(0,0)<1> r7.0<0;1,0>:w 1:w { NoDDClr } // X origin / 2
+mov (1) mudMSGHDR_STMM(0,1)<1> r7.1<0;1,0>:w { NoDDClr, NoDDChk } // Y origin
+mov (1) mudMSGHDR_STMM(0,2)<1> 0x30007:ud { NoDDChk } // block size: low half = width-1 (7), high half = height-1 (3) -> 8x4
+// Descriptor 0x40A8021: message length 2, no response, binding table index 0x21
+send (8) null<1>:d r20 0x5 0x40A8021:ud
+
+
+
+// FileName: DN_Hist_Save.asm
+// Author: Vivek Kumar
+// Description: Saves DN history data to statistics surface
+
+// Write denoise history to memory: header is built in r27, then copied to r22; payload is r23
+mov (8) r27<1>:ud r0.0<8;8,1>:ud // message header: start from a copy of r0
+
+ mov (1) mudMSGHDR_HIST(1)<1> udDNDI_RESP(9,0)<0;1,0> // copy dword 0 of response GRF 9 (denoise history, 4x1) into payload r23.0
+
+// r27 dwords 0..2 = X origin, Y origin, block size
+shr (2) r27.0<1>:ud r7.0<2;2,1>:w 2:w // X,Y origin / 4
+add (1) r27.0<1>:ud r27.0<0;1,0>:ud r1.12<0;1,0>:uw { NoDDClr } // Add pitch (word r1.12) to X origin
+mov (1) r27.2<1>:ud 0x3:ud { NoDDChk } // block size: width-1 = 3, height-1 = 0 -> 4x1
+// r27 keeps this header after the copy; later sections reuse r27 without reloading r0
+mov (8) mudMSGHDR_HIST(0)<1> r27.0<8;8,1>:ud
+send (8) null<1>:d r22 0x5 0x40A8021:ud
+
+
+
+// FileName: DNDI_Enc_Stats_Save.asm
+// Author: Vivek Kumar
+// Description: Saves Encoder Statistics data to statistics surface in case of DI enabled (for 16x4 block)
+
+// Write encoder statistics to memory: header in r24, one payload GRF in r25
+//Currently enable this only on Gen6 validation
+mov (8) mudMSGHDR_ENC_STATS(1)<1> 0x0:ud // zero the payload register r25
+mov (8) mudMSGHDR_ENC_STATS(0)<1> r0.0<8;8,1>:ud // message header: start from a copy of r0
+// Header dwords 0..2 = X origin, Y origin, block size
+shr (1) mudMSGHDR_ENC_STATS(0,0)<1> r7.0<0;1,0>:w 1:w { NoDDClr } // X origin / 2
+mul (1) acc0.1<1>:ud r7.1<0;1,0>:w 3:w // Y origin * 3
+shr (1) mudMSGHDR_ENC_STATS(0,1)<1> acc0.1<0;1,0>:ud 2:w { NoDDClr, NoDDChk } // Y origin * 3/4
+mov (1) mudMSGHDR_ENC_STATS(0,2)<1> 0x20007:ud { NoDDChk } // block size: width-1 = 7, height-1 = 2 -> 8x3
+add (2) mudMSGHDR_ENC_STATS(0,0)<1> mudMSGHDR_ENC_STATS(0,0)<2;2,1> r1.12<2;2,1>:uw // Add words r1.12/r1.13 (pitch) to X,Y origin
+
+
+ //Data block for Encoder Statistics
+ //----------------------------------------------------
+ //| 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | Bytes
+ //----------------------------------------------------
+ //| BNE | MCNT | FCNT | TCNT | X | X | X | X |
+ //----------------------------------------------------
+ //| DcTpT | SVCM | DcBpT | DcTpB |
+ //----------------------------------------------------
+ //| SHCM | STAD | DcTcB | DcBpB |
+ //----------------------------------------------------
+ mov (1) mudMSGHDR_ENC_STATS(1,0)<1> udDNDI_RESP(9,1)<0;1,0> { NoDDClr } // payload dword 0 <- response GRF 9 dword 1
+ mov (2) mudMSGHDR_ENC_STATS(1,3)<2> udDNDI_RESP(9,3)<2;2,1> { NoDDClr, NoDDChk } // payload dwords 3 and 5 <- response GRF 9 dwords 3 and 4
+ mov (2) mudMSGHDR_ENC_STATS(1,2)<2> udDNDI_RESP(9,5)<2;2,1> { NoDDChk } // payload dwords 2 and 4 <- response GRF 9 dwords 5 and 6
+
+// Descriptor 0x40A8021: message length 2, no response, binding table index 0x21
+send (8) null<1>:d r24 0x5 0x40A8021:ud
+
+
+
+// FileName: DN_Load_UV_NV12_16x4.asm
+// Author: Vivek Kumar
+// Description: Read UV for 16x4 block through DATAPORT
+
+
+
+// FileName: UVCopy_Load_16x4.asm
+// Author: Vivek Kumar
+// Description: Read UV for 16x4 block through DATAPORT
+
+
+ add (2) r27.0<1>:d r7.0<2;2,1>:w r4.4<2;2,1>:w // Source Y block origin = r7.0/r7.1 + r4.4/r4.5; r27.3-r27.7 still hold the r0 copy made in the history-save section
+ asr (1) r27.1<1>:d r27.1<0;1,0>:d 1:w { NoDDClr } // U/V block vertical origin is half of Y's
+ mov (1) r27.2<1>:ud 0x1000F:ud { NoDDChk } // U/V block size: width-1 = 15, height-1 = 1 -> 16 bytes x 2 rows (presumably 8 interleaved U/V pairs per row - confirm)
+ mov (8) mudMSGHDR_UVCOPY(0)<1> r27.0<8;8,1>:ud // copy the finished header into r36
+ send (8) udDNDI_UV_RESP(0)<1> r36 0x4 0x2190001:ud // message length 1, response length 1 into r58, binding table index 1
+
+
+
+// FileName: DN_Save_Y_16x4.asm
+// Author: Vivek Kumar
+// Description: Save one 16x4 blocks of Y channel of DN output for reference
+
+
+ // check top/bottom field first: f0.0 is set when byte r1.28 equals 1
+cmp.e.f0.0 (1) null<1>:w r1.28<0;1,0>:ub 1:w
+
+mov (8) mudMSGHDR_DN_OUT(0)<1> r0<8;8,1>:ud // message header: start from a copy of r0
+mov (2) mdMSGHDR_DN_OUT(0,0)<1> r7.0<2;2,1>:w { NoDDClr } // X,Y origin copied unchanged from r7.0/r7.1 (no doubling is done here)
+mov (1) mudMSGHDR_DN_OUT(0,2)<1> 0x3000F:ud { NoDDChk } // block size: width-1 = 15, height-1 = 3 -> 16x4
+
+(f0.0) jmpi (1) TOP_FIELD_FIRST
+
+BOTTOM_FIELD_FIRST:
+// Payload r32-r33; each half GRF (4 dwords = 16 bytes) is one output line
+ mov (4) mudMSGHDR_DN_OUT(1,0)<1> udDNDI_RESP(10,0)<4;4,1> { NoDDClr } // 2nd field luma from current frame (line 0,2)
+ mov (4) mudMSGHDR_DN_OUT(1,4)<1> udDNDI_RESP(4,4)<4;4,1> { NoDDChk } // 1st field luma from current frame (line 1,3)
+ mov (4) mudMSGHDR_DN_OUT(2,0)<1> udDNDI_RESP(10,4)<4;4,1> { NoDDClr } // 2nd field luma from current frame (line 0,2)
+ mov (4) mudMSGHDR_DN_OUT(2,4)<1> udDNDI_RESP(5,4)<4;4,1> { NoDDChk } // 1st field luma from current frame (line 1,3)
+
+ jmpi (1) SAVE_DN_CURR
+
+TOP_FIELD_FIRST:
+ mov (4) mudMSGHDR_DN_OUT(1,0)<1> udDNDI_RESP(4,0)<4;4,1> { NoDDClr } // response GRF 4, low half -> line 0 (NOTE(review): original comment said "2nd field", apparently copied from the BFF branch - confirm)
+ mov (4) mudMSGHDR_DN_OUT(1,4)<1> udDNDI_RESP(10,0)<4;4,1> { NoDDChk } // response GRF 10, low half -> line 1 (NOTE(review): original comment said "1st field" - confirm)
+ mov (4) mudMSGHDR_DN_OUT(2,0)<1> udDNDI_RESP(5,0)<4;4,1> { NoDDClr } // response GRF 5, low half -> line 2
+ mov (4) mudMSGHDR_DN_OUT(2,4)<1> udDNDI_RESP(10,4)<4;4,1> { NoDDChk } // response GRF 10, high half -> line 3
+
+SAVE_DN_CURR:
+//send out data through data port: message length 3 (r31-r33), binding table index 0x18
+send (8) null<1>:d r31.0 0x5 0x60A8018:ud
+
+
+
+// FileName: DI_Save_PA_16x4.asm
+// Author: Vivek Kumar
+// Description: Save two 16x4 blocks of DI output in Packed format
+
+
+add (4) a0.4<1>:uw r2.28<4;4,1>:ub 608:w // a0.4..a0.7 = bytes r2.28..r2.31 + 608; 608 = 19*32 is the byte address of r19, the first payload GRF after the r18 header (a0.4 is used for Y, a0.5 for U, a0.6 for V)
+
+mov (8) r27.0<1>:ud r0.0<8;8,1>:ud
+shl (1) r27.0<1>:d r7.0<0;1,0>:w 1:w { NoDDClr } // H. block origin needs to be doubled
+mov (1) r27.1<1>:d r7.1<0;1,0>:w { NoDDClr, NoDDChk } // V. block origin
+mov (1) r27.2<1>:ud 0x3001F:ud { NoDDChk } // Block size: width-1 = 31, height-1 = 3 -> 32 bytes x 4 rows
+
+//prepare the message headers: the same header goes to r18 (output 1) and r23 (output 2); r27 is reused as payload below
+mov (8) r18.0<1>:ud r27<8;8,1>:ud
+mov (8) r23.0<1>:ud r27<8;8,1>:ud
+
+// Pack 2nd field Y: destination stride 2, one 32-byte GRF per row (payload r19-r22)
+ mov (16) r[a0.4, 0]<2> ubDNDI_RESP(0,0) { NoDDClr }
+ mov (16) r[a0.4, 32]<2> ubDNDI_RESP(0,16) { NoDDClr }
+ mov (16) r[a0.4, 64]<2> ubDNDI_RESP(0,32) { NoDDClr }
+ mov (16) r[a0.4, 96]<2> ubDNDI_RESP(0,48) { NoDDClr }
+// Pack 2nd field U: odd bytes of response GRF 2-3, destination stride 4
+ mov (8) r[a0.5, 0]<4> ubDNDI_RESP(2,1)<16;8,2> { NoDDClr, NoDDChk } //U pixels
+ mov (8) r[a0.5, 32]<4> ubDNDI_RESP(2,17)<16;8,2> { NoDDClr, NoDDChk } //U pixels
+ mov (8) r[a0.5, 64]<4> ubDNDI_RESP(2,33)<16;8,2> { NoDDClr, NoDDChk } //U pixels
+ mov (8) r[a0.5, 96]<4> ubDNDI_RESP(2,49)<16;8,2> { NoDDClr, NoDDChk } //U pixels
+// Pack 2nd field V: even bytes of response GRF 2-3, destination stride 4
+ mov (8) r[a0.6, 0]<4> ubDNDI_RESP(2,0)<16;8,2> { NoDDChk } //Vpixels
+ mov (8) r[a0.6, 32]<4> ubDNDI_RESP(2,16)<16;8,2> { NoDDChk } //Vpixels
+ mov (8) r[a0.6, 64]<4> ubDNDI_RESP(2,32)<16;8,2> { NoDDChk } //Vpixels
+ mov (8) r[a0.6, 96]<4> ubDNDI_RESP(2,48)<16;8,2> { NoDDChk } //Vpixels
+
+// Pack 1st field Y: offsets 160..256 are 5 GRFs further on, i.e. payload r24-r27 behind the r23 header
+ mov (16) r[a0.4, 160]<2> ubDNDI_RESP(4,0) { NoDDClr }
+ mov (16) r[a0.4, 192]<2> ubDNDI_RESP(4,16) { NoDDClr }
+ mov (16) r[a0.4, 224]<2> ubDNDI_RESP(4,32) { NoDDClr }
+ mov (16) r[a0.4, 256]<2> ubDNDI_RESP(4,48) { NoDDClr }
+// Pack 1st field U: odd bytes of response GRF 6-7
+ mov (8) r[a0.5, 160]<4> ubDNDI_RESP(6,1)<16;8,2> { NoDDClr, NoDDChk } //U pixels
+ mov (8) r[a0.5, 192]<4> ubDNDI_RESP(6,17)<16;8,2> { NoDDClr, NoDDChk } //U pixels
+ mov (8) r[a0.5, 224]<4> ubDNDI_RESP(6,33)<16;8,2> { NoDDClr, NoDDChk } //U pixels
+ mov (8) r[a0.5, 256]<4> ubDNDI_RESP(6,49)<16;8,2> { NoDDClr, NoDDChk } //U pixels
+// Pack 1st field V: even bytes of response GRF 6-7
+ mov (8) r[a0.6, 160]<4> ubDNDI_RESP(6,0)<16;8,2> { NoDDChk } //Vpixels
+ mov (8) r[a0.6, 192]<4> ubDNDI_RESP(6,16)<16;8,2> { NoDDChk } //Vpixels
+ mov (8) r[a0.6, 224]<4> ubDNDI_RESP(6,32)<16;8,2> { NoDDChk } //Vpixels
+ mov (8) r[a0.6, 256]<4> ubDNDI_RESP(6,48)<16;8,2> { NoDDChk } //Vpixels
+
+//save the previous frame: message length 5 (r18-r22), binding table index 0x1B
+send (8) null<1>:d r18.0 0x5 0xA0A801B:ud
+
+//save the current frame: message length 5 (r23-r27), binding table index 0x1E
+send (8) null<1>:d r23.0 0x5 0xA0A801E:ud
+
+
+
+// FileName: DN_Save_UV_NV12_16x4.asm
+// Author: Vivek Kumar
+// Description: Save UV for 16x4 block through DATAPORT
+
+
+
+// FileName: UVCopy_Save_16x4.asm
+// Author: Vivek Kumar
+// Description: Save UV for 16x4 block through DATAPORT
+
+
+//Reuse the header from Load component: r36 still holds the origin/size header built for the UV read
+// Payload r37 <- the GRF returned by the UV read (r58); message length 2, binding table index 0x19
+ mov (8) mudMSGHDR_UVCOPY(1)<1> udDNDI_UV_RESP(0)<8;8,1>
+ send (8) null<1>:d r36 0x5 0x40A8019:ud
+
+
+
+//End of Thread message: send a copy of r0 from r127 with message length 1
+// NOTE(review): 0x27 is presumably the thread-spawner target with the end-of-thread bit set - confirm against the gen7 docs
+mov (8) r127<1>:ud r0.0<8;8,1>:ud
+ send (1) null<1>:d r127 0x27 0x02000010
+
+
+.end_code
+.end_kernel
diff --git a/src/shaders/post_processing/gen7/NV12_DNUV_NV12.g4a b/src/shaders/post_processing/gen7/NV12_DNUV_NV12.g4a
new file mode 100644
index 0000000..f560ef8
--- /dev/null
+++ b/src/shaders/post_processing/gen7/NV12_DNUV_NV12.g4a
@@ -0,0 +1,2491 @@
+/*
+ * Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// 1153 // Total instruction count
+// 1 // Total kernel count
+
+
+.kernel NV12_DNUV_NV12
+.code
+
+
+
+//Module : DN_UV_Setup
+//Author : Tatiya, Rupesh
+//Description : Initial Set-up for DN_UV
+
+
+
+
+// Module name : ChromaDenoise.inc
+// Author : Tatiya, Rupesh
+
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16)
+.default_register_type :ub
+
+.reg_count_total 128
+.reg_count_payload 7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later)
+// 0000,0000,0000
+// 0001(Spawn a root thread),0001 (Root thread spawn thread)
+// = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later)
+// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+// 0000,0000 (Binding table index, added later)
+// = 0x0C098700 (from the bit fields listed above; 0x02000011 is the thread-spawner value)
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+ // Message Descriptors
+ // = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+ // 1 (header present 1) 0 1010 (media block write) 000000
+ // 00000000 (binding table index - set later)
+ // = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+ // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT
+
+
+.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+
+.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+
+.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+
+
+// End of common.inc
+
+
+//Interface:
+//Static Parameters:
+//r1
+
+
+//======================================================
+//Interface for serpent mode Chroma Denoise, added by Le
+//======================================================
+//r1
+
+
+//noise history thresholds (low and high)
+
+
+//temporal difference thresholds (high and low)
+
+
+//noise history thresholds (low and high)
+//#define ubNoiseHistMaxHigh r1.22
+//#define ubNoiseHistMaxLow r1.23
+//#define ubNoiseHistDeltaHigh r1.24
+//#define ubNoiseHistDeltaLow r1.25
+
+//Gaussian thresholds
+
+
+//temporal difference thresholds (default)
+
+
+//r2
+//history thresholds (default)
+
+
+//denoise factor (0-63)
+
+
+//====================== Binding table (Explicit To DNUV)=========================================
+//Used by DN_UV kernels
+
+
+ //Pointer to Current Frame UV
+
+
+//r1-r6
+ //CURBE GRFs used as TEMP : Used for max computation and storing max temporarily. : r1-r6
+
+
+ .declare ubCURBE_TEMP Base=r1.0 ElementSize=1 Type=ub
+ .declare uwCURBE_TEMP Base=r1.0 ElementSize=2 Type=uw
+ .declare wCURBE_TEMP Base=r1.0 ElementSize=2 Type=w
+ .declare fCURBE_TEMP Base=r1.0 ElementSize=4 Type=f
+ .declare udCURBE_TEMP Base=r1.0 ElementSize=4 Type=ud
+ .declare uwMAX_ABS_DIFF Base=r5.0 ElementSize=2 Type=uw
+
+ //r1
+
+
+ //r3
+
+
+ //r4
+
+//r7
+ //All of the following has to defined in Same GRF for optimal performance.
+
+
+//r8-24
+ //Previous Frame UV
+
+ .declare udPREV_UV Base=r8.0 ElementSize=4 Type=ud
+ .declare ubPREV_UV Base=r8.0 ElementSize=1 Type=ub
+
+
+//r25-48
+ //TEMP Space for any Usage.
+
+
+//=========================================================================
+//Definitions and declarations for serpent mode Chroma Denoise, added by Le
+//=========================================================================
+
+
+ .declare udGNE_UV Base=r24.0 ElementSize=4 Type=ud
+ .declare fGNE_UV Base=r24.0 ElementSize=4 Type=f
+ .declare ubGNE_UV Base=r24.0 ElementSize=1 Type=ub
+
+ .declare udMSGHDR_BNE_SERP Base=r25.0 ElementSize=4 Type=ud
+ .declare udMSGSRC_BNE_SERP Base=r26.0 ElementSize=4 Type=ud
+
+
+ .declare ubDN_UV_Thresholds Base=r26.0 ElementSize=1 Type=ub
+ .declare ubDN_UV_Thresholds_Temp Base=r27.0 ElementSize=1 Type=ub
+ .declare udDN_UV_Thresholds Base=r26.0 ElementSize=4 Type=ud
+ .declare udDN_UV_Thresholds_Temp Base=r27.0 ElementSize=4 Type=ud
+ .declare fDN_UV_Thresholds Base=r26.0 ElementSize=4 Type=f
+ .declare fDN_UV_Thresholds_Temp Base=r27.0 ElementSize=4 Type=f
+
+
+//====================================================================================
+
+
+ //TEMP23: To hold V data for PL3 surfaces
+ .declare udCURR_V_TEMP Base=r25.0 ElementSize=4 Type=ud
+ .declare ubCURR_V_TEMP Base=r25.0 ElementSize=1 Type=ub
+
+ //GRFs to calculate Median: r25-r42
+ .declare ubMEDIAN_TEMP Base=r25.0 ElementSize=1 Type=ub
+
+ //18 GRFs to hold difference : r25-r42
+ .declare wDIFF Base=r25.0 ElementSize=2 Type=w
+ .declare uwDIFF Base=r25.0 ElementSize=2 Type=uw
+
+ //Temporal Diff
+ .declare wDIFF_TEMPORAL Base=r25.0 ElementSize=2 Type=w
+ .declare ubDIFF_TEMPORAL Base=r25.0 ElementSize=1 Type=ub
+
+ //4 GRFs to hold Sobel Value : r43-46
+ .declare wSOBEL_X Base=r43.0 ElementSize=2 Type=w
+ .declare uwSOBEL Base=r43.0 ElementSize=2 Type=uw
+
+
+ //2 GRFs to hold SOAD temporarily: r47-48
+ .declare uwSOAD Base=r47.0 ElementSize=2 Type=uw
+
+ //Temp GRFs to hold extra YUYV pixels: r43-r48
+ .declare ubTEMP5 Base=r43.0 ElementSize=1 Type=ub
+
+ //Temp GRFs in Median Calculation: r47-r48
+ .declare ubTEMP1 Base=r47.0 ElementSize=1 Type=ub
+
+ .declare uwTEMP0 Base=r48.0 ElementSize=2 Type=uw
+ .declare ubTEMP0 Base=r48.0 ElementSize=1 Type=ub
+
+ //Temp Space to store Median : r49-50
+
+ .declare ubMEDIAN Base=r49.0 ElementSize=1 Type=ub
+
+//r49
+
+
+//r50
+ //Message Source
+
+
+//r51
+ //DN_UV History Surface
+
+ .declare udHIST_UV Base=r51.0 ElementSize=4 Type=ud
+ .declare ubHIST_UV Base=r51.0 ElementSize=1 Type=ub
+
+//r52 - r91
+ //r52
+ //Current Frame UV
+
+
+ .declare udCURR_UV Base=r52.0 ElementSize=4 Type=ud
+ .declare ubCURR_UV Base=r52.0 ElementSize=1 Type=ub
+
+ //r54
+ //CURBE COPY
+
+
+ //r55
+
+
+ .declare uwSOAD_MIN_8x4 Base=r56.0 ElementSize=2 Type=uw
+
+ //r61
+
+
+ //r62
+
+
+ //History Surface Temp Origin
+
+
+ //r63
+ //Current Frame Y Temp Origin
+
+
+ //BNE Surface Origin
+
+
+ //r70
+
+ .declare uwDIFF_TEMPORAL_SUM4x4 Base=r70.0 ElementSize=2 Type=uw //4 GRFs
+
+ //r74-91 : For Saving Dest UV (PL2/PL3)
+
+
+ .declare ubMSGPAYLOAD_UV0 Base=r75.0 ElementSize=1 Type=ub
+
+
+ .declare ubMSGPAYLOAD_U Base=r75.0 ElementSize=1 Type=ub
+
+
+ .declare ubMSGPAYLOAD_UV1 Base=r84.0 ElementSize=1 Type=ub
+
+
+ .declare ubMSGPAYLOAD_V Base=r84.0 ElementSize=1 Type=ub
+
+ //r90
+
+ .declare uwDIFF_TEMPORAL_SUM4x4_FINAL Base=r90.0 ElementSize=2 Type=uw //2 GRFs
+
+//r92-127
+ //Current Frame Y
+
+
+ //r92
+ .declare uwDIFF_TEMPORAL_SUM4x4_TEMP_0 Base=r92 ElementSize=2 Type=uw
+ //r101
+ .declare uwDIFF_TEMPORAL_SUM4x4_TEMP_1 Base=r101 ElementSize=2 Type=uw
+ //r110
+ .declare uwDIFF_TEMPORAL_SUM4x4_TEMP_2 Base=r110 ElementSize=2 Type=uw
+ //r119
+ .declare uwDIFF_TEMPORAL_SUM4x4_TEMP_3 Base=r119 ElementSize=2 Type=uw
+
+ .declare udCURR_Y0 Base=r93.0 ElementSize=4 Type=ud
+ .declare ubCURR_Y0 Base=r93.0 ElementSize=1 Type=ub
+ .declare udCURR_Y1 Base=r102.0 ElementSize=4 Type=ud
+ .declare ubCURR_Y1 Base=r102.0 ElementSize=1 Type=ub
+ .declare udCURR_Y2 Base=r111.0 ElementSize=4 Type=ud
+ .declare ubCURR_Y2 Base=r111.0 ElementSize=1 Type=ub
+ .declare udCURR_Y3 Base=r120.0 ElementSize=4 Type=ud
+ .declare ubCURR_Y3 Base=r120.0 ElementSize=1 Type=ub
+
+ //r92: To hold U data for PL3 surfaces
+ .declare udCURR_U_TEMP Base=r92.0 ElementSize=4 Type=ud
+ .declare ubCURR_U_TEMP Base=r92.0 ElementSize=1 Type=ub
+
+ //r112: To hold U data for PL3 surfaces
+ .declare udPREV_U_TEMP Base=r112.0 ElementSize=4 Type=ud
+ .declare ubPREV_U_TEMP Base=r112.0 ElementSize=1 Type=ub
+
+ //r120: To hold V data for PL3 surfaces
+ .declare udPREV_V_TEMP Base=r120.0 ElementSize=4 Type=ud
+ .declare ubPREV_V_TEMP Base=r120.0 ElementSize=1 Type=ub
+
+
+ // Initialize the message source GRFs with a copy of r0 (all 8 dwords each).
+ mov (8) r50.0<1>:ud r0.0<8;8,1>:ud // r50: message source for the UV, previous-frame and history block reads below
+ mov (8) r92.0<1>:ud r0.0<8;8,1>:ud // r92: message source of the CURR_Y0 read
+ mov (8) r101.0<1>:ud r0.0<8;8,1>:ud // r101: message source of the CURR_Y1 read
+ mov (8) r110.0<1>:ud r0.0<8;8,1>:ud // r110: message source of the CURR_Y2 read
+ mov (8) r119.0<1>:ud r0.0<8;8,1>:ud // r119: message source of the CURR_Y3 read
+
+
+
+//Module Name : DN_UV_PL2_Load_Curr_Frame_UV
+//Author : Tatiya, Rupesh
+//Description : Loads Current Frame UV data for PL2 input.
+
+
+
+//Module name : DN_UV_Load_Curr_Frame_UV
+//Author : Tatiya, Rupesh
+//Description : Loads Current Frame (UV only).
+// We need 4 extra rows (2 per field) and 2 extra pixel (1 each side) for both U and V each.
+// The processing size is 16x16 U and V each. So we need : U size - 18x20, V size - 18x20, UV size - 36x20, YUYV size - 72x20.
+
+
+
+
+//36x20 interleaved UV block is partitioned as follows:
+// <------ 18 --------> <--------18 ------->
+// -----------------------------------------
+// | 20x8 A !| 20x8 D !
+// | (overlapped) !| (overlapped) !
+// |-------------------!|------------------!
+// | 20x8 B !| 20x8 E !
+// | (overlapped) !| (overlapped) !
+// |-------------------!|------------------!
+// | 20x4 C !| 20x4 F !
+// | (overlapped) !| (overlapped) !
+// -----------------------------------------
+//
+// Coordinates: (x-2, y-2), (x+14, y-2), (x-2, y+6), (x+14, y+6), (x-2, y+14), (x+14, y+14)
+
+ //UV surface origin: (ORIX, ORIY/2)
+ add (2) r7.4<1>:w r7.0<2;2,1>:w r4.4<2;2,1>:w { AccWrEn } // Source Block origin: r7.4/r7.5 = r7.0/r7.1 + r4.4/r4.5; AccWrEn also puts the sums in the accumulator (read back as acc0.5 on the next line)
+ shr (1) r7.5<1>:w acc0.5<0;1,0>:w 1:w // r7.5 = summed Y origin >> 1, i.e. the ORIY/2 of the UV origin
+ mov (2) acc0.0<1>:d r7.4<2;2,1>:w // acc0.0/acc0.1 = UV origin (X, Y) widened from words to dwords
+
+ //A
+ add (2) r50.0<1>:d acc0.0<2;2,1>:d -2:d { AccWrEn } // r50.0/r50.1 = (x-2, y-2); presumably the accumulator now also holds (x-2, y-2), which blocks B..F offset from - confirm
+ mov (1) r50.2<1>:ud 0x70013:ud // block size field; presumably (height-1)<<16 | (width-1) = 20 wide x 8 high, matching "20x8 A" - confirm
+ send (8) udCURR_UV(0)<1> r50 0x4 0x2890004:ud // read block A; response lands at CURR_UV register offset 0
+
+ //B
+ add (1) r50.1<1>:d acc0.1<0;1,0>:d 8:d // Y = accumulator Y + 8 (the y+6 entry of the coordinate list); X and block size are unchanged from A
+ send (8) udCURR_UV(8)<1> r50 0x4 0x2890004:ud // read block B; response lands at CURR_UV register offset 8
+
+ //C
+ add (1) r50.1<1>:d acc0.1<0;1,0>:d 16:d // Y = accumulator Y + 16 (the y+14 entry of the coordinate list)
+ mov (1) r50.2<1>:ud 0x30013:ud // block size field; presumably 20 wide x 4 high, matching "20x4 C" - confirm
+ send (8) udCURR_UV(16)<1> r50 0x4 0x2490004:ud // read block C; response lands at CURR_UV register offset 16
+
+ //D
+ add (1) r50.0<1>:d acc0.0<0;1,0>:d 16:d { AccWrEn } // X = accumulator X + 16 (the x+14 entries of the coordinate list)
+ mov (1) r50.1<1>:d acc0.1<0;1,0>:d // Y back to the accumulator Y (the y-2 row of the coordinate list)
+ mov (1) r50.2<1>:ud 0x70013:ud // same block size value as A
+ send (8) udCURR_UV(20)<1> r50 0x4 0x2890004:ud // read block D; response lands at CURR_UV register offset 20
+
+ //E
+ add (1) r50.1<1>:d acc0.1<0;1,0>:d 8:d // Y = accumulator Y + 8; X stays as set for D
+ send (8) udCURR_UV(28)<1> r50 0x4 0x2890004:ud // read block E; response lands at CURR_UV register offset 28
+
+ //F
+ add (1) r50.1<1>:d acc0.1<0;1,0>:d 16:d // Y = accumulator Y + 16; X stays as set for D
+ mov (1) r50.2<1>:ud 0x30013:ud // same block size value as C
+ send (8) udCURR_UV(36)<1> r50 0x4 0x2490004:ud // read block F; response lands at CURR_UV register offset 36
+
+ //History Origin, Current Y origin and BNE surface origin - all are in inline GRF. Use , . -rT.
+
+ //Calculate Origin For History Surface: (ORIX/4, ORIY/8)
+ mov (16) acc0.0<1>:w r7.0<0;2,1>:w // <0;2,1> region: the (r7.0, r7.1) word pair is repeated across all 16 accumulator words
+ shr (1) r7.2<1>:w acc0.2<0;1,0>:w 2:w // r7.2 = r7.0 >> 2 (ORIX/4)
+ shr (1) r7.3<1>:w acc0.3<0;1,0>:w 3:w // r7.3 = r7.1 >> 3 (ORIY/8)
+
+ //Calculate Origin For BNE Surface: (ORIX/8, ORIY/16)
+ shr (1) r7.6<1>:w acc0.6<0;1,0>:w 3:w // r7.6 = r7.0 >> 3 (ORIX/8)
+ shr (1) r7.7<1>:w acc0.7<0;1,0>:w 4:w // r7.7 = r7.1 >> 4 (ORIY/16)
+
+
+
+//Module Name : DN_UV_PL2_Load_Prev_Frame_UV
+//Author : Tatiya, Rupesh
+//Description : Loads Previous Frame UV data for PL2 input.
+
+
+
+//Module Name : DN_UV_Load_Prev_Frame_UV
+//Author : Tatiya, Rupesh
+//Description : Loads Prev Frame (UV only). U size - 16x16, V size - 16x16, UV size - 32x16, YUYV size - 64x16.
+
+
+
+
+ mov (2) r50.0<1>:d r7.4<2;2,1>:w { AccWrEn } // Source block origin: r50.0/r50.1 = UV origin held in r7.4/r7.5; AccWrEn also leaves it in the accumulator for the add below
+ mov (1) r50.2<1>:ud 0xF000F:ud // U/V block width and height (16x16)
+ send (8) udPREV_UV(0)<1> r50 0x4 0x2890001:ud // read the first block; response lands at PREV_UV register offset 0
+
+ add (1) r50.0<1>:ud acc0.0<0;1,0>:d 16:w // Add 16 to X origin
+ send (8) udPREV_UV(8)<1> r50 0x4 0x2890001:ud // read the block 16 to the right; response lands at PREV_UV register offset 8
+
+
+ //TODO - See if History loading can be combined with Prev Frame Load. - rT
+
+
+//Module name : DN_UV_Load_Hist_UV
+//Author : Tatiya, Rupesh
+//Description : Load DN History for UV denoise. 4x4 for each U & V.
+
+
+
+
+ mov (2) r50.0<1>:d r7.2<2;2,1>:w // r50.0/r50.1 = history surface origin computed earlier into r7.2/r7.3 (ORIX/4, ORIY/8)
+ mov (1) r50.2<1>:ud 0x30007:ud // block size field; presumably 8 wide x 4 high (4x4 U + 4x4 V per the module description) - confirm
+ send (8) udHIST_UV(0)<1> r50 0x4 0x2190022:ud // read the history block into HIST_UV (r51)
+
+
+
+//Module Name : DN_UV_420_Load_Curr_Frame_Y
+//Author : Tatiya, Rupesh
+//Description : Load Curr Frame Y data for 420 Input
+
+
+
+//Module Name : DN_UV_Load_Curr_Frame_Y
+//Author : Tatiya, Rupesh
+//Description : Loads Y of Current frame.
+
+
+
+
+ //For 16x16 U and 16x16 V for 420, we need to read 32x32 Y.
+ //The 32x32 area is fetched as four reads, each using its own message source GRF (r92, r101, r110, r119).
+
+ mov (8) acc0.0<1>:ud r0.0<8;8,1>:ud // build the common message source in the accumulator, starting from r0
+ mov (1) acc0.2<1>:ud 0xF000F:ud // block size field: same value that is commented as 16x16 for the previous-frame load
+ add (2) acc0.0<1>:ud r7.0<2;2,1>:w r4.4<2;2,1>:w // X/Y origin = r7.0/r7.1 + r4.4/r4.5
+
+ mov (8) r92.0<1>:ud acc0.0<8;8,1>:ud // Y0 message source: origin (X, Y)
+
+ mov (8) r101.0<1>:ud acc0.0<8;8,1>:ud // Y1 message source: starts as a copy, Y adjusted below
+ mov (8) r110.0<1>:ud acc0.0<8;8,1>:ud // Y2 message source: starts as a copy, X adjusted below
+ mov (8) r119.0<1>:ud acc0.0<8;8,1>:ud // Y3 message source: starts as a copy, X and Y adjusted below
+
+ add (1) r101.1<1>:d acc0.1<0;1,0>:d 16:d // Y1 origin = (X, Y+16)
+
+ add (1) r110.0<1>:d acc0.0<0;1,0>:d 16:d // Y2 origin = (X+16, Y)
+
+ add (2) r119.0<1>:d acc0.0<2;2,1>:d 16:d // Y3 origin = (X+16, Y+16)
+
+ send (8) udCURR_Y0(0)<1> r92 0x4 0x2890003:ud // response goes to CURR_Y0 (r93), directly after its message source r92
+ send (8) udCURR_Y1(0)<1> r101 0x4 0x2890003:ud // response goes to CURR_Y1 (r102)
+ send (8) udCURR_Y2(0)<1> r110 0x4 0x2890003:ud // response goes to CURR_Y2 (r111)
+ send (8) udCURR_Y3(0)<1> r119 0x4 0x2890003:ud // response goes to CURR_Y3 (r120)
+
+
+
+//Module Name : DN_UV_Noise_Detection_UV
+//Author : Tatiya, Rupesh
+//Description : Performs noise detection on 16x16 U and 16x16 V each.
+
+
+
+//Module Name : DN_UV_Move_CURBE_Inline_UV.asm
+//Author : Tatiya, Rupesh
+
+
+
+
+ //Mov CURBE data to another space - so that it can be used as Temp Space --> r1 - r6
+ mov (4) r54.28<1>:ub r2.28<4;4,1>:ub //Dest. YUY2 offset: bytes 28..31 of r2 copied to the same byte positions of r54
+ mov (2) r54.5<1>:ud r4.0<4;2,2>:ud //Src YUY2 offset and Origin offset: dwords r4.0 and r4.2 (horizontal stride 2) packed into r54.5 and r54.6
+ mov (4) r55.28<1>:ub r1.0<4;4,1>:ub // bytes 0..3 of r1 saved in bytes 28..31 of r55
+
+ mov (8) r61.20<1>:ub r1.4<8;8,1>:ub // bytes 4..11 of r1 saved in bytes 20..27 of r61
+ mov (4) r61.28<1>:ub r1.12<4;4,1>:ub // bytes 12..15 of r1 saved in bytes 28..31 of r61; r61.20..r61.30 are the byte operands compared against in the noise reduction code
+
+ //Move Inline Data to another space - so that it can be used as Temp Space --> r7
+ mov (4) r62.10<1>:w r7.0<4;4,1>:w // words 0..3 of r7 saved in words 10..13 of r62
+ mov (4) r63.10<1>:w r7.4<4;4,1>:w // words 4..7 of r7 saved in words 10..13 of r63
+
+
+
+
+
+//Module Name : DN_UV_Noise_Detection_Set_Top_Region_N
+//Author : Tatiya, Rupesh
+//Description : Sets sub-region region N from Top region.
+
+
+ mov (1) a0.0:uw 1664:uw // NOTE(review): 1664 = 52*32, presumably the byte address of r52 (start of CURR_UV) - confirm
+ mov (1) a0.1:uw 1816:uw // NOTE(review): 1816 = 56*32 + 24, presumably byte 24 of r56 (inside uwSOAD_MIN_8x4) - confirm
+
+
+
+
+add (1) r7.7<1>:d ip:ud 32:d { NoCompact } // r7.7 (dword) = ip + 32; presumably the return address just past the jmpi (two uncompacted instructions) - confirm
+ jmpi (1) DN_UV_NOISE_DETECTION_UV { NoCompact } // jump to the shared noise detection code
+
+
+
+
+
+//Module Name : DN_UV_Noise_Detection_Set_Top_Region_N
+//Author : Tatiya, Rupesh
+//Description : Sets sub-region region N from Top region.
+
+
+ //TODO - remove one instruction here using arithmetic. -rT
+ mov (1) a0.0:uw 1792:uw // NOTE(review): 1792 = 56*32, presumably the byte address of r56 - confirm
+ mov (1) a0.1:uw 1820:uw // NOTE(review): 1820 = 56*32 + 28, presumably byte 28 of r56 (inside uwSOAD_MIN_8x4) - confirm
+
+
+
+
+add (1) r7.7<1>:d ip:ud 32:d { NoCompact } // r7.7 (dword) = ip + 32; presumably the return address just past the jmpi (two uncompacted instructions) - confirm
+ jmpi (1) DN_UV_NOISE_DETECTION_UV { NoCompact } // jump to the shared noise detection code
+
+
+
+
+
+//Module Name : DN_UV_Noise_Detection_Set_Top_Region_N
+//Author : Tatiya, Rupesh
+//Description : Sets sub-region region N from Top region.
+
+
+ //TODO - remove one instruction here using arithmetic. -rT
+ mov (1) a0.0:uw 1920:uw // NOTE(review): 1920 = 60*32, presumably the byte address of r60 - confirm
+ mov (1) a0.1:uw 1848:uw // NOTE(review): 1848 = 57*32 + 24, presumably byte 24 of r57 - confirm
+
+
+
+
+add (1) r7.7<1>:d ip:ud 32:d { NoCompact } // r7.7 (dword) = ip + 32; presumably the return address just past the jmpi (two uncompacted instructions) - confirm
+ jmpi (1) DN_UV_NOISE_DETECTION_UV { NoCompact } // jump to the shared noise detection code
+
+
+
+
+
+//Module Name : DN_UV_Noise_Detection_Set_Top_Region_N
+//Author : Tatiya, Rupesh
+//Description : Sets sub-region region N from Top region.
+
+
+ //TODO - remove one instruction here using arithmetic. -rT
+ mov (1) a0.0:uw 2048:uw // NOTE(review): 2048 = 64*32, presumably the byte address of r64 - confirm
+ mov (1) a0.1:uw 1852:uw // NOTE(review): 1852 = 57*32 + 28, presumably byte 28 of r57 - confirm
+
+
+
+
+add (1) r7.7<1>:d ip:ud 32:d { NoCompact } // r7.7 (dword) = ip + 32; presumably the return address just past the jmpi (two uncompacted instructions) - confirm
+ jmpi (1) DN_UV_NOISE_DETECTION_UV { NoCompact } // jump to the shared noise detection code
+
+
+
+
+
+//Module Name : DN_UV_Noise_Detection_Set_Top_Region_N
+//Author : Tatiya, Rupesh
+//Description : Sets sub-region region N from Top region.
+
+
+ mov (1) a0.0:uw 2304:uw // NOTE(review): 2304 = 72*32, presumably the byte address of r72 - confirm
+ mov (1) a0.1:uw 1880:uw // NOTE(review): 1880 = 58*32 + 24, presumably byte 24 of r58 - confirm
+
+
+
+
+add (1) r7.7<1>:d ip:ud 32:d { NoCompact } // r7.7 (dword) = ip + 32; presumably the return address just past the jmpi (two uncompacted instructions) - confirm
+ jmpi (1) DN_UV_NOISE_DETECTION_UV { NoCompact } // jump to the shared noise detection code
+
+
+
+
+
+//Module Name : DN_UV_Noise_Detection_Set_Top_Region_N
+//Author : Tatiya, Rupesh
+//Description : Sets sub-region region N from Top region.
+
+
+ //TODO - remove one instruction here using arithmetic. -rT
+ mov (1) a0.0:uw 2432:uw // NOTE(review): 2432 = 76*32, presumably the byte address of r76 - confirm
+ mov (1) a0.1:uw 1884:uw // NOTE(review): 1884 = 58*32 + 28, presumably byte 28 of r58 - confirm
+
+
+
+
+add (1) r7.7<1>:d ip:ud 32:d { NoCompact } // r7.7 (dword) = ip + 32; presumably the return address just past the jmpi (two uncompacted instructions) - confirm
+ jmpi (1) DN_UV_NOISE_DETECTION_UV { NoCompact } // jump to the shared noise detection code
+
+
+
+
+
+//Module Name : DN_UV_Noise_Detection_Set_Top_Region_N
+//Author : Tatiya, Rupesh
+//Description : Sets sub-region region N from Top region.
+
+
+ //TODO - remove one instruction here using arithmetic. -rT
+ mov (1) a0.0:uw 2560:uw // NOTE(review): 2560 = 80*32, presumably the byte address of r80 - confirm
+ mov (1) a0.1:uw 1912:uw // NOTE(review): 1912 = 59*32 + 24, presumably byte 24 of r59 - confirm
+
+
+
+
+add (1) r7.7<1>:d ip:ud 32:d { NoCompact } // r7.7 (dword) = ip + 32; presumably the return address just past the jmpi (two uncompacted instructions) - confirm
+ jmpi (1) DN_UV_NOISE_DETECTION_UV { NoCompact } // jump to the shared noise detection code
+
+
+
+
+
+//Module Name : DN_UV_Noise_Detection_Set_Top_Region_N
+//Author : Tatiya, Rupesh
+//Description : Sets sub-region region N from Top region.
+
+
+ //TODO - remove one instruction here using arithmetic. -rT
+ mov (1) a0.0:uw 2688:uw // NOTE(review): 2688 = 84*32, presumably the byte address of r84 - confirm
+ mov (1) a0.1:uw 1916:uw // NOTE(review): 1916 = 59*32 + 28, presumably byte 28 of r59 - confirm
+
+
+
+
+add (1) r7.7<1>:d ip:ud 32:d { NoCompact } // r7.7 (dword) = ip + 32; presumably the return address just past the jmpi (two uncompacted instructions) - confirm
+ jmpi (1) DN_UV_NOISE_DETECTION_UV { NoCompact } // jump to the shared noise detection code
+
+
+
+//Module : DN_UV_Noise_Reduction_UV
+//Author : Tatiya, Rupesh
+//Description : Performs Noise Reduction on 16x16 U and 16x16 V.
+//Tasks : 1. Update weight history
+// 2. Find if the block is a motion block
+// 3. Compute Denoised Pixel.
+
+
+
+
+//History is 1+1 byte every 4x4 U and 4x4 V.
+
+ cmp.l.f0.0 (16) null<1>:w ubHIST_UV(0,0)<16;16,1> r61.20<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w ubHIST_UV(0,0)<16;16,1> r61.22<0;2,1>:ub
+
+ mov (16) uwCURBE_TEMP(0)<1> 0:w
+ mov (16) uwCURBE_TEMP(1)<1> 0:w
+
+ //Compute diff betn curr and prev. - First 16 lines
+ // 8 lines here
+ add (16) wDIFF_TEMPORAL(0)<1> ubCURR_UV(2,2)<16;16,1> -ubPREV_UV(0,0)<16;16,1> //Diff UV interleaved
+ add (16) wDIFF_TEMPORAL(1)<1> ubCURR_UV(3,2)<16;16,1> -ubPREV_UV(0,16)<16;16,1> //Diff UV interleaved
+ add (16) wDIFF_TEMPORAL(2)<1> ubCURR_UV(4,2)<16;16,1> -ubPREV_UV(0,32)<16;16,1> //Diff UV interleaved
+ add (16) wDIFF_TEMPORAL(3)<1> ubCURR_UV(5,2)<16;16,1> -ubPREV_UV(0,48)<16;16,1> //Diff UV interleaved
+ add (16) wDIFF_TEMPORAL(4)<1> ubCURR_UV(6,2)<16;16,1> -ubPREV_UV(0,64)<16;16,1> //Diff UV interleaved
+ add (16) wDIFF_TEMPORAL(5)<1> ubCURR_UV(7,2)<16;16,1> -ubPREV_UV(0,80)<16;16,1> //Diff UV interleaved
+ add (16) wDIFF_TEMPORAL(6)<1> ubCURR_UV(8,2)<16;16,1> -ubPREV_UV(0,96)<16;16,1> //Diff UV interleaved
+ add (16) wDIFF_TEMPORAL(7)<1> ubCURR_UV(9,2)<16;16,1> -ubPREV_UV(0,112)<16;16,1> //Diff UV interleaved
+
+ //Update WT HIST
+ (-f0.0) shr (16) uwCURBE_TEMP(0)<1> ubHIST_UV(0,0)<16;16,1> 1:w
+ (f1.0) add (16) uwCURBE_TEMP(2)<1> ubHIST_UV(0,0)<16;16,1> r61.24<0;2,1>:ub
+ (f0.0) mov (16) uwCURBE_TEMP(2)<1> r61.20<0;2,1>:ub
+ (-f0.0.anyv) mov (16) uwCURBE_TEMP(2)<1> ubHIST_UV(0,0)<16;16,1>
+
+ cmp.l.f0.0 (16) null<1>:w ubHIST_UV(0,16)<16;16,1> r61.20<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w ubHIST_UV(0,16)<16;16,1> r61.22<0;2,1>:ub
+
+ //Compute diff betn curr and prev. - First 16 lines
+ // 8 more lines here
+ add (16) wDIFF_TEMPORAL(8)<1> ubCURR_UV(10,2)<16;16,1> -ubPREV_UV(0,128)<16;16,1> //Diff UV interleaved
+ add (16) wDIFF_TEMPORAL(9)<1> ubCURR_UV(11,2)<16;16,1> -ubPREV_UV(0,144)<16;16,1> //Diff UV interleaved
+ add (16) wDIFF_TEMPORAL(10)<1> ubCURR_UV(12,2)<16;16,1> -ubPREV_UV(0,160)<16;16,1> //Diff UV interleaved
+ add (16) wDIFF_TEMPORAL(11)<1> ubCURR_UV(13,2)<16;16,1> -ubPREV_UV(0,176)<16;16,1> //Diff UV interleaved
+ add (16) wDIFF_TEMPORAL(12)<1> ubCURR_UV(14,2)<16;16,1> -ubPREV_UV(0,192)<16;16,1> //Diff UV interleaved
+ add (16) wDIFF_TEMPORAL(13)<1> ubCURR_UV(15,2)<16;16,1> -ubPREV_UV(0,208)<16;16,1> //Diff UV interleaved
+ add (16) wDIFF_TEMPORAL(14)<1> ubCURR_UV(16,2)<16;16,1> -ubPREV_UV(0,224)<16;16,1> //Diff UV interleaved
+ add (16) wDIFF_TEMPORAL(15)<1> ubCURR_UV(17,2)<16;16,1> -ubPREV_UV(0,240)<16;16,1> //Diff UV interleaved
+
+ (-f0.0) shr (16) uwCURBE_TEMP(1)<1> ubHIST_UV(0,16)<16;16,1> 1:w
+ (f1.0) add (16) uwCURBE_TEMP(3)<1> ubHIST_UV(0,16)<16;16,1> r61.24<0;2,1>:ub
+ (f0.0) mov (16) uwCURBE_TEMP(3)<1> r61.20<0;2,1>:ub
+ (-f0.0.anyv) mov(16) uwCURBE_TEMP(3)<1> ubHIST_UV(0,16)<16;16,1>
+
+ //16x16 to 16x4 - First 16 lines
+ add (16) acc0.0<1>:uw (abs)wDIFF_TEMPORAL(0)<16;16,1> (abs)wDIFF_TEMPORAL(1)<16;16,1>
+ add (16) acc0.0<1>:uw acc0.0<16;16,1>:uw (abs)wDIFF_TEMPORAL(2)<16;16,1>
+ add (16) uwDIFF_TEMPORAL_SUM4x4(0)<1> acc0.0<16;16,1>:uw (abs)wDIFF_TEMPORAL(3)<16;16,1>
+ //16x16 to 16x4 - First 16 lines
+ add (16) acc0.0<1>:uw (abs)wDIFF_TEMPORAL(4)<16;16,1> (abs)wDIFF_TEMPORAL(5)<16;16,1>
+ add (16) acc0.0<1>:uw acc0.0<16;16,1>:uw (abs)wDIFF_TEMPORAL(6)<16;16,1>
+ add (16) uwDIFF_TEMPORAL_SUM4x4(1)<1> acc0.0<16;16,1>:uw (abs)wDIFF_TEMPORAL(7)<16;16,1>
+ //16x16 to 16x4 - First 16 lines
+ add (16) acc0.0<1>:uw (abs)wDIFF_TEMPORAL(8)<16;16,1> (abs)wDIFF_TEMPORAL(9)<16;16,1>
+ add (16) acc0.0<1>:uw acc0.0<16;16,1>:uw (abs)wDIFF_TEMPORAL(10)<16;16,1>
+ add (16) uwDIFF_TEMPORAL_SUM4x4(2)<1> acc0.0<16;16,1>:uw (abs)wDIFF_TEMPORAL(11)<16;16,1>
+ //16x16 to 16x4 - First 16 lines
+ add (16) acc0.0<1>:uw (abs)wDIFF_TEMPORAL(12)<16;16,1> (abs)wDIFF_TEMPORAL(13)<16;16,1>
+ add (16) acc0.0<1>:uw acc0.0<16;16,1>:uw (abs)wDIFF_TEMPORAL(14)<16;16,1>
+ add (16) uwDIFF_TEMPORAL_SUM4x4(3)<1> acc0.0<16;16,1>:uw (abs)wDIFF_TEMPORAL(15)<16;16,1>
+
+//Compute diff betn curr and prev. - Second 16 lines
+//13 lines.
+ add (16) wDIFF_TEMPORAL(16)<1> ubCURR_UV(22,2)<16;16,1> -ubPREV_UV(8,0)<16;16,1> //Diff UV interleaved
+ add (16) wDIFF_TEMPORAL(17)<1> ubCURR_UV(23,2)<16;16,1> -ubPREV_UV(8,16)<16;16,1> //Diff UV interleaved
+ add (16) wDIFF_TEMPORAL(18)<1> ubCURR_UV(24,2)<16;16,1> -ubPREV_UV(8,32)<16;16,1> //Diff UV interleaved
+ add (16) wDIFF_TEMPORAL(19)<1> ubCURR_UV(25,2)<16;16,1> -ubPREV_UV(8,48)<16;16,1> //Diff UV interleaved
+ add (16) wDIFF_TEMPORAL(20)<1> ubCURR_UV(26,2)<16;16,1> -ubPREV_UV(8,64)<16;16,1> //Diff UV interleaved
+ add (16) wDIFF_TEMPORAL(21)<1> ubCURR_UV(27,2)<16;16,1> -ubPREV_UV(8,80)<16;16,1> //Diff UV interleaved
+ add (16) wDIFF_TEMPORAL(22)<1> ubCURR_UV(28,2)<16;16,1> -ubPREV_UV(8,96)<16;16,1> //Diff UV interleaved
+ add (16) wDIFF_TEMPORAL(23)<1> ubCURR_UV(29,2)<16;16,1> -ubPREV_UV(8,112)<16;16,1> //Diff UV interleaved
+ add (16) wDIFF_TEMPORAL(24)<1> ubCURR_UV(30,2)<16;16,1> -ubPREV_UV(8,128)<16;16,1> //Diff UV interleaved
+ add (16) wDIFF_TEMPORAL(25)<1> ubCURR_UV(31,2)<16;16,1> -ubPREV_UV(8,144)<16;16,1> //Diff UV interleaved
+ add (16) wDIFF_TEMPORAL(26)<1> ubCURR_UV(32,2)<16;16,1> -ubPREV_UV(8,160)<16;16,1> //Diff UV interleaved
+ add (16) wDIFF_TEMPORAL(27)<1> ubCURR_UV(33,2)<16;16,1> -ubPREV_UV(8,176)<16;16,1> //Diff UV interleaved
+ add (16) wDIFF_TEMPORAL(28)<1> ubCURR_UV(34,2)<16;16,1> -ubPREV_UV(8,192)<16;16,1> //Diff UV interleaved
+
+//3 more lines
+ add (16) wCURBE_TEMP(4)<1> ubCURR_UV(35,2)<16;16,1> -ubPREV_UV(8,208)<16;16,1> //Diff UV interleaved
+ add (16) wCURBE_TEMP(5)<1> ubCURR_UV(36,2)<16;16,1> -ubPREV_UV(8,224)<16;16,1> //Diff UV interleaved
+ add (16) wCURBE_TEMP(6)<1> ubCURR_UV(37,2)<16;16,1> -ubPREV_UV(8,240)<16;16,1> //Diff UV interleaved
+
+ //16x4 to 8x4 - First 16 lines
+ add (16) uwDIFF_TEMPORAL_SUM4x4(0)<1> uwDIFF_TEMPORAL_SUM4x4(0,0)<4;2,1> uwDIFF_TEMPORAL_SUM4x4(0,2)<4;2,1>
+ add (16) uwDIFF_TEMPORAL_SUM4x4(1)<1> uwDIFF_TEMPORAL_SUM4x4(2,0)<4;2,1> uwDIFF_TEMPORAL_SUM4x4(2,2)<4;2,1>
+
+ //8x4 to 4x4 - First 16 lines
+ add (16) uwDIFF_TEMPORAL_SUM4x4_FINAL(0)<1> uwDIFF_TEMPORAL_SUM4x4(0,0)<4;2,1> uwDIFF_TEMPORAL_SUM4x4(0,2)<4;2,1> { AccWrEn }
+
+ //16x16 to 16x4 - Second 16 lines
+ add (16) acc0.0<1>:uw (abs)wDIFF_TEMPORAL(16)<16;16,1> (abs)wDIFF_TEMPORAL(17)<16;16,1>
+ add (16) acc0.0<1>:uw acc0.0<16;16,1>:uw (abs)wDIFF_TEMPORAL(18)<16;16,1>
+ add (16) uwDIFF_TEMPORAL_SUM4x4(0)<1> acc0.0<16;16,1>:uw (abs)wDIFF_TEMPORAL(19)<16;16,1>
+ //16x16 to 16x4 - Second 16 lines
+ add (16) acc0.0<1>:uw (abs)wDIFF_TEMPORAL(20)<16;16,1> (abs)wDIFF_TEMPORAL(21)<16;16,1>
+ add (16) acc0.0<1>:uw acc0.0<16;16,1>:uw (abs)wDIFF_TEMPORAL(22)<16;16,1>
+ add (16) uwDIFF_TEMPORAL_SUM4x4(1)<1> acc0.0<16;16,1>:uw (abs)wDIFF_TEMPORAL(23)<16;16,1>
+ //16x16 to 16x4 - Second 16 lines
+ add (16) acc0.0<1>:uw (abs)wDIFF_TEMPORAL(24)<16;16,1> (abs)wDIFF_TEMPORAL(25)<16;16,1>
+ add (16) acc0.0<1>:uw acc0.0<16;16,1>:uw (abs)wDIFF_TEMPORAL(26)<16;16,1>
+ add (16) uwDIFF_TEMPORAL_SUM4x4(2)<1> acc0.0<16;16,1>:uw (abs)wDIFF_TEMPORAL(27)<16;16,1>
+
+ //16x16 to 16x4 - Second 16 lines
+ add (16) acc0.0<1>:uw (abs)wDIFF_TEMPORAL(28)<16;16,1> (abs)wCURBE_TEMP(4)<16;16,1>
+ add (16) acc0.0<1>:uw acc0.0<16;16,1>:uw (abs)wCURBE_TEMP(5)<16;16,1>
+ add (16) uwDIFF_TEMPORAL_SUM4x4(3)<1> acc0.0<16;16,1>:uw (abs)wCURBE_TEMP(6)<16;16,1>
+
+ //Find if block is motion block - First 16 lines
+ cmp.g.f0.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_FINAL(0)<16;16,1> r61.26<0;2,1>:ub
+
+ //Move TEMPORAL_SUM4x4 for SIMD16 use later.
+ mov (8) uwDIFF_TEMPORAL_SUM4x4_TEMP_0(0,0)<1> uwDIFF_TEMPORAL_SUM4x4_FINAL(0,0)<0;2,1>
+ mov (8) uwDIFF_TEMPORAL_SUM4x4_TEMP_0(0,8)<1> uwDIFF_TEMPORAL_SUM4x4_FINAL(0,2)<0;2,1>
+ mov (8) uwDIFF_TEMPORAL_SUM4x4_TEMP_1(0,0)<1> uwDIFF_TEMPORAL_SUM4x4_FINAL(0,4)<0;2,1>
+ mov (8) uwDIFF_TEMPORAL_SUM4x4_TEMP_1(0,8)<1> uwDIFF_TEMPORAL_SUM4x4_FINAL(0,6)<0;2,1>
+ mov (8) uwDIFF_TEMPORAL_SUM4x4_TEMP_2(0,0)<1> uwDIFF_TEMPORAL_SUM4x4_FINAL(0,8)<0;2,1>
+ mov (8) uwDIFF_TEMPORAL_SUM4x4_TEMP_2(0,8)<1> uwDIFF_TEMPORAL_SUM4x4_FINAL(0,10)<0;2,1>
+ mov (8) uwDIFF_TEMPORAL_SUM4x4_TEMP_3(0,0)<1> uwDIFF_TEMPORAL_SUM4x4_FINAL(0,12)<0;2,1>
+ mov (8) uwDIFF_TEMPORAL_SUM4x4_TEMP_3(0,8)<1> uwDIFF_TEMPORAL_SUM4x4_FINAL(0,14)<0;2,1>
+
+ //Pick Appropriate Weight History Based on motion. - First 16 lines
+ (-f0.0) mov (16) uwCURBE_TEMP(0)<1> uwCURBE_TEMP(2)<16;16,1>
+
+ //Actual DN - First 16 lines
+ cmp.l.f0.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(0)<16;16,1> r61.28<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(0)<16;16,1> r61.30<0;2,1>:ub
+ mul (8) acc0.0<1>:w ubCURR_UV(2,2)<8;8,1> -uwCURBE_TEMP(0,0)<0;2,1>
+ mul (8) acc0.8<1>:w ubCURR_UV(2,10)<8;8,1> -uwCURBE_TEMP(0,2)<0;2,1>
+ mac (16) acc0<1>:w ubCURR_UV(2,2)<16;16,1> 256:w
+ mac (8) acc0.0<1>:w ubPREV_UV(0,0)<8;8,1> uwCURBE_TEMP(0,0)<0;2,1>
+ mac (8) acc0.8<1>:w ubPREV_UV(0,8)<8;8,1> uwCURBE_TEMP(0,2)<0;2,1>
+ add (16) acc0<1>:w acc0<16;16,1>:w 128:w
+ (f0.0) shr (16) wDIFF_TEMPORAL(0)<1> acc0<16;16,1>:w 8:w
+ (-f0.0) mov (16) wDIFF_TEMPORAL(0)<1> ubCURR_UV(2,2)<16;16,1>
+ cmp.le.f0.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_TEMP_0(0)<16;16,1> r61.26<0;2,1>:ub
+ (-f0.0.allv) avg (16) wDIFF_TEMPORAL(0)<1> wDIFF_TEMPORAL(0)<16;16,1> ubCURR_UV(2,2)<16;16,1>
+
+ cmp.l.f0.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(1)<16;16,1> r61.28<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(1)<16;16,1> r61.30<0;2,1>:ub
+ mul (8) acc0.0<1>:w ubCURR_UV(3,2)<8;8,1> -uwCURBE_TEMP(0,0)<0;2,1>
+ mul (8) acc0.8<1>:w ubCURR_UV(3,10)<8;8,1> -uwCURBE_TEMP(0,2)<0;2,1>
+ mac (16) acc0<1>:w ubCURR_UV(3,2)<16;16,1> 256:w
+ mac (8) acc0.0<1>:w ubPREV_UV(0,16)<8;8,1> uwCURBE_TEMP(0,0)<0;2,1>
+ mac (8) acc0.8<1>:w ubPREV_UV(0,24)<8;8,1> uwCURBE_TEMP(0,2)<0;2,1>
+ add (16) acc0<1>:w acc0<16;16,1>:w 128:w
+ (f0.0) shr (16) wDIFF_TEMPORAL(1)<1> acc0<16;16,1>:w 8:w
+ (-f0.0) mov (16) wDIFF_TEMPORAL(1)<1> ubCURR_UV(3,2)<16;16,1>
+ cmp.le.f0.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_TEMP_0(0)<16;16,1> r61.26<0;2,1>:ub
+ (-f0.0.allv) avg (16) wDIFF_TEMPORAL(1)<1> wDIFF_TEMPORAL(1)<16;16,1> ubCURR_UV(3,2)<16;16,1>
+
+ cmp.l.f0.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(2)<16;16,1> r61.28<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(2)<16;16,1> r61.30<0;2,1>:ub
+ mul (8) acc0.0<1>:w ubCURR_UV(4,2)<8;8,1> -uwCURBE_TEMP(0,0)<0;2,1>
+ mul (8) acc0.8<1>:w ubCURR_UV(4,10)<8;8,1> -uwCURBE_TEMP(0,2)<0;2,1>
+ mac (16) acc0<1>:w ubCURR_UV(4,2)<16;16,1> 256:w
+ mac (8) acc0.0<1>:w ubPREV_UV(0,32)<8;8,1> uwCURBE_TEMP(0,0)<0;2,1>
+ mac (8) acc0.8<1>:w ubPREV_UV(0,40)<8;8,1> uwCURBE_TEMP(0,2)<0;2,1>
+ add (16) acc0<1>:w acc0<16;16,1>:w 128:w
+ (f0.0) shr (16) wDIFF_TEMPORAL(2)<1> acc0<16;16,1>:w 8:w
+ (-f0.0) mov (16) wDIFF_TEMPORAL(2)<1> ubCURR_UV(4,2)<16;16,1>
+ cmp.le.f0.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_TEMP_0(0)<16;16,1> r61.26<0;2,1>:ub
+ (-f0.0.allv) avg (16) wDIFF_TEMPORAL(2)<1> wDIFF_TEMPORAL(2)<16;16,1> ubCURR_UV(4,2)<16;16,1>
+
+ cmp.l.f0.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(3)<16;16,1> r61.28<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(3)<16;16,1> r61.30<0;2,1>:ub
+ mul (8) acc0.0<1>:w ubCURR_UV(5,2)<8;8,1> -uwCURBE_TEMP(0,0)<0;2,1>
+ mul (8) acc0.8<1>:w ubCURR_UV(5,10)<8;8,1> -uwCURBE_TEMP(0,2)<0;2,1>
+ mac (16) acc0<1>:w ubCURR_UV(5,2)<16;16,1> 256:w
+ mac (8) acc0.0<1>:w ubPREV_UV(0,48)<8;8,1> uwCURBE_TEMP(0,0)<0;2,1>
+ mac (8) acc0.8<1>:w ubPREV_UV(0,56)<8;8,1> uwCURBE_TEMP(0,2)<0;2,1>
+ add (16) acc0<1>:w acc0<16;16,1>:w 128:w
+ (f0.0) shr (16) wDIFF_TEMPORAL(3)<1> acc0<16;16,1>:w 8:w
+ (-f0.0) mov (16) wDIFF_TEMPORAL(3)<1> ubCURR_UV(5,2)<16;16,1>
+ cmp.le.f0.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_TEMP_0(0)<16;16,1> r61.26<0;2,1>:ub
+ (-f0.0.allv) avg (16) wDIFF_TEMPORAL(3)<1> wDIFF_TEMPORAL(3)<16;16,1> ubCURR_UV(5,2)<16;16,1>
+
+ cmp.l.f0.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(4)<16;16,1> r61.28<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(4)<16;16,1> r61.30<0;2,1>:ub
+ mul (8) acc0.0<1>:w ubCURR_UV(6,2)<8;8,1> -uwCURBE_TEMP(0,4)<0;2,1>
+ mul (8) acc0.8<1>:w ubCURR_UV(6,10)<8;8,1> -uwCURBE_TEMP(0,6)<0;2,1>
+ mac (16) acc0<1>:w ubCURR_UV(6,2)<16;16,1> 256:w
+ mac (8) acc0.0<1>:w ubPREV_UV(0,64)<8;8,1> uwCURBE_TEMP(0,4)<0;2,1>
+ mac (8) acc0.8<1>:w ubPREV_UV(0,72)<8;8,1> uwCURBE_TEMP(0,6)<0;2,1>
+ add (16) acc0<1>:w acc0<16;16,1>:w 128:w
+ (f0.0) shr (16) wDIFF_TEMPORAL(4)<1> acc0<16;16,1>:w 8:w
+ (-f0.0) mov (16) wDIFF_TEMPORAL(4)<1> ubCURR_UV(6,2)<16;16,1>
+ cmp.le.f0.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_TEMP_1(0)<16;16,1> r61.26<0;2,1>:ub
+ (-f0.0.allv) avg (16) wDIFF_TEMPORAL(4)<1> wDIFF_TEMPORAL(4)<16;16,1> ubCURR_UV(6,2)<16;16,1>
+
+ cmp.l.f0.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(5)<16;16,1> r61.28<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(5)<16;16,1> r61.30<0;2,1>:ub
+ mul (8) acc0.0<1>:w ubCURR_UV(7,2)<8;8,1> -uwCURBE_TEMP(0,4)<0;2,1>
+ mul (8) acc0.8<1>:w ubCURR_UV(7,10)<8;8,1> -uwCURBE_TEMP(0,6)<0;2,1>
+ mac (16) acc0<1>:w ubCURR_UV(7,2)<16;16,1> 256:w
+ mac (8) acc0.0<1>:w ubPREV_UV(0,80)<8;8,1> uwCURBE_TEMP(0,4)<0;2,1>
+ mac (8) acc0.8<1>:w ubPREV_UV(0,88)<8;8,1> uwCURBE_TEMP(0,6)<0;2,1>
+ add (16) acc0<1>:w acc0<16;16,1>:w 128:w
+ (f0.0) shr (16) wDIFF_TEMPORAL(5)<1> acc0<16;16,1>:w 8:w
+ (-f0.0) mov (16) wDIFF_TEMPORAL(5)<1> ubCURR_UV(7,2)<16;16,1>
+ cmp.le.f0.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_TEMP_1(0)<16;16,1> r61.26<0;2,1>:ub
+ (-f0.0.allv) avg (16) wDIFF_TEMPORAL(5)<1> wDIFF_TEMPORAL(5)<16;16,1> ubCURR_UV(7,2)<16;16,1>
+
+ cmp.l.f0.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(6)<16;16,1> r61.28<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(6)<16;16,1> r61.30<0;2,1>:ub
+ mul (8) acc0.0<1>:w ubCURR_UV(8,2)<8;8,1> -uwCURBE_TEMP(0,4)<0;2,1>
+ mul (8) acc0.8<1>:w ubCURR_UV(8,10)<8;8,1> -uwCURBE_TEMP(0,6)<0;2,1>
+ mac (16) acc0<1>:w ubCURR_UV(8,2)<16;16,1> 256:w
+ mac (8) acc0.0<1>:w ubPREV_UV(0,96)<8;8,1> uwCURBE_TEMP(0,4)<0;2,1>
+ mac (8) acc0.8<1>:w ubPREV_UV(0,104)<8;8,1> uwCURBE_TEMP(0,6)<0;2,1>
+ add (16) acc0<1>:w acc0<16;16,1>:w 128:w
+ (f0.0) shr (16) wDIFF_TEMPORAL(6)<1> acc0<16;16,1>:w 8:w
+ (-f0.0) mov (16) wDIFF_TEMPORAL(6)<1> ubCURR_UV(8,2)<16;16,1>
+ cmp.le.f0.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_TEMP_1(0)<16;16,1> r61.26<0;2,1>:ub
+ (-f0.0.allv) avg (16) wDIFF_TEMPORAL(6)<1> wDIFF_TEMPORAL(6)<16;16,1> ubCURR_UV(8,2)<16;16,1>
+
+ cmp.l.f0.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(7)<16;16,1> r61.28<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(7)<16;16,1> r61.30<0;2,1>:ub
+ mul (8) acc0.0<1>:w ubCURR_UV(9,2)<8;8,1> -uwCURBE_TEMP(0,4)<0;2,1>
+ mul (8) acc0.8<1>:w ubCURR_UV(9,10)<8;8,1> -uwCURBE_TEMP(0,6)<0;2,1>
+ mac (16) acc0<1>:w ubCURR_UV(9,2)<16;16,1> 256:w
+ mac (8) acc0.0<1>:w ubPREV_UV(0,112)<8;8,1> uwCURBE_TEMP(0,4)<0;2,1>
+ mac (8) acc0.8<1>:w ubPREV_UV(0,120)<8;8,1> uwCURBE_TEMP(0,6)<0;2,1>
+ add (16) acc0<1>:w acc0<16;16,1>:w 128:w
+ (f0.0) shr (16) wDIFF_TEMPORAL(7)<1> acc0<16;16,1>:w 8:w
+ (-f0.0) mov (16) wDIFF_TEMPORAL(7)<1> ubCURR_UV(9,2)<16;16,1>
+ cmp.le.f0.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_TEMP_1(0)<16;16,1> r61.26<0;2,1>:ub
+ (-f0.0.allv) avg (16) wDIFF_TEMPORAL(7)<1> wDIFF_TEMPORAL(7)<16;16,1> ubCURR_UV(9,2)<16;16,1>
+
+ cmp.l.f0.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(8)<16;16,1> r61.28<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(8)<16;16,1> r61.30<0;2,1>:ub
+ mul (8) acc0.0<1>:w ubCURR_UV(10,2)<8;8,1> -uwCURBE_TEMP(0,8)<0;2,1>
+ mul (8) acc0.8<1>:w ubCURR_UV(10,10)<8;8,1> -uwCURBE_TEMP(0,10)<0;2,1>
+ mac (16) acc0<1>:w ubCURR_UV(10,2)<16;16,1> 256:w
+ mac (8) acc0.0<1>:w ubPREV_UV(0,128)<8;8,1> uwCURBE_TEMP(0,8)<0;2,1>
+ mac (8) acc0.8<1>:w ubPREV_UV(0,136)<8;8,1> uwCURBE_TEMP(0,10)<0;2,1>
+ add (16) acc0<1>:w acc0<16;16,1>:w 128:w
+ (f0.0) shr (16) wDIFF_TEMPORAL(8)<1> acc0<16;16,1>:w 8:w
+ (-f0.0) mov (16) wDIFF_TEMPORAL(8)<1> ubCURR_UV(10,2)<16;16,1>
+ cmp.le.f0.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_TEMP_2(0)<16;16,1> r61.26<0;2,1>:ub
+ (-f0.0.allv) avg (16) wDIFF_TEMPORAL(8)<1> wDIFF_TEMPORAL(8)<16;16,1> ubCURR_UV(10,2)<16;16,1>
+
+ cmp.l.f0.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(9)<16;16,1> r61.28<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(9)<16;16,1> r61.30<0;2,1>:ub
+ mul (8) acc0.0<1>:w ubCURR_UV(11,2)<8;8,1> -uwCURBE_TEMP(0,8)<0;2,1>
+ mul (8) acc0.8<1>:w ubCURR_UV(11,10)<8;8,1> -uwCURBE_TEMP(0,10)<0;2,1>
+ mac (16) acc0<1>:w ubCURR_UV(11,2)<16;16,1> 256:w
+ mac (8) acc0.0<1>:w ubPREV_UV(0,144)<8;8,1> uwCURBE_TEMP(0,8)<0;2,1>
+ mac (8) acc0.8<1>:w ubPREV_UV(0,152)<8;8,1> uwCURBE_TEMP(0,10)<0;2,1>
+ add (16) acc0<1>:w acc0<16;16,1>:w 128:w
+ (f0.0) shr (16) wDIFF_TEMPORAL(9)<1> acc0<16;16,1>:w 8:w
+ (-f0.0) mov (16) wDIFF_TEMPORAL(9)<1> ubCURR_UV(11,2)<16;16,1>
+ cmp.le.f0.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_TEMP_2(0)<16;16,1> r61.26<0;2,1>:ub
+ (-f0.0.allv) avg (16) wDIFF_TEMPORAL(9)<1> wDIFF_TEMPORAL(9)<16;16,1> ubCURR_UV(11,2)<16;16,1>
+
+ cmp.l.f0.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(10)<16;16,1> r61.28<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(10)<16;16,1> r61.30<0;2,1>:ub
+ mul (8) acc0.0<1>:w ubCURR_UV(12,2)<8;8,1> -uwCURBE_TEMP(0,8)<0;2,1>
+ mul (8) acc0.8<1>:w ubCURR_UV(12,10)<8;8,1> -uwCURBE_TEMP(0,10)<0;2,1>
+ mac (16) acc0<1>:w ubCURR_UV(12,2)<16;16,1> 256:w
+ mac (8) acc0.0<1>:w ubPREV_UV(0,160)<8;8,1> uwCURBE_TEMP(0,8)<0;2,1>
+ mac (8) acc0.8<1>:w ubPREV_UV(0,168)<8;8,1> uwCURBE_TEMP(0,10)<0;2,1>
+ add (16) acc0<1>:w acc0<16;16,1>:w 128:w
+ (f0.0) shr (16) wDIFF_TEMPORAL(10)<1> acc0<16;16,1>:w 8:w
+ (-f0.0) mov (16) wDIFF_TEMPORAL(10)<1> ubCURR_UV(12,2)<16;16,1>
+ cmp.le.f0.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_TEMP_2(0)<16;16,1> r61.26<0;2,1>:ub
+ (-f0.0.allv) avg (16) wDIFF_TEMPORAL(10)<1> wDIFF_TEMPORAL(10)<16;16,1> ubCURR_UV(12,2)<16;16,1>
+
+ cmp.l.f0.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(11)<16;16,1> r61.28<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(11)<16;16,1> r61.30<0;2,1>:ub
+ mul (8) acc0.0<1>:w ubCURR_UV(13,2)<8;8,1> -uwCURBE_TEMP(0,8)<0;2,1>
+ mul (8) acc0.8<1>:w ubCURR_UV(13,10)<8;8,1> -uwCURBE_TEMP(0,10)<0;2,1>
+ mac (16) acc0<1>:w ubCURR_UV(13,2)<16;16,1> 256:w
+ mac (8) acc0.0<1>:w ubPREV_UV(0,176)<8;8,1> uwCURBE_TEMP(0,8)<0;2,1>
+ mac (8) acc0.8<1>:w ubPREV_UV(0,184)<8;8,1> uwCURBE_TEMP(0,10)<0;2,1>
+ add (16) acc0<1>:w acc0<16;16,1>:w 128:w
+ (f0.0) shr (16) wDIFF_TEMPORAL(11)<1> acc0<16;16,1>:w 8:w
+ (-f0.0) mov (16) wDIFF_TEMPORAL(11)<1> ubCURR_UV(13,2)<16;16,1>
+ cmp.le.f0.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_TEMP_2(0)<16;16,1> r61.26<0;2,1>:ub
+ (-f0.0.allv) avg (16) wDIFF_TEMPORAL(11)<1> wDIFF_TEMPORAL(11)<16;16,1> ubCURR_UV(13,2)<16;16,1>
+
+ cmp.l.f0.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(12)<16;16,1> r61.28<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(12)<16;16,1> r61.30<0;2,1>:ub
+ mul (8) acc0.0<1>:w ubCURR_UV(14,2)<8;8,1> -uwCURBE_TEMP(0,12)<0;2,1>
+ mul (8) acc0.8<1>:w ubCURR_UV(14,10)<8;8,1> -uwCURBE_TEMP(0,14)<0;2,1>
+ mac (16) acc0<1>:w ubCURR_UV(14,2)<16;16,1> 256:w
+ mac (8) acc0.0<1>:w ubPREV_UV(0,192)<8;8,1> uwCURBE_TEMP(0,12)<0;2,1>
+ mac (8) acc0.8<1>:w ubPREV_UV(0,200)<8;8,1> uwCURBE_TEMP(0,14)<0;2,1>
+ add (16) acc0<1>:w acc0<16;16,1>:w 128:w
+ (f0.0) shr (16) wDIFF_TEMPORAL(12)<1> acc0<16;16,1>:w 8:w
+ (-f0.0) mov (16) wDIFF_TEMPORAL(12)<1> ubCURR_UV(14,2)<16;16,1>
+ cmp.le.f0.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_TEMP_3(0)<16;16,1> r61.26<0;2,1>:ub
+ (-f0.0.allv) avg (16) wDIFF_TEMPORAL(12)<1> wDIFF_TEMPORAL(12)<16;16,1> ubCURR_UV(14,2)<16;16,1>
+
+ cmp.l.f0.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(13)<16;16,1> r61.28<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(13)<16;16,1> r61.30<0;2,1>:ub
+ mul (8) acc0.0<1>:w ubCURR_UV(15,2)<8;8,1> -uwCURBE_TEMP(0,12)<0;2,1>
+ mul (8) acc0.8<1>:w ubCURR_UV(15,10)<8;8,1> -uwCURBE_TEMP(0,14)<0;2,1>
+ mac (16) acc0<1>:w ubCURR_UV(15,2)<16;16,1> 256:w
+ mac (8) acc0.0<1>:w ubPREV_UV(0,208)<8;8,1> uwCURBE_TEMP(0,12)<0;2,1>
+ mac (8) acc0.8<1>:w ubPREV_UV(0,216)<8;8,1> uwCURBE_TEMP(0,14)<0;2,1>
+ add (16) acc0<1>:w acc0<16;16,1>:w 128:w
+ (f0.0) shr (16) wDIFF_TEMPORAL(13)<1> acc0<16;16,1>:w 8:w
+ (-f0.0) mov (16) wDIFF_TEMPORAL(13)<1> ubCURR_UV(15,2)<16;16,1>
+ cmp.le.f0.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_TEMP_3(0)<16;16,1> r61.26<0;2,1>:ub
+ (-f0.0.allv) avg (16) wDIFF_TEMPORAL(13)<1> wDIFF_TEMPORAL(13)<16;16,1> ubCURR_UV(15,2)<16;16,1>
+
+ cmp.l.f0.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(14)<16;16,1> r61.28<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(14)<16;16,1> r61.30<0;2,1>:ub
+ mul (8) acc0.0<1>:w ubCURR_UV(16,2)<8;8,1> -uwCURBE_TEMP(0,12)<0;2,1>
+ mul (8) acc0.8<1>:w ubCURR_UV(16,10)<8;8,1> -uwCURBE_TEMP(0,14)<0;2,1>
+ mac (16) acc0<1>:w ubCURR_UV(16,2)<16;16,1> 256:w
+ mac (8) acc0.0<1>:w ubPREV_UV(0,224)<8;8,1> uwCURBE_TEMP(0,12)<0;2,1>
+ mac (8) acc0.8<1>:w ubPREV_UV(0,232)<8;8,1> uwCURBE_TEMP(0,14)<0;2,1>
+ add (16) acc0<1>:w acc0<16;16,1>:w 128:w
+ (f0.0) shr (16) wDIFF_TEMPORAL(14)<1> acc0<16;16,1>:w 8:w
+ (-f0.0) mov (16) wDIFF_TEMPORAL(14)<1> ubCURR_UV(16,2)<16;16,1>
+ cmp.le.f0.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_TEMP_3(0)<16;16,1> r61.26<0;2,1>:ub
+ (-f0.0.allv) avg (16) wDIFF_TEMPORAL(14)<1> wDIFF_TEMPORAL(14)<16;16,1> ubCURR_UV(16,2)<16;16,1>
+
+ cmp.l.f0.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(15)<16;16,1> r61.28<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(15)<16;16,1> r61.30<0;2,1>:ub
+ mul (8) acc0.0<1>:w ubCURR_UV(17,2)<8;8,1> -uwCURBE_TEMP(0,12)<0;2,1>
+ mul (8) acc0.8<1>:w ubCURR_UV(17,10)<8;8,1> -uwCURBE_TEMP(0,14)<0;2,1>
+ mac (16) acc0<1>:w ubCURR_UV(17,2)<16;16,1> 256:w
+ mac (8) acc0.0<1>:w ubPREV_UV(0,240)<8;8,1> uwCURBE_TEMP(0,12)<0;2,1>
+ mac (8) acc0.8<1>:w ubPREV_UV(0,248)<8;8,1> uwCURBE_TEMP(0,14)<0;2,1>
+ add (16) acc0<1>:w acc0<16;16,1>:w 128:w
+ (f0.0) shr (16) wDIFF_TEMPORAL(15)<1> acc0<16;16,1>:w 8:w
+ (-f0.0) mov (16) wDIFF_TEMPORAL(15)<1> ubCURR_UV(17,2)<16;16,1>
+ cmp.le.f0.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_TEMP_3(0)<16;16,1> r61.26<0;2,1>:ub
+ (-f0.0.allv) avg (16) wDIFF_TEMPORAL(15)<1> wDIFF_TEMPORAL(15)<16;16,1> ubCURR_UV(17,2)<16;16,1>
+
+
+ //16x4 to 8x4 - Second 16 lines
+ add (16) uwDIFF_TEMPORAL_SUM4x4(0)<1> uwDIFF_TEMPORAL_SUM4x4(0,0)<4;2,1> uwDIFF_TEMPORAL_SUM4x4(0,2)<4;2,1>
+ add (16) uwDIFF_TEMPORAL_SUM4x4(1)<1> uwDIFF_TEMPORAL_SUM4x4(2,0)<4;2,1> uwDIFF_TEMPORAL_SUM4x4(2,2)<4;2,1>
+
+ //8x4 to 4x4 - Second 16 lines
+ add (16) uwDIFF_TEMPORAL_SUM4x4_FINAL(1)<1> uwDIFF_TEMPORAL_SUM4x4(0,0)<4;2,1> uwDIFF_TEMPORAL_SUM4x4(0,2)<4;2,1> { AccWrEn }
+
+ //Find if block is motion block - Second 16 lines
+ cmp.g.f1.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_FINAL(1)<16;16,1> r61.26<0;2,1>:ub
+
+ //Move TEMPORAL_SUM4x4 for SIMD16 use later.
+ mov (8) uwDIFF_TEMPORAL_SUM4x4_TEMP_0(0,0)<1> uwDIFF_TEMPORAL_SUM4x4_FINAL(1,0)<0;2,1>
+ mov (8) uwDIFF_TEMPORAL_SUM4x4_TEMP_0(0,8)<1> uwDIFF_TEMPORAL_SUM4x4_FINAL(1,2)<0;2,1>
+ mov (8) uwDIFF_TEMPORAL_SUM4x4_TEMP_1(0,0)<1> uwDIFF_TEMPORAL_SUM4x4_FINAL(1,4)<0;2,1>
+ mov (8) uwDIFF_TEMPORAL_SUM4x4_TEMP_1(0,8)<1> uwDIFF_TEMPORAL_SUM4x4_FINAL(1,6)<0;2,1>
+ mov (8) uwDIFF_TEMPORAL_SUM4x4_TEMP_2(0,0)<1> uwDIFF_TEMPORAL_SUM4x4_FINAL(1,8)<0;2,1>
+ mov (8) uwDIFF_TEMPORAL_SUM4x4_TEMP_2(0,8)<1> uwDIFF_TEMPORAL_SUM4x4_FINAL(1,10)<0;2,1>
+ mov (8) uwDIFF_TEMPORAL_SUM4x4_TEMP_3(0,0)<1> uwDIFF_TEMPORAL_SUM4x4_FINAL(1,12)<0;2,1>
+ mov (8) uwDIFF_TEMPORAL_SUM4x4_TEMP_3(0,8)<1> uwDIFF_TEMPORAL_SUM4x4_FINAL(1,14)<0;2,1>
+
+ //Pick Appropriate Weight History Based on motion. - Second 16 lines
+ (-f1.0) mov (16) uwCURBE_TEMP(1)<1> uwCURBE_TEMP(3)<16;16,1>
+
+ //Actual DN - Second 16 lines
+ cmp.l.f0.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(16)<16;16,1> r61.28<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(16)<16;16,1> r61.30<0;2,1>:ub
+ mul (8) acc0.0<1>:w ubCURR_UV(22,2)<8;8,1> -uwCURBE_TEMP(1,0)<0;2,1>
+ mul (8) acc0.8<1>:w ubCURR_UV(22,10)<8;8,1> -uwCURBE_TEMP(1,2)<0;2,1>
+ mac (16) acc0<1>:w ubCURR_UV(22,2)<16;16,1> 256:w
+ mac (8) acc0.0<1>:w ubPREV_UV(8,0)<8;8,1> uwCURBE_TEMP(1,0)<0;2,1>
+ mac (8) acc0.8<1>:w ubPREV_UV(8,8)<8;8,1> uwCURBE_TEMP(1,2)<0;2,1>
+ add (16) acc0<1>:w acc0<16;16,1>:w 128:w
+ (f0.0) shr (16) wDIFF_TEMPORAL(16)<1> acc0<16;16,1>:w 8:w
+ (-f0.0) mov (16) wDIFF_TEMPORAL(16)<1> ubCURR_UV(22,2)<16;16,1>
+ cmp.le.f0.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_TEMP_0(0)<16;16,1> r61.26<0;2,1>:ub
+ (-f0.0.allv) avg (16) wDIFF_TEMPORAL(16)<1> wDIFF_TEMPORAL(16)<16;16,1> ubCURR_UV(22,2)<16;16,1>
+
+ cmp.l.f0.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(17)<16;16,1> r61.28<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(17)<16;16,1> r61.30<0;2,1>:ub
+ mul (8) acc0.0<1>:w ubCURR_UV(23,2)<8;8,1> -uwCURBE_TEMP(1,0)<0;2,1>
+ mul (8) acc0.8<1>:w ubCURR_UV(23,10)<8;8,1> -uwCURBE_TEMP(1,2)<0;2,1>
+ mac (16) acc0<1>:w ubCURR_UV(23,2)<16;16,1> 256:w
+ mac (8) acc0.0<1>:w ubPREV_UV(8,16)<8;8,1> uwCURBE_TEMP(1,0)<0;2,1>
+ mac (8) acc0.8<1>:w ubPREV_UV(8,24)<8;8,1> uwCURBE_TEMP(1,2)<0;2,1>
+ add (16) acc0<1>:w acc0<16;16,1>:w 128:w
+ (f0.0) shr (16) wDIFF_TEMPORAL(17)<1> acc0<16;16,1>:w 8:w
+ (-f0.0) mov (16) wDIFF_TEMPORAL(17)<1> ubCURR_UV(23,2)<16;16,1>
+ cmp.le.f0.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_TEMP_0(0)<16;16,1> r61.26<0;2,1>:ub
+ (-f0.0.allv) avg (16) wDIFF_TEMPORAL(17)<1> wDIFF_TEMPORAL(17)<16;16,1> ubCURR_UV(23,2)<16;16,1>
+
+ cmp.l.f0.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(18)<16;16,1> r61.28<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(18)<16;16,1> r61.30<0;2,1>:ub
+ mul (8) acc0.0<1>:w ubCURR_UV(24,2)<8;8,1> -uwCURBE_TEMP(1,0)<0;2,1>
+ mul (8) acc0.8<1>:w ubCURR_UV(24,10)<8;8,1> -uwCURBE_TEMP(1,2)<0;2,1>
+ mac (16) acc0<1>:w ubCURR_UV(24,2)<16;16,1> 256:w
+ mac (8) acc0.0<1>:w ubPREV_UV(8,32)<8;8,1> uwCURBE_TEMP(1,0)<0;2,1>
+ mac (8) acc0.8<1>:w ubPREV_UV(8,40)<8;8,1> uwCURBE_TEMP(1,2)<0;2,1>
+ add (16) acc0<1>:w acc0<16;16,1>:w 128:w
+ (f0.0) shr (16) wDIFF_TEMPORAL(18)<1> acc0<16;16,1>:w 8:w
+ (-f0.0) mov (16) wDIFF_TEMPORAL(18)<1> ubCURR_UV(24,2)<16;16,1>
+ cmp.le.f0.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_TEMP_0(0)<16;16,1> r61.26<0;2,1>:ub
+ (-f0.0.allv) avg (16) wDIFF_TEMPORAL(18)<1> wDIFF_TEMPORAL(18)<16;16,1> ubCURR_UV(24,2)<16;16,1>
+
+ cmp.l.f0.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(19)<16;16,1> r61.28<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(19)<16;16,1> r61.30<0;2,1>:ub
+ mul (8) acc0.0<1>:w ubCURR_UV(25,2)<8;8,1> -uwCURBE_TEMP(1,0)<0;2,1>
+ mul (8) acc0.8<1>:w ubCURR_UV(25,10)<8;8,1> -uwCURBE_TEMP(1,2)<0;2,1>
+ mac (16) acc0<1>:w ubCURR_UV(25,2)<16;16,1> 256:w
+ mac (8) acc0.0<1>:w ubPREV_UV(8,48)<8;8,1> uwCURBE_TEMP(1,0)<0;2,1>
+ mac (8) acc0.8<1>:w ubPREV_UV(8,56)<8;8,1> uwCURBE_TEMP(1,2)<0;2,1>
+ add (16) acc0<1>:w acc0<16;16,1>:w 128:w
+ (f0.0) shr (16) wDIFF_TEMPORAL(19)<1> acc0<16;16,1>:w 8:w
+ (-f0.0) mov (16) wDIFF_TEMPORAL(19)<1> ubCURR_UV(25,2)<16;16,1>
+ cmp.le.f0.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_TEMP_0(0)<16;16,1> r61.26<0;2,1>:ub
+ (-f0.0.allv) avg (16) wDIFF_TEMPORAL(19)<1> wDIFF_TEMPORAL(19)<16;16,1> ubCURR_UV(25,2)<16;16,1>
+ cmp.l.f0.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(20)<16;16,1> r61.28<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(20)<16;16,1> r61.30<0;2,1>:ub
+ mul (8) acc0.0<1>:w ubCURR_UV(26,2)<8;8,1> -uwCURBE_TEMP(1,4)<0;2,1>
+ mul (8) acc0.8<1>:w ubCURR_UV(26,10)<8;8,1> -uwCURBE_TEMP(1,6)<0;2,1>
+ mac (16) acc0<1>:w ubCURR_UV(26,2)<16;16,1> 256:w
+ mac (8) acc0.0<1>:w ubPREV_UV(8,64)<8;8,1> uwCURBE_TEMP(1,4)<0;2,1>
+ mac (8) acc0.8<1>:w ubPREV_UV(8,72)<8;8,1> uwCURBE_TEMP(1,6)<0;2,1>
+ add (16) acc0<1>:w acc0<16;16,1>:w 128:w
+ (f0.0) shr (16) wDIFF_TEMPORAL(20)<1> acc0<16;16,1>:w 8:w
+ (-f0.0) mov (16) wDIFF_TEMPORAL(20)<1> ubCURR_UV(26,2)<16;16,1>
+ cmp.le.f0.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_TEMP_1(0)<16;16,1> r61.26<0;2,1>:ub
+ (-f0.0.allv) avg (16) wDIFF_TEMPORAL(20)<1> wDIFF_TEMPORAL(20)<16;16,1> ubCURR_UV(26,2)<16;16,1>
+
+ cmp.l.f0.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(21)<16;16,1> r61.28<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(21)<16;16,1> r61.30<0;2,1>:ub
+ mul (8) acc0.0<1>:w ubCURR_UV(27,2)<8;8,1> -uwCURBE_TEMP(1,4)<0;2,1>
+ mul (8) acc0.8<1>:w ubCURR_UV(27,10)<8;8,1> -uwCURBE_TEMP(1,6)<0;2,1>
+ mac (16) acc0<1>:w ubCURR_UV(27,2)<16;16,1> 256:w
+ mac (8) acc0.0<1>:w ubPREV_UV(8,80)<8;8,1> uwCURBE_TEMP(1,4)<0;2,1>
+ mac (8) acc0.8<1>:w ubPREV_UV(8,88)<8;8,1> uwCURBE_TEMP(1,6)<0;2,1>
+ add (16) acc0<1>:w acc0<16;16,1>:w 128:w
+ (f0.0) shr (16) wDIFF_TEMPORAL(21)<1> acc0<16;16,1>:w 8:w
+ (-f0.0) mov (16) wDIFF_TEMPORAL(21)<1> ubCURR_UV(27,2)<16;16,1>
+ cmp.le.f0.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_TEMP_1(0)<16;16,1> r61.26<0;2,1>:ub
+ (-f0.0.allv) avg (16) wDIFF_TEMPORAL(21)<1> wDIFF_TEMPORAL(21)<16;16,1> ubCURR_UV(27,2)<16;16,1>
+
+ cmp.l.f0.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(22)<16;16,1> r61.28<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(22)<16;16,1> r61.30<0;2,1>:ub
+ mul (8) acc0.0<1>:w ubCURR_UV(28,2)<8;8,1> -uwCURBE_TEMP(1,4)<0;2,1>
+ mul (8) acc0.8<1>:w ubCURR_UV(28,10)<8;8,1> -uwCURBE_TEMP(1,6)<0;2,1>
+ mac (16) acc0<1>:w ubCURR_UV(28,2)<16;16,1> 256:w
+ mac (8) acc0.0<1>:w ubPREV_UV(8,96)<8;8,1> uwCURBE_TEMP(1,4)<0;2,1>
+ mac (8) acc0.8<1>:w ubPREV_UV(8,104)<8;8,1> uwCURBE_TEMP(1,6)<0;2,1>
+ add (16) acc0<1>:w acc0<16;16,1>:w 128:w
+ (f0.0) shr (16) wDIFF_TEMPORAL(22)<1> acc0<16;16,1>:w 8:w
+ (-f0.0) mov (16) wDIFF_TEMPORAL(22)<1> ubCURR_UV(28,2)<16;16,1>
+ cmp.le.f0.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_TEMP_1(0)<16;16,1> r61.26<0;2,1>:ub
+ (-f0.0.allv) avg (16) wDIFF_TEMPORAL(22)<1> wDIFF_TEMPORAL(22)<16;16,1> ubCURR_UV(28,2)<16;16,1>
+
+ cmp.l.f0.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(23)<16;16,1> r61.28<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(23)<16;16,1> r61.30<0;2,1>:ub
+ mul (8) acc0.0<1>:w ubCURR_UV(29,2)<8;8,1> -uwCURBE_TEMP(1,4)<0;2,1>
+ mul (8) acc0.8<1>:w ubCURR_UV(29,10)<8;8,1> -uwCURBE_TEMP(1,6)<0;2,1>
+ mac (16) acc0<1>:w ubCURR_UV(29,2)<16;16,1> 256:w
+ mac (8) acc0.0<1>:w ubPREV_UV(8,112)<8;8,1> uwCURBE_TEMP(1,4)<0;2,1>
+ mac (8) acc0.8<1>:w ubPREV_UV(8,120)<8;8,1> uwCURBE_TEMP(1,6)<0;2,1>
+ add (16) acc0<1>:w acc0<16;16,1>:w 128:w
+ (f0.0) shr (16) wDIFF_TEMPORAL(23)<1> acc0<16;16,1>:w 8:w
+ (-f0.0) mov (16) wDIFF_TEMPORAL(23)<1> ubCURR_UV(29,2)<16;16,1>
+ cmp.le.f0.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_TEMP_1(0)<16;16,1> r61.26<0;2,1>:ub
+ (-f0.0.allv) avg (16) wDIFF_TEMPORAL(23)<1> wDIFF_TEMPORAL(23)<16;16,1> ubCURR_UV(29,2)<16;16,1>
+ cmp.l.f0.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(24)<16;16,1> r61.28<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(24)<16;16,1> r61.30<0;2,1>:ub
+ mul (8) acc0.0<1>:w ubCURR_UV(30,2)<8;8,1> -uwCURBE_TEMP(1,8)<0;2,1>
+ mul (8) acc0.8<1>:w ubCURR_UV(30,10)<8;8,1> -uwCURBE_TEMP(1,10)<0;2,1>
+ mac (16) acc0<1>:w ubCURR_UV(30,2)<16;16,1> 256:w
+ mac (8) acc0.0<1>:w ubPREV_UV(8,128)<8;8,1> uwCURBE_TEMP(1,8)<0;2,1>
+ mac (8) acc0.8<1>:w ubPREV_UV(8,136)<8;8,1> uwCURBE_TEMP(1,10)<0;2,1>
+ add (16) acc0<1>:w acc0<16;16,1>:w 128:w
+ (f0.0) shr (16) wDIFF_TEMPORAL(24)<1> acc0<16;16,1>:w 8:w
+ (-f0.0) mov (16) wDIFF_TEMPORAL(24)<1> ubCURR_UV(30,2)<16;16,1>
+ cmp.le.f0.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_TEMP_2(0)<16;16,1> r61.26<0;2,1>:ub
+ (-f0.0.allv) avg (16) wDIFF_TEMPORAL(24)<1> wDIFF_TEMPORAL(24)<16;16,1> ubCURR_UV(30,2)<16;16,1>
+
+ cmp.l.f0.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(25)<16;16,1> r61.28<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(25)<16;16,1> r61.30<0;2,1>:ub
+ mul (8) acc0.0<1>:w ubCURR_UV(31,2)<8;8,1> -uwCURBE_TEMP(1,8)<0;2,1>
+ mul (8) acc0.8<1>:w ubCURR_UV(31,10)<8;8,1> -uwCURBE_TEMP(1,10)<0;2,1>
+ mac (16) acc0<1>:w ubCURR_UV(31,2)<16;16,1> 256:w
+ mac (8) acc0.0<1>:w ubPREV_UV(8,144)<8;8,1> uwCURBE_TEMP(1,8)<0;2,1>
+ mac (8) acc0.8<1>:w ubPREV_UV(8,152)<8;8,1> uwCURBE_TEMP(1,10)<0;2,1>
+ add (16) acc0<1>:w acc0<16;16,1>:w 128:w
+ (f0.0) shr (16) wDIFF_TEMPORAL(25)<1> acc0<16;16,1>:w 8:w
+ (-f0.0) mov (16) wDIFF_TEMPORAL(25)<1> ubCURR_UV(31,2)<16;16,1>
+ cmp.le.f0.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_TEMP_2(0)<16;16,1> r61.26<0;2,1>:ub
+ (-f0.0.allv) avg (16) wDIFF_TEMPORAL(25)<1> wDIFF_TEMPORAL(25)<16;16,1> ubCURR_UV(31,2)<16;16,1>
+
+ cmp.l.f0.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(26)<16;16,1> r61.28<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(26)<16;16,1> r61.30<0;2,1>:ub
+ mul (8) acc0.0<1>:w ubCURR_UV(32,2)<8;8,1> -uwCURBE_TEMP(1,8)<0;2,1>
+ mul (8) acc0.8<1>:w ubCURR_UV(32,10)<8;8,1> -uwCURBE_TEMP(1,10)<0;2,1>
+ mac (16) acc0<1>:w ubCURR_UV(32,2)<16;16,1> 256:w
+ mac (8) acc0.0<1>:w ubPREV_UV(8,160)<8;8,1> uwCURBE_TEMP(1,8)<0;2,1>
+ mac (8) acc0.8<1>:w ubPREV_UV(8,168)<8;8,1> uwCURBE_TEMP(1,10)<0;2,1>
+ add (16) acc0<1>:w acc0<16;16,1>:w 128:w
+ (f0.0) shr (16) wDIFF_TEMPORAL(26)<1> acc0<16;16,1>:w 8:w
+ (-f0.0) mov (16) wDIFF_TEMPORAL(26)<1> ubCURR_UV(32,2)<16;16,1>
+ cmp.le.f0.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_TEMP_2(0)<16;16,1> r61.26<0;2,1>:ub
+ (-f0.0.allv) avg (16) wDIFF_TEMPORAL(26)<1> wDIFF_TEMPORAL(26)<16;16,1> ubCURR_UV(32,2)<16;16,1>
+
+ cmp.l.f0.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(27)<16;16,1> r61.28<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(27)<16;16,1> r61.30<0;2,1>:ub
+ mul (8) acc0.0<1>:w ubCURR_UV(33,2)<8;8,1> -uwCURBE_TEMP(1,8)<0;2,1>
+ mul (8) acc0.8<1>:w ubCURR_UV(33,10)<8;8,1> -uwCURBE_TEMP(1,10)<0;2,1>
+ mac (16) acc0<1>:w ubCURR_UV(33,2)<16;16,1> 256:w
+ mac (8) acc0.0<1>:w ubPREV_UV(8,176)<8;8,1> uwCURBE_TEMP(1,8)<0;2,1>
+ mac (8) acc0.8<1>:w ubPREV_UV(8,184)<8;8,1> uwCURBE_TEMP(1,10)<0;2,1>
+ add (16) acc0<1>:w acc0<16;16,1>:w 128:w
+ (f0.0) shr (16) wDIFF_TEMPORAL(27)<1> acc0<16;16,1>:w 8:w
+ (-f0.0) mov (16) wDIFF_TEMPORAL(27)<1> ubCURR_UV(33,2)<16;16,1>
+ cmp.le.f0.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_TEMP_2(0)<16;16,1> r61.26<0;2,1>:ub
+ (-f0.0.allv) avg (16) wDIFF_TEMPORAL(27)<1> wDIFF_TEMPORAL(27)<16;16,1> ubCURR_UV(33,2)<16;16,1>
+
+ cmp.l.f0.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(28)<16;16,1> r61.28<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(28)<16;16,1> r61.30<0;2,1>:ub
+ mul (8) acc0.0<1>:w ubCURR_UV(34,2)<8;8,1> -uwCURBE_TEMP(1,12)<0;2,1>
+ mul (8) acc0.8<1>:w ubCURR_UV(34,10)<8;8,1> -uwCURBE_TEMP(1,14)<0;2,1>
+ mac (16) acc0<1>:w ubCURR_UV(34,2)<16;16,1> 256:w
+ mac (8) acc0.0<1>:w ubPREV_UV(8,192)<8;8,1> uwCURBE_TEMP(1,12)<0;2,1>
+ mac (8) acc0.8<1>:w ubPREV_UV(8,200)<8;8,1> uwCURBE_TEMP(1,14)<0;2,1>
+ add (16) acc0<1>:w acc0<16;16,1>:w 128:w
+ (f0.0) shr (16) wDIFF_TEMPORAL(28)<1> acc0<16;16,1>:w 8:w
+ (-f0.0) mov (16) wDIFF_TEMPORAL(28)<1> ubCURR_UV(34,2)<16;16,1>
+ cmp.le.f0.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_TEMP_3(0)<16;16,1> r61.26<0;2,1>:ub
+ (-f0.0.allv) avg (16) wDIFF_TEMPORAL(28)<1> wDIFF_TEMPORAL(28)<16;16,1> ubCURR_UV(34,2)<16;16,1>
+
+ cmp.l.f0.0 (16) null<1>:w (abs)wCURBE_TEMP(4)<16;16,1> r61.28<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w (abs)wCURBE_TEMP(4)<16;16,1> r61.30<0;2,1>:ub
+ mul (8) acc0.0<1>:w ubCURR_UV(35,2)<8;8,1> -uwCURBE_TEMP(1,12)<0;2,1>
+ mul (8) acc0.8<1>:w ubCURR_UV(35,10)<8;8,1> -uwCURBE_TEMP(1,14)<0;2,1>
+ mac (16) acc0<1>:w ubCURR_UV(35,2)<16;16,1> 256:w
+ mac (8) acc0.0<1>:w ubPREV_UV(8,208)<8;8,1> uwCURBE_TEMP(1,12)<0;2,1>
+ mac (8) acc0.8<1>:w ubPREV_UV(8,216)<8;8,1> uwCURBE_TEMP(1,14)<0;2,1>
+ add (16) acc0<1>:w acc0<16;16,1>:w 128:w
+ (f0.0) shr (16) wCURBE_TEMP(4)<1> acc0<16;16,1>:w 8:w
+ (-f0.0) mov (16) wCURBE_TEMP(4)<1> ubCURR_UV(35,2)<16;16,1>
+ cmp.le.f0.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_TEMP_3(0)<16;16,1> r61.26<0;2,1>:ub
+ (-f0.0.allv) avg (16) wCURBE_TEMP(4)<1> wCURBE_TEMP(4)<16;16,1> ubCURR_UV(35,2)<16;16,1>
+
+ cmp.l.f0.0 (16) null<1>:w (abs)wCURBE_TEMP(5)<16;16,1> r61.28<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w (abs)wCURBE_TEMP(5)<16;16,1> r61.30<0;2,1>:ub
+ mul (8) acc0.0<1>:w ubCURR_UV(36,2)<8;8,1> -uwCURBE_TEMP(1,12)<0;2,1>
+ mul (8) acc0.8<1>:w ubCURR_UV(36,10)<8;8,1> -uwCURBE_TEMP(1,14)<0;2,1>
+ mac (16) acc0<1>:w ubCURR_UV(36,2)<16;16,1> 256:w
+ mac (8) acc0.0<1>:w ubPREV_UV(8,224)<8;8,1> uwCURBE_TEMP(1,12)<0;2,1>
+ mac (8) acc0.8<1>:w ubPREV_UV(8,232)<8;8,1> uwCURBE_TEMP(1,14)<0;2,1>
+ add (16) acc0<1>:w acc0<16;16,1>:w 128:w
+ (f0.0) shr (16) wCURBE_TEMP(5)<1> acc0<16;16,1>:w 8:w
+ (-f0.0) mov (16) wCURBE_TEMP(5)<1> ubCURR_UV(36,2)<16;16,1>
+ cmp.le.f0.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_TEMP_3(0)<16;16,1> r61.26<0;2,1>:ub
+ (-f0.0.allv) avg (16) wCURBE_TEMP(5)<1> wCURBE_TEMP(5)<16;16,1> ubCURR_UV(36,2)<16;16,1>
+
+ cmp.l.f0.0 (16) null<1>:w (abs)wCURBE_TEMP(6)<16;16,1> r61.28<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w (abs)wCURBE_TEMP(6)<16;16,1> r61.30<0;2,1>:ub
+ mul (8) acc0.0<1>:w ubCURR_UV(37,2)<8;8,1> -uwCURBE_TEMP(1,12)<0;2,1>
+ mul (8) acc0.8<1>:w ubCURR_UV(37,10)<8;8,1> -uwCURBE_TEMP(1,14)<0;2,1>
+ mac (16) acc0<1>:w ubCURR_UV(37,2)<16;16,1> 256:w
+ mac (8) acc0.0<1>:w ubPREV_UV(8,240)<8;8,1> uwCURBE_TEMP(1,12)<0;2,1>
+ mac (8) acc0.8<1>:w ubPREV_UV(8,248)<8;8,1> uwCURBE_TEMP(1,14)<0;2,1>
+ add (16) acc0<1>:w acc0<16;16,1>:w 128:w
+ (f0.0) shr (16) wCURBE_TEMP(6)<1> acc0<16;16,1>:w 8:w
+ (-f0.0) mov (16) wCURBE_TEMP(6)<1> ubCURR_UV(37,2)<16;16,1>
+ cmp.le.f0.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_TEMP_3(0)<16;16,1> r61.26<0;2,1>:ub
+ (-f0.0.allv) avg (16) wCURBE_TEMP(6)<1> wCURBE_TEMP(6)<16;16,1> ubCURR_UV(37,2)<16;16,1>
+
+ //Pack Weight History WORD -> BYTE
+ mov (16) ubCURBE_TEMP(3,0)<1> ubCURBE_TEMP(0)<32;16,2>
+ mov (16) ubCURBE_TEMP(3,16)<1> ubCURBE_TEMP(1)<32;16,2>
+
+
+
+//Module Name : DN_UV_Compute_BNE_UV
+//Author : Tatiya, Rupesh
+//Description : Computes minimum SOAD for each 16x4 block.
+//f0.0 marks the channels where the first SOAD_MIN_8x4 operand is less than the second; sel then keeps the smaller word of each pair in CURBE_TEMP(1).
+ cmp.l.f0.0 (8) null:w uwSOAD_MIN_8x4(0,12)<16;4,1> uwSOAD_MIN_8x4(2,12)<16;4,1>
+ (f0.0)sel (8) uwCURBE_TEMP(1,0)<1> uwSOAD_MIN_8x4(0,12)<16;4,1> uwSOAD_MIN_8x4(2,12)<16;4,1>
+//Narrow the 8 selected words in place: read every second byte of CURBE_TEMP(1) and store the 8 bytes contiguously.
+ mov (8) ubCURBE_TEMP(1)<1> ubCURBE_TEMP(1)<16;8,2>
+
+
+
+//Module Name : DN_UV_PL2_Pack_Denoised_UV
+//Author : Tatiya, Rupesh
+//Description : Pack UV denoised data based on PL2 input.
+
+
+
+//Module Name : DN_UV_Pack_Denoised_UV
+//Author : Tatiya, Rupesh
+//Description : Pack UV denoised data based on PL2/PL3/PA.
+//Each mov reads 16 bytes at a byte stride of 2 (<32;16,2>) from one source row and stores them contiguously in half of a MSGPAYLOAD register.
+
+//First 16 lines.
+ mov (16) ubMSGPAYLOAD_UV0(0,0)<1> ubDIFF_TEMPORAL(0)<32;16,2>
+ mov (16) ubMSGPAYLOAD_UV0(0,16)<1> ubDIFF_TEMPORAL(1)<32;16,2>
+ mov (16) ubMSGPAYLOAD_UV0(1,0)<1> ubDIFF_TEMPORAL(2)<32;16,2>
+ mov (16) ubMSGPAYLOAD_UV0(1,16)<1> ubDIFF_TEMPORAL(3)<32;16,2>
+ mov (16) ubMSGPAYLOAD_UV0(2,0)<1> ubDIFF_TEMPORAL(4)<32;16,2>
+ mov (16) ubMSGPAYLOAD_UV0(2,16)<1> ubDIFF_TEMPORAL(5)<32;16,2>
+ mov (16) ubMSGPAYLOAD_UV0(3,0)<1> ubDIFF_TEMPORAL(6)<32;16,2>
+ mov (16) ubMSGPAYLOAD_UV0(3,16)<1> ubDIFF_TEMPORAL(7)<32;16,2>
+ mov (16) ubMSGPAYLOAD_UV0(4,0)<1> ubDIFF_TEMPORAL(8)<32;16,2>
+ mov (16) ubMSGPAYLOAD_UV0(4,16)<1> ubDIFF_TEMPORAL(9)<32;16,2>
+ mov (16) ubMSGPAYLOAD_UV0(5,0)<1> ubDIFF_TEMPORAL(10)<32;16,2>
+ mov (16) ubMSGPAYLOAD_UV0(5,16)<1> ubDIFF_TEMPORAL(11)<32;16,2>
+ mov (16) ubMSGPAYLOAD_UV0(6,0)<1> ubDIFF_TEMPORAL(12)<32;16,2>
+ mov (16) ubMSGPAYLOAD_UV0(6,16)<1> ubDIFF_TEMPORAL(13)<32;16,2>
+ mov (16) ubMSGPAYLOAD_UV0(7,0)<1> ubDIFF_TEMPORAL(14)<32;16,2>
+ mov (16) ubMSGPAYLOAD_UV0(7,16)<1> ubDIFF_TEMPORAL(15)<32;16,2>
+
+//Second 16 lines.
+//12 lines first - DIFF_TEMPORAL(16) through DIFF_TEMPORAL(27)
+ mov (16) ubMSGPAYLOAD_UV1(0,0)<1> ubDIFF_TEMPORAL(16)<32;16,2>
+ mov (16) ubMSGPAYLOAD_UV1(0,16)<1> ubDIFF_TEMPORAL(17)<32;16,2>
+ mov (16) ubMSGPAYLOAD_UV1(1,0)<1> ubDIFF_TEMPORAL(18)<32;16,2>
+ mov (16) ubMSGPAYLOAD_UV1(1,16)<1> ubDIFF_TEMPORAL(19)<32;16,2>
+ mov (16) ubMSGPAYLOAD_UV1(2,0)<1> ubDIFF_TEMPORAL(20)<32;16,2>
+ mov (16) ubMSGPAYLOAD_UV1(2,16)<1> ubDIFF_TEMPORAL(21)<32;16,2>
+ mov (16) ubMSGPAYLOAD_UV1(3,0)<1> ubDIFF_TEMPORAL(22)<32;16,2>
+ mov (16) ubMSGPAYLOAD_UV1(3,16)<1> ubDIFF_TEMPORAL(23)<32;16,2>
+ mov (16) ubMSGPAYLOAD_UV1(4,0)<1> ubDIFF_TEMPORAL(24)<32;16,2>
+ mov (16) ubMSGPAYLOAD_UV1(4,16)<1> ubDIFF_TEMPORAL(25)<32;16,2>
+ mov (16) ubMSGPAYLOAD_UV1(5,0)<1> ubDIFF_TEMPORAL(26)<32;16,2>
+ mov (16) ubMSGPAYLOAD_UV1(5,16)<1> ubDIFF_TEMPORAL(27)<32;16,2>
+
+ //Last 4 lines: DIFF_TEMPORAL(28), then 3 rows read from CURBE_TEMP(4), (5) and (6)
+ mov (16) ubMSGPAYLOAD_UV1(6,0)<1> ubDIFF_TEMPORAL(28)<32;16,2>
+ mov (16) ubMSGPAYLOAD_UV1(6,16)<1> ubCURBE_TEMP(4)<32;16,2>
+ mov (16) ubMSGPAYLOAD_UV1(7,0)<1> ubCURBE_TEMP(5)<32;16,2>
+ mov (16) ubMSGPAYLOAD_UV1(7,16)<1> ubCURBE_TEMP(6)<32;16,2>
+
+
+
+//Module Name : DN_UV_420_Save_Curr_Frame_Y
+//Author : Tatiya, Rupesh
+//Description : Save Curr Frame Y data for 420 Input
+
+
+
+//Module Name : DN_UV_Load_Curr_Frame_Y
+//Author : Tatiya, Rupesh
+//Description : Saves Y or YUY2 of Current frame.
+//NOTE(review): the module name says Load while the description says Saves; every send below has a null destination - confirm which is intended.
+//Builds one header template in acc0, copies it to r92, r101, r110 and r119, adjusts three of the copies, then issues one send per header.
+
+
+ mov (8) acc0.0<1>:ud r0.0<8;8,1>:ud
+ mov (2) acc0.0<1>:d r62.10<2;2,1>:w
+//presumably dwords 0-1 are the block origin and dword 2 (0xF000F) the block size fields - TODO confirm against the message header layout
+ mov (1) acc0.2<1>:d 0xF000F:ud
+//Replicate the template held in acc0 into all four header registers.
+ mov (8) r92.0<1>:ud acc0.0<8;8,1>:ud
+
+ mov (8) r101.0<1>:ud acc0.0<8;8,1>:ud
+ mov (8) r110.0<1>:ud acc0.0<8;8,1>:ud
+ mov (8) r119.0<1>:ud acc0.0<8;8,1>:ud
+//Add 16 to dword 1 (r101), to dword 0 (r110), or to both (r119); r92 keeps the template values unchanged.
+ add (1) r101.1<1>:d acc0.1<0;1,0>:d 16:d
+
+ add (1) r110.0<1>:d acc0.0<0;1,0>:d 16:d
+
+ add (2) r119.0<1>:d acc0.0<2;2,1>:d 16:d
+//All four sends share one descriptor (0x120A8018) and write no result register (null destination).
+ send (8) null<1>:d r92 0x5 0x120A8018:ud
+ send (8) null<1>:d r101 0x5 0x120A8018:ud
+ send (8) null<1>:d r110 0x5 0x120A8018:ud
+ send (8) null<1>:d r119 0x5 0x120A8018:ud
+
+
+ //TODO - See if History saving can be combined with Curr Frame Save. - rT
+
+
+//Module Name : DN_UV_Save_Hist_UV
+//Author : Tatiya, Rupesh
+//Description : Saves DN history for UV data.
+//Header in r3: a copy of r0 with dwords 0-1 replaced by the two words at r62.12 and dword 2 set to 0x30007.
+ mov (8) r3.0<1>:ud r0.0<8;8,1>:ud
+ mov (2) r3.0<1>:d r62.12<2;2,1>:w
+ mov (1) r3.2<1>:d 0x30007:ud
+//presumably 0x30007 encodes the block size - TODO confirm. The send has a null destination.
+ send (8) null<1>:d r3 0x5 0x40A8021:ud
+
+
+
+//Module Name : DN_UV_Save_BNE_UV
+//Author : Tatiya, Rupesh
+//Description : Saves BNE values for 16x16 U and 16x16 V.
+//Header in r1: a copy of r0 with dwords 0-1 replaced by the two words at r63.12 and dword 2 set to 0x10003.
+ mov (8) r1.0<1>:ud r0.0<8;8,1>:ud
+ mov (2) r1.0<1>:d r63.12<2;2,1>:w
+ mov (1) r1.2<1>:d 0x10003:ud
+//presumably 0x10003 encodes the block size - TODO confirm. The send has a null destination.
+ send (8) null<1>:d r1 0x5 0x40A8023:ud
+
+
+
+//Module Name : DN_UV_PL2_Save_Curr_Frame_UV
+//Author : Tatiya, Rupesh
+
+
+
+//Module name : DN_UV_Save_Curr_Frame_UV
+//Author : Tatiya, Rupesh
+//Description : Saves Current Frame (UV only).
+//Two message headers are prepared, r74 and r83; both start as copies of r0.
+
+
+
+ mov (8) r74<1>:ud r0.0<8;8,1>:ud
+ mov (8) r83<1>:ud r0.0<8;8,1>:ud
+//r74: dword 0 = word r62.10, dword 1 = word r62.11 shifted right by 1, dword 2 = 0xF000F.
+ mov (1) r74.0<1>:d r62.10<0;1,0>:w
+ shr (1) r74.1<1>:d r62.11<0;1,0>:w 1:w
+ mov (1) r74.2<1>:d 0xF000F:ud
+//r83: same as r74 except that dword 0 is word r62.10 plus 16.
+ add (1) r83.0<1>:d r62.10<0;1,0>:w 16:d
+ shr (1) r83.1<1>:d r62.11<0;1,0>:w 1:w
+ mov (1) r83.2<1>:d 0xF000F:ud
+//Both sends use the same descriptor (0x120A8019) and a null destination.
+ send (8) null<1>:d r74 0x5 0x120A8019:ud
+ send (8) null<1>:d r83 0x5 0x120A8019:ud
+
+
+
+//End of Thread message
+//r127 receives a copy of r0 and is the payload of the final send, which has a null destination.
+mov (8) r127<1>:ud r0.0<8;8,1>:ud
+ send (1) null<1>:d r127 0x27 0x02000010
+
+
+ //All sub-routines here
+
+
+// Module Name : Noise_Detection
+// Author : Tatiya, Rupesh
+// Description : Performs noise detection on 32 pixels of U (8x4) and 32 pixels of V (8x4).
+
+DN_UV_NOISE_DETECTION_UV:
+
+// Find Field Block Median
+//
+// Purpose : Find the median value of the nine pixels in the same field
+// which are centered at current pixel.
+//
+// Works on 9 pixels centered at the current pixel
+// NOTE: pixels are within same field.
+// v4 - current pixel
+//
+// v2 v1 v0
+// * * * <--- Different field - not used
+// v5 v4 v3
+// * * * <--- Different field - not used
+// v8 v7 v6
+
+// Algorithm to find median modifies the data.
+// Copy the data needed to calculate median so the original source data stays intact.
+//
+
+//TODO - Change Interleaved implementation to separated one if - , does not work on predication. - rT
+
+//Delete Later - rT
+//mov (1) pCUR_UV:uw 52*32:uw
+
+// v0
+mov (16) ubMEDIAN_TEMP(0,0)<1> r[a0.0,0]<16;16,1>
+// v0
+mov (16) ubMEDIAN_TEMP(0,16)<1> r[a0.0,32]<16;16,1>
+// v1
+mov (16) ubMEDIAN_TEMP(1,0)<1> r[a0.0,2]<16;16,1>
+// v1
+mov (16) ubMEDIAN_TEMP(1,16)<1> r[a0.0,34]<16;16,1>
+// v2
+mov (16) ubMEDIAN_TEMP(2,0)<1> r[a0.0,4]<16;16,1>
+// v2
+mov (16) ubMEDIAN_TEMP(2,16)<1> r[a0.0,36]<16;16,1>
+// v3
+mov (16) ubMEDIAN_TEMP(3,0)<1> r[a0.0,64]<16;16,1>
+// v3
+mov (16) ubMEDIAN_TEMP(3,16)<1> r[a0.0,96]<16;16,1>
+// v4
+mov (16) ubMEDIAN_TEMP(4,0)<1> r[a0.0,66]<16;16,1>
+// v4
+mov (16) ubMEDIAN_TEMP(4,16)<1> r[a0.0,98]<16;16,1>
+// v5
+mov (16) ubMEDIAN_TEMP(5,0)<1> r[a0.0,68]<16;16,1>
+// v5
+mov (16) ubMEDIAN_TEMP(5,16)<1> r[a0.0,100]<16;16,1>
+// v6
+mov (16) ubMEDIAN_TEMP(6,0)<1> r[a0.0,128]<16;16,1>
+// v6
+mov (16) ubMEDIAN_TEMP(6,16)<1> r[a0.0,160]<16;16,1>
+// v7
+mov (16) ubMEDIAN_TEMP(7,0)<1> r[a0.0,130]<16;16,1>
+// v7
+mov (16) ubMEDIAN_TEMP(7,16)<1> r[a0.0,162]<16;16,1>
+// v8
+mov (16) ubMEDIAN_TEMP(8,0)<1> r[a0.0,132]<16;16,1>
+// v8
+mov (16) ubMEDIAN_TEMP(8,16)<1> r[a0.0,164]<16;16,1>
+
+//TODO - Optimize one instruction here.
+add (1) a0.0:uw a0.0<0;1,0>:uw 64:uw
+// v0
+mov (16) ubMEDIAN_TEMP(9,0)<1> r[a0.0,0]<16;16,1>
+// v0
+mov (16) ubMEDIAN_TEMP(9,16)<1> r[a0.0,32]<16;16,1>
+// v1
+mov (16) ubMEDIAN_TEMP(10,0)<1> r[a0.0,2]<16;16,1>
+// v1
+mov (16) ubMEDIAN_TEMP(10,16)<1> r[a0.0,34]<16;16,1>
+// v2
+mov (16) ubMEDIAN_TEMP(11,0)<1> r[a0.0,4]<16;16,1>
+// v2
+mov (16) ubMEDIAN_TEMP(11,16)<1> r[a0.0,36]<16;16,1>
+// v3
+mov (16) ubMEDIAN_TEMP(12,0)<1> r[a0.0,64]<16;16,1>
+// v3
+mov (16) ubMEDIAN_TEMP(12,16)<1> r[a0.0,96]<16;16,1>
+// v4
+mov (16) ubMEDIAN_TEMP(13,0)<1> r[a0.0,66]<16;16,1>
+// v4
+mov (16) ubMEDIAN_TEMP(13,16)<1> r[a0.0,98]<16;16,1>
+// v5
+mov (16) ubMEDIAN_TEMP(14,0)<1> r[a0.0,68]<16;16,1>
+// v5
+mov (16) ubMEDIAN_TEMP(14,16)<1> r[a0.0,100]<16;16,1>
+// v6
+mov (16) ubMEDIAN_TEMP(15,0)<1> r[a0.0,128]<16;16,1>
+// v6
+mov (16) ubMEDIAN_TEMP(15,16)<1> r[a0.0,160]<16;16,1>
+// v7
+mov (16) ubMEDIAN_TEMP(16,0)<1> r[a0.0,130]<16;16,1>
+// v7
+mov (16) ubMEDIAN_TEMP(16,16)<1> r[a0.0,162]<16;16,1>
+// v8
+mov (16) ubMEDIAN_TEMP(17,0)<1> r[a0.0,132]<16;16,1>
+// v8
+mov (16) ubMEDIAN_TEMP(17,16)<1> r[a0.0,164]<16;16,1>
+
+//TODO - Optimize one instruction here.
+add (1) a0.0:uw a0.0<0;1,0>:uw 64:uw
+
+// MedianSwap
+//
+// MedianSwap(inOutLeft, inOutRight)
+// {
+// if (inOutLeft > inOutRight)
+// {
+// temp = inOutLeft
+// inOutLeft = inOutRight
+// inOutRight = temp
+// }
+// }
+
+// MedianSwap(v1, v2) - U
+// MedianSwap(v4, v5) - U
+// MedianSwap(v1, v2) - V
+// MedianSwap(v4, v5) - V
+
+cmp.g.f0.0 (16) null:w ubMEDIAN_TEMP(1,0)<32;16,2> ubMEDIAN_TEMP(2,0)<32;16,2>
+cmp.g.f0.1 (16) null:w ubMEDIAN_TEMP(4,0)<32;16,2> ubMEDIAN_TEMP(5,0)<32;16,2>
+cmp.g.f1.0 (16) null:w ubMEDIAN_TEMP(1,1)<32;16,2> ubMEDIAN_TEMP(2,1)<32;16,2>
+cmp.g.f1.1 (16) null:w ubMEDIAN_TEMP(4,1)<32;16,2> ubMEDIAN_TEMP(5,1)<32;16,2>
+
+ mov (16) ubTEMP1(0,0)<1> ubMEDIAN_TEMP(1,0)<32;16,2>
+ mov (16) ubTEMP1(0,16)<1> ubMEDIAN_TEMP(4,0)<32;16,2>
+ mov (16) ubTEMP1(1,0)<1> ubMEDIAN_TEMP(1,1)<32;16,2>
+ mov (16) ubTEMP1(1,16)<1> ubMEDIAN_TEMP(4,1)<32;16,2>
+
+(f0.0) mov (16) ubMEDIAN_TEMP(1,0)<2> ubMEDIAN_TEMP(2,0)<32;16,2>
+(f0.1) mov (16) ubMEDIAN_TEMP(4,0)<2> ubMEDIAN_TEMP(5,0)<32;16,2>
+(f1.0) mov (16) ubMEDIAN_TEMP(1,1)<2> ubMEDIAN_TEMP(2,1)<32;16,2>
+(f1.1) mov (16) ubMEDIAN_TEMP(4,1)<2> ubMEDIAN_TEMP(5,1)<32;16,2>
+
+(f0.0) mov (16) ubMEDIAN_TEMP(2,0)<2> ubTEMP1(0,0)<16;16,1>
+(f0.1) mov (16) ubMEDIAN_TEMP(5,0)<2> ubTEMP1(0,16)<16;16,1>
+(f1.0) mov (16) ubMEDIAN_TEMP(2,1)<2> ubTEMP1(1,0)<16;16,1>
+(f1.1) mov (16) ubMEDIAN_TEMP(5,1)<2> ubTEMP1(1,16)<16;16,1>
+
+// MedianSwap(v7, v8) - U
+// MedianSwap(v0, v1) - U
+// MedianSwap(v7, v8) - V
+// MedianSwap(v0, v1) - V
+ cmp.g.f0.0 (16) null:w ubMEDIAN_TEMP(7,0)<32;16,2> ubMEDIAN_TEMP(8,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w ubMEDIAN_TEMP(0,0)<32;16,2> ubMEDIAN_TEMP(1,0)<32;16,2>
+ cmp.g.f1.0 (16) null:w ubMEDIAN_TEMP(7,1)<32;16,2> ubMEDIAN_TEMP(8,1)<32;16,2>
+ cmp.g.f1.1 (16) null:w ubMEDIAN_TEMP(0,1)<32;16,2> ubMEDIAN_TEMP(1,1)<32;16,2>
+
+ mov (16) ubTEMP1(0,0)<1> ubMEDIAN_TEMP(7,0)<32;16,2>
+ mov (16) ubTEMP1(0,16)<1> ubMEDIAN_TEMP(0,0)<32;16,2>
+ mov (16) ubTEMP1(1,0)<1> ubMEDIAN_TEMP(7,1)<32;16,2>
+ mov (16) ubTEMP1(1,16)<1> ubMEDIAN_TEMP(0,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(7,0)<2> ubMEDIAN_TEMP(8,0)<32;16,2>
+ (f0.1) mov (16) ubMEDIAN_TEMP(0,0)<2> ubMEDIAN_TEMP(1,0)<32;16,2>
+ (f1.0) mov (16) ubMEDIAN_TEMP(7,1)<2> ubMEDIAN_TEMP(8,1)<32;16,2>
+ (f1.1) mov (16) ubMEDIAN_TEMP(0,1)<2> ubMEDIAN_TEMP(1,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(8,0)<2> ubTEMP1(0,0)<16;16,1>
+ (f0.1) mov (16) ubMEDIAN_TEMP(1,0)<2> ubTEMP1(0,16)<16;16,1>
+ (f1.0) mov (16) ubMEDIAN_TEMP(8,1)<2> ubTEMP1(1,0)<16;16,1>
+ (f1.1) mov (16) ubMEDIAN_TEMP(1,1)<2> ubTEMP1(1,16)<16;16,1>
+
+ // MedianSwap(v3, v4) - U
+ // MedianSwap(v6, v7) - U
+ // MedianSwap(v3, v4) - V
+ // MedianSwap(v6, v7) - V
+ cmp.g.f0.0 (16) null:w ubMEDIAN_TEMP(3,0)<32;16,2> ubMEDIAN_TEMP(4,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w ubMEDIAN_TEMP(6,0)<32;16,2> ubMEDIAN_TEMP(7,0)<32;16,2>
+ cmp.g.f1.0 (16) null:w ubMEDIAN_TEMP(3,1)<32;16,2> ubMEDIAN_TEMP(4,1)<32;16,2>
+ cmp.g.f1.1 (16) null:w ubMEDIAN_TEMP(6,1)<32;16,2> ubMEDIAN_TEMP(7,1)<32;16,2>
+
+ mov (16) ubTEMP1(0,0)<1> ubMEDIAN_TEMP(3,0)<32;16,2>
+ mov (16) ubTEMP1(0,16)<1> ubMEDIAN_TEMP(6,0)<32;16,2>
+ mov (16) ubTEMP1(1,0)<1> ubMEDIAN_TEMP(3,1)<32;16,2>
+ mov (16) ubTEMP1(1,16)<1> ubMEDIAN_TEMP(6,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(3,0)<2> ubMEDIAN_TEMP(4,0)<32;16,2>
+ (f0.1) mov (16) ubMEDIAN_TEMP(6,0)<2> ubMEDIAN_TEMP(7,0)<32;16,2>
+ (f1.0) mov (16) ubMEDIAN_TEMP(3,1)<2> ubMEDIAN_TEMP(4,1)<32;16,2>
+ (f1.1) mov (16) ubMEDIAN_TEMP(6,1)<2> ubMEDIAN_TEMP(7,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(4,0)<2> ubTEMP1(0,0)<16;16,1>
+ (f0.1) mov (16) ubMEDIAN_TEMP(7,0)<2> ubTEMP1(0,16)<16;16,1>
+ (f1.0) mov (16) ubMEDIAN_TEMP(4,1)<2> ubTEMP1(1,0)<16;16,1>
+ (f1.1) mov (16) ubMEDIAN_TEMP(7,1)<2> ubTEMP1(1,16)<16;16,1>
+
+ // MedianSwap(v1, v2) - U
+ // MedianSwap(v4, v5) - U
+ // MedianSwap(v1, v2) - V
+ // MedianSwap(v4, v5) - V
+ cmp.g.f0.0 (16) null:w ubMEDIAN_TEMP(1,0)<32;16,2> ubMEDIAN_TEMP(2,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w ubMEDIAN_TEMP(4,0)<32;16,2> ubMEDIAN_TEMP(5,0)<32;16,2>
+ cmp.g.f1.0 (16) null:w ubMEDIAN_TEMP(1,1)<32;16,2> ubMEDIAN_TEMP(2,1)<32;16,2>
+ cmp.g.f1.1 (16) null:w ubMEDIAN_TEMP(4,1)<32;16,2> ubMEDIAN_TEMP(5,1)<32;16,2>
+
+ mov (16) ubTEMP1(0,0)<1> ubMEDIAN_TEMP(1,0)<32;16,2>
+ mov (16) ubTEMP1(0,16)<1> ubMEDIAN_TEMP(4,0)<32;16,2>
+ mov (16) ubTEMP1(1,0)<1> ubMEDIAN_TEMP(1,1)<32;16,2>
+ mov (16) ubTEMP1(1,16)<1> ubMEDIAN_TEMP(4,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(1,0)<2> ubMEDIAN_TEMP(2,0)<32;16,2>
+ (f0.1) mov (16) ubMEDIAN_TEMP(4,0)<2> ubMEDIAN_TEMP(5,0)<32;16,2>
+ (f1.0) mov (16) ubMEDIAN_TEMP(1,1)<2> ubMEDIAN_TEMP(2,1)<32;16,2>
+ (f1.1) mov (16) ubMEDIAN_TEMP(4,1)<2> ubMEDIAN_TEMP(5,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(2,0)<2> ubTEMP1(0,0)<16;16,1>
+ (f0.1) mov (16) ubMEDIAN_TEMP(5,0)<2> ubTEMP1(0,16)<16;16,1>
+ (f1.0) mov (16) ubMEDIAN_TEMP(2,1)<2> ubTEMP1(1,0)<16;16,1>
+ (f1.1) mov (16) ubMEDIAN_TEMP(5,1)<2> ubTEMP1(1,16)<16;16,1>
+
+ // MedianSwap(v7, v8) - U
+ // MedianSwap(v0, v3) - U
+ // MedianSwap(v7, v8) - V
+ // MedianSwap(v0, v3) - V
+ cmp.g.f0.0 (16) null:w ubMEDIAN_TEMP(7,0)<32;16,2> ubMEDIAN_TEMP(8,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w ubMEDIAN_TEMP(0,0)<32;16,2> ubMEDIAN_TEMP(3,0)<32;16,2>
+ cmp.g.f1.0 (16) null:w ubMEDIAN_TEMP(7,1)<32;16,2> ubMEDIAN_TEMP(8,1)<32;16,2>
+ cmp.g.f1.1 (16) null:w ubMEDIAN_TEMP(0,1)<32;16,2> ubMEDIAN_TEMP(3,1)<32;16,2>
+
+ mov (16) ubTEMP1(0,0)<1> ubMEDIAN_TEMP(7,0)<32;16,2>
+ mov (16) ubTEMP1(0,16)<1> ubMEDIAN_TEMP(0,0)<32;16,2>
+ mov (16) ubTEMP1(1,0)<1> ubMEDIAN_TEMP(7,1)<32;16,2>
+ mov (16) ubTEMP1(1,16)<1> ubMEDIAN_TEMP(0,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(7,0)<2> ubMEDIAN_TEMP(8,0)<32;16,2>
+ (f0.1) mov (16) ubMEDIAN_TEMP(0,0)<2> ubMEDIAN_TEMP(3,0)<32;16,2>
+ (f1.0) mov (16) ubMEDIAN_TEMP(7,1)<2> ubMEDIAN_TEMP(8,1)<32;16,2>
+ (f1.1) mov (16) ubMEDIAN_TEMP(0,1)<2> ubMEDIAN_TEMP(3,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(8,0)<2> ubTEMP1(0,0)<16;16,1>
+ (f0.1) mov (16) ubMEDIAN_TEMP(3,0)<2> ubTEMP1(0,16)<16;16,1>
+ (f1.0) mov (16) ubMEDIAN_TEMP(8,1)<2> ubTEMP1(1,0)<16;16,1>
+ (f1.1) mov (16) ubMEDIAN_TEMP(3,1)<2> ubTEMP1(1,16)<16;16,1>
+
+ // NOTE:
+ // Compare v0 to v6 to find the minimum.
+ // Store the minimum for future use.
+ //TODO - Find if MIN is needed.
+ //cmp.l.f0.0 (16) null:w ubMEDIAN_TEMP(%1+0,0)<32;16,2> ubMEDIAN_TEMP(%1+6,0)<32;16,2>
+ //cmp.l.f1.0 (16) null:w ubMEDIAN_TEMP(%1+0,1)<32;16,2> ubMEDIAN_TEMP(%1+6,1)<32;16,2>
+ //(f0.0) mov (16) ubCURR_MIN(0,%2*16+0)<1> ubMEDIAN_TEMP(%1+0,0)<32;16,2>
+ //(f1.0) mov (16) ubCURR_MIN(1,%2*16+0)<1> ubMEDIAN_TEMP(%1+0,1)<32;16,2>
+ //(-f0.0) mov (16) ubCURR_MIN(0,%2*16+0)<1> ubMEDIAN_TEMP(%1+6,0)<32;16,2>
+ //(-f1.0) mov (16) ubCURR_MIN(1,%2*16+0)<1> ubMEDIAN_TEMP(%1+6,1)<32;16,2>
+
+ // MedianSwap(v5, v8) - U
+ // MedianSwap(v4, v7) - U
+ // MedianSwap(v5, v8) - V
+ // MedianSwap(v4, v7) - V
+ cmp.g.f0.0 (16) null:w ubMEDIAN_TEMP(5,0)<32;16,2> ubMEDIAN_TEMP(8,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w ubMEDIAN_TEMP(4,0)<32;16,2> ubMEDIAN_TEMP(7,0)<32;16,2>
+ cmp.g.f1.0 (16) null:w ubMEDIAN_TEMP(5,1)<32;16,2> ubMEDIAN_TEMP(8,1)<32;16,2>
+ cmp.g.f1.1 (16) null:w ubMEDIAN_TEMP(4,1)<32;16,2> ubMEDIAN_TEMP(7,1)<32;16,2>
+
+ mov (16) ubTEMP1(0,0)<1> ubMEDIAN_TEMP(5,0)<32;16,2>
+ mov (16) ubTEMP1(0,16)<1> ubMEDIAN_TEMP(4,0)<32;16,2>
+ mov (16) ubTEMP1(1,0)<1> ubMEDIAN_TEMP(5,1)<32;16,2>
+ mov (16) ubTEMP1(1,16)<1> ubMEDIAN_TEMP(4,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(5,0)<2> ubMEDIAN_TEMP(8,0)<32;16,2>
+ (f0.1) mov (16) ubMEDIAN_TEMP(4,0)<2> ubMEDIAN_TEMP(7,0)<32;16,2>
+ (f1.0) mov (16) ubMEDIAN_TEMP(5,1)<2> ubMEDIAN_TEMP(8,1)<32;16,2>
+ (f1.1) mov (16) ubMEDIAN_TEMP(4,1)<2> ubMEDIAN_TEMP(7,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(8,0)<2> ubTEMP1(0,0)<16;16,1>
+ (f0.1) mov (16) ubMEDIAN_TEMP(7,0)<2> ubTEMP1(0,16)<16;16,1>
+ (f1.0) mov (16) ubMEDIAN_TEMP(8,1)<2> ubTEMP1(1,0)<16;16,1>
+ (f1.1) mov (16) ubMEDIAN_TEMP(7,1)<2> ubTEMP1(1,16)<16;16,1>
+
+// NOTE:
+// Compare v2 to v8 to find the maximum.
+// Store the maximum for future use.
+ //TODO - Find if MAX is needed.
+// cmp.g.f0.0 (16) null:w ubMEDIAN_TEMP(%1+2,0)<32;16,2> ubMEDIAN_TEMP(%1+8,0)<32;16,2>
+// cmp.g.f1.0 (16) null:w ubMEDIAN_TEMP(%1+2,1)<32;16,2> ubMEDIAN_TEMP(%1+8,1)<32;16,2>
+//(f0.0) mov (16) ubCURR_MAX(0,%2*16+0)<1> ubMEDIAN_TEMP(%1+2,0)<32;16,2>
+//(f1.0) mov (16) ubCURR_MAX(1,%2*16+0)<1> ubMEDIAN_TEMP(%1+2,1)<32;16,2>
+//(-f0.0) mov (16) ubCURR_MAX(0,%2*16+0)<1> ubMEDIAN_TEMP(%1+8,0)<32;16,2>
+//(-f1.0) mov (16) ubCURR_MAX(1,%2*16+0)<1> ubMEDIAN_TEMP(%1+8,1)<32;16,2>
+
+// MedianSwap(v3, v6) - U
+// MedianSwap(v1, v4) - U
+// MedianSwap(v3, v6) - V
+// MedianSwap(v1, v4) - V
+ cmp.g.f0.0 (16) null:w ubMEDIAN_TEMP(3,0)<32;16,2> ubMEDIAN_TEMP(6,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w ubMEDIAN_TEMP(1,0)<32;16,2> ubMEDIAN_TEMP(4,0)<32;16,2>
+ cmp.g.f1.0 (16) null:w ubMEDIAN_TEMP(3,1)<32;16,2> ubMEDIAN_TEMP(6,1)<32;16,2>
+ cmp.g.f1.1 (16) null:w ubMEDIAN_TEMP(1,1)<32;16,2> ubMEDIAN_TEMP(4,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(6,0)<2> ubMEDIAN_TEMP(3,0)<32;16,2>
+ (f0.1) mov (16) ubMEDIAN_TEMP(4,0)<2> ubMEDIAN_TEMP(1,0)<32;16,2>
+ (f1.0) mov (16) ubMEDIAN_TEMP(6,1)<2> ubMEDIAN_TEMP(3,1)<32;16,2>
+ (f1.1) mov (16) ubMEDIAN_TEMP(4,1)<2> ubMEDIAN_TEMP(1,1)<32;16,2>
+
+ // MedianSwap(v2,v5) - U
+ // MedianSwap(v4,v7) - U
+ // MedianSwap(v2,v5) - V
+ // MedianSwap(v4,v7) - V
+ cmp.g.f0.0 (16) null:w ubMEDIAN_TEMP(2,0)<32;16,2> ubMEDIAN_TEMP(5,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w ubMEDIAN_TEMP(4,0)<32;16,2> ubMEDIAN_TEMP(7,0)<32;16,2>
+ cmp.g.f1.0 (16) null:w ubMEDIAN_TEMP(2,1)<32;16,2> ubMEDIAN_TEMP(5,1)<32;16,2>
+ cmp.g.f1.1 (16) null:w ubMEDIAN_TEMP(4,1)<32;16,2> ubMEDIAN_TEMP(7,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(2,0)<2> ubMEDIAN_TEMP(5,0)<32;16,2>
+ (f0.1) mov (16) ubMEDIAN_TEMP(4,0)<2> ubMEDIAN_TEMP(7,0)<32;16,2>
+ (f1.0) mov (16) ubMEDIAN_TEMP(2,1)<2> ubMEDIAN_TEMP(5,1)<32;16,2>
+ (f1.1) mov (16) ubMEDIAN_TEMP(4,1)<2> ubMEDIAN_TEMP(7,1)<32;16,2>
+
+ // MedianSwap(v4,v2) - U
+ // MedianSwap(v4,v2) - V
+ cmp.g.f0.0 (16) null:w ubMEDIAN_TEMP(4,0)<32;16,2> ubMEDIAN_TEMP(2,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w ubMEDIAN_TEMP(4,1)<32;16,2> ubMEDIAN_TEMP(2,1)<32;16,2>
+
+ mov (16) ubTEMP1(0,0)<1> ubMEDIAN_TEMP(4,0)<32;16,2>
+ mov (16) ubTEMP1(0,16)<1> ubMEDIAN_TEMP(4,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(4,0)<2> ubMEDIAN_TEMP(2,0)<32;16,2>
+ (f0.1) mov (16) ubMEDIAN_TEMP(4,1)<2> ubMEDIAN_TEMP(2,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(2,0)<2> ubTEMP1(0,0)<16;16,1>
+ (f0.1) mov (16) ubMEDIAN_TEMP(2,1)<2> ubTEMP1(0,16)<16;16,1>
+
+ // MedianSwap(v6,v4) - U
+ // MedianSwap(v6,v4) - V
+ cmp.g.f0.0 (16) null:w ubMEDIAN_TEMP(6,0)<32;16,2> ubMEDIAN_TEMP(4,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w ubMEDIAN_TEMP(6,1)<32;16,2> ubMEDIAN_TEMP(4,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(4,0)<2> ubMEDIAN_TEMP(6,0)<32;16,2>
+ (f0.1) mov (16) ubMEDIAN_TEMP(4,1)<2> ubMEDIAN_TEMP(6,1)<32;16,2>
+
+ // MedianSwap(v4,v2) - U
+ // MedianSwap(v4,v2) - V
+ cmp.g.f0.0 (16) null:w ubMEDIAN_TEMP(4,0)<32;16,2> ubMEDIAN_TEMP(2,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w ubMEDIAN_TEMP(4,1)<32;16,2> ubMEDIAN_TEMP(2,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(4,0)<2> ubMEDIAN_TEMP(2,0)<32;16,2>
+ (f0.1) mov (16) ubMEDIAN_TEMP(4,1)<2> ubMEDIAN_TEMP(2,1)<32;16,2>
+cmp.g.f0.0 (16) null:w ubMEDIAN_TEMP(10,0)<32;16,2> ubMEDIAN_TEMP(11,0)<32;16,2>
+cmp.g.f0.1 (16) null:w ubMEDIAN_TEMP(13,0)<32;16,2> ubMEDIAN_TEMP(14,0)<32;16,2>
+cmp.g.f1.0 (16) null:w ubMEDIAN_TEMP(10,1)<32;16,2> ubMEDIAN_TEMP(11,1)<32;16,2>
+cmp.g.f1.1 (16) null:w ubMEDIAN_TEMP(13,1)<32;16,2> ubMEDIAN_TEMP(14,1)<32;16,2>
+
+ mov (16) ubTEMP1(0,0)<1> ubMEDIAN_TEMP(10,0)<32;16,2>
+ mov (16) ubTEMP1(0,16)<1> ubMEDIAN_TEMP(13,0)<32;16,2>
+ mov (16) ubTEMP1(1,0)<1> ubMEDIAN_TEMP(10,1)<32;16,2>
+ mov (16) ubTEMP1(1,16)<1> ubMEDIAN_TEMP(13,1)<32;16,2>
+
+(f0.0) mov (16) ubMEDIAN_TEMP(10,0)<2> ubMEDIAN_TEMP(11,0)<32;16,2>
+(f0.1) mov (16) ubMEDIAN_TEMP(13,0)<2> ubMEDIAN_TEMP(14,0)<32;16,2>
+(f1.0) mov (16) ubMEDIAN_TEMP(10,1)<2> ubMEDIAN_TEMP(11,1)<32;16,2>
+(f1.1) mov (16) ubMEDIAN_TEMP(13,1)<2> ubMEDIAN_TEMP(14,1)<32;16,2>
+
+(f0.0) mov (16) ubMEDIAN_TEMP(11,0)<2> ubTEMP1(0,0)<16;16,1>
+(f0.1) mov (16) ubMEDIAN_TEMP(14,0)<2> ubTEMP1(0,16)<16;16,1>
+(f1.0) mov (16) ubMEDIAN_TEMP(11,1)<2> ubTEMP1(1,0)<16;16,1>
+(f1.1) mov (16) ubMEDIAN_TEMP(14,1)<2> ubTEMP1(1,16)<16;16,1>
+
+// MedianSwap(v7, v8) - U
+// MedianSwap(v0, v1) - U
+// MedianSwap(v7, v8) - V
+// MedianSwap(v0, v1) - V
+ cmp.g.f0.0 (16) null:w ubMEDIAN_TEMP(16,0)<32;16,2> ubMEDIAN_TEMP(17,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w ubMEDIAN_TEMP(9,0)<32;16,2> ubMEDIAN_TEMP(10,0)<32;16,2>
+ cmp.g.f1.0 (16) null:w ubMEDIAN_TEMP(16,1)<32;16,2> ubMEDIAN_TEMP(17,1)<32;16,2>
+ cmp.g.f1.1 (16) null:w ubMEDIAN_TEMP(9,1)<32;16,2> ubMEDIAN_TEMP(10,1)<32;16,2>
+
+ mov (16) ubTEMP1(0,0)<1> ubMEDIAN_TEMP(16,0)<32;16,2>
+ mov (16) ubTEMP1(0,16)<1> ubMEDIAN_TEMP(9,0)<32;16,2>
+ mov (16) ubTEMP1(1,0)<1> ubMEDIAN_TEMP(16,1)<32;16,2>
+ mov (16) ubTEMP1(1,16)<1> ubMEDIAN_TEMP(9,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(16,0)<2> ubMEDIAN_TEMP(17,0)<32;16,2>
+ (f0.1) mov (16) ubMEDIAN_TEMP(9,0)<2> ubMEDIAN_TEMP(10,0)<32;16,2>
+ (f1.0) mov (16) ubMEDIAN_TEMP(16,1)<2> ubMEDIAN_TEMP(17,1)<32;16,2>
+ (f1.1) mov (16) ubMEDIAN_TEMP(9,1)<2> ubMEDIAN_TEMP(10,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(17,0)<2> ubTEMP1(0,0)<16;16,1>
+ (f0.1) mov (16) ubMEDIAN_TEMP(10,0)<2> ubTEMP1(0,16)<16;16,1>
+ (f1.0) mov (16) ubMEDIAN_TEMP(17,1)<2> ubTEMP1(1,0)<16;16,1>
+ (f1.1) mov (16) ubMEDIAN_TEMP(10,1)<2> ubTEMP1(1,16)<16;16,1>
+
+ // MedianSwap(v3, v4) - U
+ // MedianSwap(v6, v7) - U
+ // MedianSwap(v3, v4) - V
+ // MedianSwap(v6, v7) - V
+ cmp.g.f0.0 (16) null:w ubMEDIAN_TEMP(12,0)<32;16,2> ubMEDIAN_TEMP(13,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w ubMEDIAN_TEMP(15,0)<32;16,2> ubMEDIAN_TEMP(16,0)<32;16,2>
+ cmp.g.f1.0 (16) null:w ubMEDIAN_TEMP(12,1)<32;16,2> ubMEDIAN_TEMP(13,1)<32;16,2>
+ cmp.g.f1.1 (16) null:w ubMEDIAN_TEMP(15,1)<32;16,2> ubMEDIAN_TEMP(16,1)<32;16,2>
+
+ mov (16) ubTEMP1(0,0)<1> ubMEDIAN_TEMP(12,0)<32;16,2>
+ mov (16) ubTEMP1(0,16)<1> ubMEDIAN_TEMP(15,0)<32;16,2>
+ mov (16) ubTEMP1(1,0)<1> ubMEDIAN_TEMP(12,1)<32;16,2>
+ mov (16) ubTEMP1(1,16)<1> ubMEDIAN_TEMP(15,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(12,0)<2> ubMEDIAN_TEMP(13,0)<32;16,2>
+ (f0.1) mov (16) ubMEDIAN_TEMP(15,0)<2> ubMEDIAN_TEMP(16,0)<32;16,2>
+ (f1.0) mov (16) ubMEDIAN_TEMP(12,1)<2> ubMEDIAN_TEMP(13,1)<32;16,2>
+ (f1.1) mov (16) ubMEDIAN_TEMP(15,1)<2> ubMEDIAN_TEMP(16,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(13,0)<2> ubTEMP1(0,0)<16;16,1>
+ (f0.1) mov (16) ubMEDIAN_TEMP(16,0)<2> ubTEMP1(0,16)<16;16,1>
+ (f1.0) mov (16) ubMEDIAN_TEMP(13,1)<2> ubTEMP1(1,0)<16;16,1>
+ (f1.1) mov (16) ubMEDIAN_TEMP(16,1)<2> ubTEMP1(1,16)<16;16,1>
+
+ // MedianSwap(v1, v2) - U
+ // MedianSwap(v4, v5) - U
+ // MedianSwap(v1, v2) - V
+ // MedianSwap(v4, v5) - V
+ cmp.g.f0.0 (16) null:w ubMEDIAN_TEMP(10,0)<32;16,2> ubMEDIAN_TEMP(11,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w ubMEDIAN_TEMP(13,0)<32;16,2> ubMEDIAN_TEMP(14,0)<32;16,2>
+ cmp.g.f1.0 (16) null:w ubMEDIAN_TEMP(10,1)<32;16,2> ubMEDIAN_TEMP(11,1)<32;16,2>
+ cmp.g.f1.1 (16) null:w ubMEDIAN_TEMP(13,1)<32;16,2> ubMEDIAN_TEMP(14,1)<32;16,2>
+
+ mov (16) ubTEMP1(0,0)<1> ubMEDIAN_TEMP(10,0)<32;16,2>
+ mov (16) ubTEMP1(0,16)<1> ubMEDIAN_TEMP(13,0)<32;16,2>
+ mov (16) ubTEMP1(1,0)<1> ubMEDIAN_TEMP(10,1)<32;16,2>
+ mov (16) ubTEMP1(1,16)<1> ubMEDIAN_TEMP(13,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(10,0)<2> ubMEDIAN_TEMP(11,0)<32;16,2>
+ (f0.1) mov (16) ubMEDIAN_TEMP(13,0)<2> ubMEDIAN_TEMP(14,0)<32;16,2>
+ (f1.0) mov (16) ubMEDIAN_TEMP(10,1)<2> ubMEDIAN_TEMP(11,1)<32;16,2>
+ (f1.1) mov (16) ubMEDIAN_TEMP(13,1)<2> ubMEDIAN_TEMP(14,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(11,0)<2> ubTEMP1(0,0)<16;16,1>
+ (f0.1) mov (16) ubMEDIAN_TEMP(14,0)<2> ubTEMP1(0,16)<16;16,1>
+ (f1.0) mov (16) ubMEDIAN_TEMP(11,1)<2> ubTEMP1(1,0)<16;16,1>
+ (f1.1) mov (16) ubMEDIAN_TEMP(14,1)<2> ubTEMP1(1,16)<16;16,1>
+
+ // MedianSwap(v7, v8) - U
+ // MedianSwap(v0, v3) - U
+ // MedianSwap(v7, v8) - V
+ // MedianSwap(v0, v3) - V
+ cmp.g.f0.0 (16) null:w ubMEDIAN_TEMP(16,0)<32;16,2> ubMEDIAN_TEMP(17,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w ubMEDIAN_TEMP(9,0)<32;16,2> ubMEDIAN_TEMP(12,0)<32;16,2>
+ cmp.g.f1.0 (16) null:w ubMEDIAN_TEMP(16,1)<32;16,2> ubMEDIAN_TEMP(17,1)<32;16,2>
+ cmp.g.f1.1 (16) null:w ubMEDIAN_TEMP(9,1)<32;16,2> ubMEDIAN_TEMP(12,1)<32;16,2>
+
+ mov (16) ubTEMP1(0,0)<1> ubMEDIAN_TEMP(16,0)<32;16,2>
+ mov (16) ubTEMP1(0,16)<1> ubMEDIAN_TEMP(9,0)<32;16,2>
+ mov (16) ubTEMP1(1,0)<1> ubMEDIAN_TEMP(16,1)<32;16,2>
+ mov (16) ubTEMP1(1,16)<1> ubMEDIAN_TEMP(9,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(16,0)<2> ubMEDIAN_TEMP(17,0)<32;16,2>
+ (f0.1) mov (16) ubMEDIAN_TEMP(9,0)<2> ubMEDIAN_TEMP(12,0)<32;16,2>
+ (f1.0) mov (16) ubMEDIAN_TEMP(16,1)<2> ubMEDIAN_TEMP(17,1)<32;16,2>
+ (f1.1) mov (16) ubMEDIAN_TEMP(9,1)<2> ubMEDIAN_TEMP(12,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(17,0)<2> ubTEMP1(0,0)<16;16,1>
+ (f0.1) mov (16) ubMEDIAN_TEMP(12,0)<2> ubTEMP1(0,16)<16;16,1>
+ (f1.0) mov (16) ubMEDIAN_TEMP(17,1)<2> ubTEMP1(1,0)<16;16,1>
+ (f1.1) mov (16) ubMEDIAN_TEMP(12,1)<2> ubTEMP1(1,16)<16;16,1>
+
+ // NOTE:
+ // Compare v0 to v6 to find the minimum.
+ // Store the minimum for future use.
+ //TODO - Find if MIN is needed.
+ //cmp.l.f0.0 (16) null:w ubMEDIAN_TEMP(%1+0,0)<32;16,2> ubMEDIAN_TEMP(%1+6,0)<32;16,2>
+ //cmp.l.f1.0 (16) null:w ubMEDIAN_TEMP(%1+0,1)<32;16,2> ubMEDIAN_TEMP(%1+6,1)<32;16,2>
+ //(f0.0) mov (16) ubCURR_MIN(0,%2*16+0)<1> ubMEDIAN_TEMP(%1+0,0)<32;16,2>
+ //(f1.0) mov (16) ubCURR_MIN(1,%2*16+0)<1> ubMEDIAN_TEMP(%1+0,1)<32;16,2>
+ //(-f0.0) mov (16) ubCURR_MIN(0,%2*16+0)<1> ubMEDIAN_TEMP(%1+6,0)<32;16,2>
+ //(-f1.0) mov (16) ubCURR_MIN(1,%2*16+0)<1> ubMEDIAN_TEMP(%1+6,1)<32;16,2>
+
+ // MedianSwap(v5, v8) - U
+ // MedianSwap(v4, v7) - U
+ // MedianSwap(v5, v8) - V
+ // MedianSwap(v4, v7) - V
+ cmp.g.f0.0 (16) null:w ubMEDIAN_TEMP(14,0)<32;16,2> ubMEDIAN_TEMP(17,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w ubMEDIAN_TEMP(13,0)<32;16,2> ubMEDIAN_TEMP(16,0)<32;16,2>
+ cmp.g.f1.0 (16) null:w ubMEDIAN_TEMP(14,1)<32;16,2> ubMEDIAN_TEMP(17,1)<32;16,2>
+ cmp.g.f1.1 (16) null:w ubMEDIAN_TEMP(13,1)<32;16,2> ubMEDIAN_TEMP(16,1)<32;16,2>
+
+ mov (16) ubTEMP1(0,0)<1> ubMEDIAN_TEMP(14,0)<32;16,2>
+ mov (16) ubTEMP1(0,16)<1> ubMEDIAN_TEMP(13,0)<32;16,2>
+ mov (16) ubTEMP1(1,0)<1> ubMEDIAN_TEMP(14,1)<32;16,2>
+ mov (16) ubTEMP1(1,16)<1> ubMEDIAN_TEMP(13,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(14,0)<2> ubMEDIAN_TEMP(17,0)<32;16,2>
+ (f0.1) mov (16) ubMEDIAN_TEMP(13,0)<2> ubMEDIAN_TEMP(16,0)<32;16,2>
+ (f1.0) mov (16) ubMEDIAN_TEMP(14,1)<2> ubMEDIAN_TEMP(17,1)<32;16,2>
+ (f1.1) mov (16) ubMEDIAN_TEMP(13,1)<2> ubMEDIAN_TEMP(16,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(17,0)<2> ubTEMP1(0,0)<16;16,1>
+ (f0.1) mov (16) ubMEDIAN_TEMP(16,0)<2> ubTEMP1(0,16)<16;16,1>
+ (f1.0) mov (16) ubMEDIAN_TEMP(17,1)<2> ubTEMP1(1,0)<16;16,1>
+ (f1.1) mov (16) ubMEDIAN_TEMP(16,1)<2> ubTEMP1(1,16)<16;16,1>
+
+// NOTE:
+// Compare v2 to v8 to find the maximum.
+// Store the maximum for future use.
+ //TODO - Find if MAX is needed.
+// cmp.g.f0.0 (16) null:w ubMEDIAN_TEMP(%1+2,0)<32;16,2> ubMEDIAN_TEMP(%1+8,0)<32;16,2>
+// cmp.g.f1.0 (16) null:w ubMEDIAN_TEMP(%1+2,1)<32;16,2> ubMEDIAN_TEMP(%1+8,1)<32;16,2>
+//(f0.0) mov (16) ubCURR_MAX(0,%2*16+0)<1> ubMEDIAN_TEMP(%1+2,0)<32;16,2>
+//(f1.0) mov (16) ubCURR_MAX(1,%2*16+0)<1> ubMEDIAN_TEMP(%1+2,1)<32;16,2>
+//(-f0.0) mov (16) ubCURR_MAX(0,%2*16+0)<1> ubMEDIAN_TEMP(%1+8,0)<32;16,2>
+//(-f1.0) mov (16) ubCURR_MAX(1,%2*16+0)<1> ubMEDIAN_TEMP(%1+8,1)<32;16,2>
+
+// MedianSwap(v3, v6) - U
+// MedianSwap(v1, v4) - U
+// MedianSwap(v3, v6) - V
+// MedianSwap(v1, v4) - V
+ cmp.g.f0.0 (16) null:w ubMEDIAN_TEMP(12,0)<32;16,2> ubMEDIAN_TEMP(15,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w ubMEDIAN_TEMP(10,0)<32;16,2> ubMEDIAN_TEMP(13,0)<32;16,2>
+ cmp.g.f1.0 (16) null:w ubMEDIAN_TEMP(12,1)<32;16,2> ubMEDIAN_TEMP(15,1)<32;16,2>
+ cmp.g.f1.1 (16) null:w ubMEDIAN_TEMP(10,1)<32;16,2> ubMEDIAN_TEMP(13,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(15,0)<2> ubMEDIAN_TEMP(12,0)<32;16,2>
+ (f0.1) mov (16) ubMEDIAN_TEMP(13,0)<2> ubMEDIAN_TEMP(10,0)<32;16,2>
+ (f1.0) mov (16) ubMEDIAN_TEMP(15,1)<2> ubMEDIAN_TEMP(12,1)<32;16,2>
+ (f1.1) mov (16) ubMEDIAN_TEMP(13,1)<2> ubMEDIAN_TEMP(10,1)<32;16,2>
+
+ // MedianSwap(v2,v5) - U
+ // MedianSwap(v4,v7) - U
+ // MedianSwap(v2,v5) - V
+ // MedianSwap(v4,v7) - V
+ cmp.g.f0.0 (16) null:w ubMEDIAN_TEMP(11,0)<32;16,2> ubMEDIAN_TEMP(14,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w ubMEDIAN_TEMP(13,0)<32;16,2> ubMEDIAN_TEMP(16,0)<32;16,2>
+ cmp.g.f1.0 (16) null:w ubMEDIAN_TEMP(11,1)<32;16,2> ubMEDIAN_TEMP(14,1)<32;16,2>
+ cmp.g.f1.1 (16) null:w ubMEDIAN_TEMP(13,1)<32;16,2> ubMEDIAN_TEMP(16,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(11,0)<2> ubMEDIAN_TEMP(14,0)<32;16,2>
+ (f0.1) mov (16) ubMEDIAN_TEMP(13,0)<2> ubMEDIAN_TEMP(16,0)<32;16,2>
+ (f1.0) mov (16) ubMEDIAN_TEMP(11,1)<2> ubMEDIAN_TEMP(14,1)<32;16,2>
+ (f1.1) mov (16) ubMEDIAN_TEMP(13,1)<2> ubMEDIAN_TEMP(16,1)<32;16,2>
+
+ // MedianSwap(v4,v2) - U
+ // MedianSwap(v4,v2) - V
+ cmp.g.f0.0 (16) null:w ubMEDIAN_TEMP(13,0)<32;16,2> ubMEDIAN_TEMP(11,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w ubMEDIAN_TEMP(13,1)<32;16,2> ubMEDIAN_TEMP(11,1)<32;16,2>
+
+ mov (16) ubTEMP1(0,0)<1> ubMEDIAN_TEMP(13,0)<32;16,2>
+ mov (16) ubTEMP1(0,16)<1> ubMEDIAN_TEMP(13,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(13,0)<2> ubMEDIAN_TEMP(11,0)<32;16,2>
+ (f0.1) mov (16) ubMEDIAN_TEMP(13,1)<2> ubMEDIAN_TEMP(11,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(11,0)<2> ubTEMP1(0,0)<16;16,1>
+ (f0.1) mov (16) ubMEDIAN_TEMP(11,1)<2> ubTEMP1(0,16)<16;16,1>
+
+ // MedianSwap(v6,v4) - U
+ // MedianSwap(v6,v4) - V
+ cmp.g.f0.0 (16) null:w ubMEDIAN_TEMP(15,0)<32;16,2> ubMEDIAN_TEMP(13,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w ubMEDIAN_TEMP(15,1)<32;16,2> ubMEDIAN_TEMP(13,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(13,0)<2> ubMEDIAN_TEMP(15,0)<32;16,2>
+ (f0.1) mov (16) ubMEDIAN_TEMP(13,1)<2> ubMEDIAN_TEMP(15,1)<32;16,2>
+
+ // MedianSwap(v4,v2) - U
+ // MedianSwap(v4,v2) - V
+ cmp.g.f0.0 (16) null:w ubMEDIAN_TEMP(13,0)<32;16,2> ubMEDIAN_TEMP(11,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w ubMEDIAN_TEMP(13,1)<32;16,2> ubMEDIAN_TEMP(11,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(13,0)<2> ubMEDIAN_TEMP(11,0)<32;16,2>
+ (f0.1) mov (16) ubMEDIAN_TEMP(13,1)<2> ubMEDIAN_TEMP(11,1)<32;16,2>
+
+// Sobel Value calculation for the current pixel v4
+// v2 v1 v0
+// * * * <--- Different field - not used
+// v5 v4 v3
+// * * * <--- Different field - not used
+// v8 v7 v6
+//
+// Gx = -v0 - 2*v3 - v6 + v2 + 2*v5 + v8
+// Gy = v0 + 2*v1 + v2 - v6 - 2*v7 - v8
+//
+// Sobel = (|Gx| + |Gy|) >> 3
+
+//TODO - Change Later - rT
+add (1) a0.0:uw a0.0<0;1,0>:uw -128:uw
+
+// - 2 * v3
+mul (16) acc0.0<1>:w r[a0.0,64]<16;16,1>:ub -2:w
+// + v8
+mac (16) acc0.0<1>:w r[a0.0,132]<16;16,1>:ub 1:w
+// - v0
+mac (16) acc0.0<1>:w r[a0.0,0]<16;16,1>:ub -1:w
+// - v6
+mac (16) acc0.0<1>:w r[a0.0,128]<16;16,1>:ub -1:w
+// + v2
+mac (16) acc0.0<1>:w r[a0.0,4]<16;16,1>:ub 1:w
+// + 2 * v5
+mac (16) wSOBEL_X(0)<1> r[a0.0,68]<16;16,1>:ub 2:w
+// - 2 * v3
+mul (16) acc0.0<1>:w r[a0.0,96]<16;16,1>:ub -2:w
+// + v8
+mac (16) acc0.0<1>:w r[a0.0,164]<16;16,1>:ub 1:w
+// - v0
+mac (16) acc0.0<1>:w r[a0.0,32]<16;16,1>:ub -1:w
+// - v6
+mac (16) acc0.0<1>:w r[a0.0,160]<16;16,1>:ub -1:w
+// + v2
+mac (16) acc0.0<1>:w r[a0.0,36]<16;16,1>:ub 1:w
+// + 2 * v5
+mac (16) wSOBEL_X(1)<1> r[a0.0,100]<16;16,1>:ub 2:w
+// - 2 * v3
+mul (16) acc0.0<1>:w r[a0.0,128]<16;16,1>:ub -2:w
+// + v8
+mac (16) acc0.0<1>:w r[a0.0,196]<16;16,1>:ub 1:w
+// - v0
+mac (16) acc0.0<1>:w r[a0.0,64]<16;16,1>:ub -1:w
+// - v6
+mac (16) acc0.0<1>:w r[a0.0,192]<16;16,1>:ub -1:w
+// + v2
+mac (16) acc0.0<1>:w r[a0.0,68]<16;16,1>:ub 1:w
+// + 2 * v5
+mac (16) wSOBEL_X(2)<1> r[a0.0,132]<16;16,1>:ub 2:w
+// - 2 * v3
+mul (16) acc0.0<1>:w r[a0.0,160]<16;16,1>:ub -2:w
+// + v8
+mac (16) acc0.0<1>:w r[a0.0,228]<16;16,1>:ub 1:w
+// - v0
+mac (16) acc0.0<1>:w r[a0.0,96]<16;16,1>:ub -1:w
+// - v6
+mac (16) acc0.0<1>:w r[a0.0,224]<16;16,1>:ub -1:w
+// + v2
+mac (16) acc0.0<1>:w r[a0.0,100]<16;16,1>:ub 1:w
+// + 2 * v5
+mac (16) wSOBEL_X(3)<1> r[a0.0,164]<16;16,1>:ub 2:w
+
+// + 2 * v1
+mul (16) acc0.0<1>:w r[a0.0,2]<16;16,1>:ub 2:w
+// + v0
+mac (16) acc0.0<1>:w r[a0.0,0]<16;16,1>:ub 1:w
+// - v8
+mac (16) acc0.0<1>:w r[a0.0,132]<16;16,1>:ub -1:w
+// + v2
+mac (16) acc0.0<1>:w r[a0.0,4]<16;16,1>:ub 1:w
+// - v6
+mac (16) acc0.0<1>:w r[a0.0,128]<16;16,1>:ub -1:w
+// - 2 * v7
+mac (16) acc0.0<1>:w r[a0.0,130]<16;16,1>:ub -2:w
+
+add (16) acc0.0<1>:uw (abs)acc0.0<16;16,1>:w (abs)wSOBEL_X(0)<16;16,1>
+
+shr (16) uwSOBEL(0)<1> acc0.0<16;16,1>:uw 3:uw
+// + 2 * v1
+mul (16) acc0.0<1>:w r[a0.0,34]<16;16,1>:ub 2:w
+// + v0
+mac (16) acc0.0<1>:w r[a0.0,32]<16;16,1>:ub 1:w
+// - v8
+mac (16) acc0.0<1>:w r[a0.0,164]<16;16,1>:ub -1:w
+// + v2
+mac (16) acc0.0<1>:w r[a0.0,36]<16;16,1>:ub 1:w
+// - v6
+mac (16) acc0.0<1>:w r[a0.0,160]<16;16,1>:ub -1:w
+// - 2 * v7
+mac (16) acc0.0<1>:w r[a0.0,162]<16;16,1>:ub -2:w
+
+add (16) acc0.0<1>:uw (abs)acc0.0<16;16,1>:w (abs)wSOBEL_X(1)<16;16,1>
+
+shr (16) uwSOBEL(1)<1> acc0.0<16;16,1>:uw 3:uw
+// + 2 * v1
+mul (16) acc0.0<1>:w r[a0.0,66]<16;16,1>:ub 2:w
+// + v0
+mac (16) acc0.0<1>:w r[a0.0,64]<16;16,1>:ub 1:w
+// - v8
+mac (16) acc0.0<1>:w r[a0.0,196]<16;16,1>:ub -1:w
+// + v2
+mac (16) acc0.0<1>:w r[a0.0,68]<16;16,1>:ub 1:w
+// - v6
+mac (16) acc0.0<1>:w r[a0.0,192]<16;16,1>:ub -1:w
+// - 2 * v7
+mac (16) acc0.0<1>:w r[a0.0,194]<16;16,1>:ub -2:w
+
+add (16) acc0.0<1>:uw (abs)acc0.0<16;16,1>:w (abs)wSOBEL_X(2)<16;16,1>
+
+shr (16) uwSOBEL(2)<1> acc0.0<16;16,1>:uw 3:uw
+// + 2 * v1
+mul (16) acc0.0<1>:w r[a0.0,98]<16;16,1>:ub 2:w
+// + v0
+mac (16) acc0.0<1>:w r[a0.0,96]<16;16,1>:ub 1:w
+// - v8
+mac (16) acc0.0<1>:w r[a0.0,228]<16;16,1>:ub -1:w
+// + v2
+mac (16) acc0.0<1>:w r[a0.0,100]<16;16,1>:ub 1:w
+// - v6
+mac (16) acc0.0<1>:w r[a0.0,224]<16;16,1>:ub -1:w
+// - 2 * v7
+mac (16) acc0.0<1>:w r[a0.0,226]<16;16,1>:ub -2:w
+
+add (16) acc0.0<1>:uw (abs)acc0.0<16;16,1>:w (abs)wSOBEL_X(3)<16;16,1>
+
+shr (16) uwSOBEL(3)<1> acc0.0<16;16,1>:uw 3:uw
+
+//Mov Median in CURBE_TEMP to free up temp space.
+mov (16) ubMEDIAN(0,0)<1> ubMEDIAN_TEMP(4,0)<16;16,1>
+mov (16) ubMEDIAN(0,16)<1> ubMEDIAN_TEMP(4,16)<16;16,1>
+mov (16) ubMEDIAN(0,32)<1> ubMEDIAN_TEMP(13,0)<16;16,1>
+mov (16) ubMEDIAN(0,48)<1> ubMEDIAN_TEMP(13,16)<16;16,1>
+
+// Find:
+// absDiff = abs(ubCurY - ubMedian)
+// Find the difference between pixel and median value.
+
+//Median is interleaved. So difference is also interleaved.
+
+//------------------------------------------------------------------------------------------
+//Process 16 U and 16 V pixels here and rest later.
+// first row - v0,v1,v2
+add (16) wDIFF(0)<1> r[a0.0,0]<16;16,1>:ub -ubMEDIAN(0,0)<16;16,1>
+add (16) wDIFF(1)<1> r[a0.0,2]<16;16,1>:ub -ubMEDIAN(0,0)<16;16,1>
+add (16) wDIFF(2)<1> r[a0.0,4]<16;16,1>:ub -ubMEDIAN(0,0)<16;16,1>
+
+// second row - v3,v4,v5
+add (16) wDIFF(3)<1> r[a0.0,64]<16;16,1>:ub -ubMEDIAN(0,0)<16;16,1>
+add (16) wDIFF(4)<1> r[a0.0,66]<16;16,1>:ub -ubMEDIAN(0,0)<16;16,1>
+add (16) wDIFF(5)<1> r[a0.0,68]<16;16,1>:ub -ubMEDIAN(0,0)<16;16,1>
+
+// third row - v6,v7,v8
+add (16) wDIFF(6)<1> r[a0.0,128]<16;16,1>:ub -ubMEDIAN(0,0)<16;16,1>
+add (16) wDIFF(7)<1> r[a0.0,130]<16;16,1>:ub -ubMEDIAN(0,0)<16;16,1>
+add (16) wDIFF(8)<1> r[a0.0,132]<16;16,1>:ub -ubMEDIAN(0,0)<16;16,1>
+// first row - v0,v1,v2
+add (16) wDIFF(9)<1> r[a0.0,32]<16;16,1>:ub -ubMEDIAN(0,16)<16;16,1>
+add (16) wDIFF(10)<1> r[a0.0,34]<16;16,1>:ub -ubMEDIAN(0,16)<16;16,1>
+add (16) wDIFF(11)<1> r[a0.0,36]<16;16,1>:ub -ubMEDIAN(0,16)<16;16,1>
+
+// second row - v3,v4,v5
+add (16) wDIFF(12)<1> r[a0.0,96]<16;16,1>:ub -ubMEDIAN(0,16)<16;16,1>
+add (16) wDIFF(13)<1> r[a0.0,98]<16;16,1>:ub -ubMEDIAN(0,16)<16;16,1>
+add (16) wDIFF(14)<1> r[a0.0,100]<16;16,1>:ub -ubMEDIAN(0,16)<16;16,1>
+
+// third row - v6,v7,v8
+add (16) wDIFF(15)<1> r[a0.0,160]<16;16,1>:ub -ubMEDIAN(0,16)<16;16,1>
+add (16) wDIFF(16)<1> r[a0.0,162]<16;16,1>:ub -ubMEDIAN(0,16)<16;16,1>
+add (16) wDIFF(17)<1> r[a0.0,164]<16;16,1>:ub -ubMEDIAN(0,16)<16;16,1>
+
+//TODO - Change Later - rT
+add (1) a0.0:uw a0.0<0;1,0>:uw 64:uw
+
+// Find sum of all absolute differences AND
+// maximum absolute difference for 16 U and 16 V here.
+//First 2 rows of 8x4
+//Compare 0-1, 2-3, 4-5, 6-7
+cmp.g.f0.0 (16) null:uw (abs)wDIFF(0)<16;16,1> (abs)wDIFF(1)<16;16,1>
+cmp.g.f0.1 (16) null:uw (abs)wDIFF(2)<16;16,1> (abs)wDIFF(3)<16;16,1>
+cmp.g.f1.0 (16) null:uw (abs)wDIFF(4)<16;16,1> (abs)wDIFF(5)<16;16,1>
+cmp.g.f1.1 (16) null:uw (abs)wDIFF(6)<16;16,1> (abs)wDIFF(7)<16;16,1>
+
+//Calculate SAD
+ add (16) acc0.0<1>:uw (abs)wDIFF(0)<16;16,1> (abs)wDIFF(1)<16;16,1>
+ add (16) acc0.0<1>:uw acc0.0<16;16,1>:uw (abs)wDIFF(2)<16;16,1>
+ add (16) acc0.0<1>:uw acc0.0<16;16,1>:uw (abs)wDIFF(3)<16;16,1>
+ add (16) acc0.0<1>:uw acc0.0<16;16,1>:uw (abs)wDIFF(4)<16;16,1>
+ add (16) acc0.0<1>:uw acc0.0<16;16,1>:uw (abs)wDIFF(5)<16;16,1>
+ add (16) acc0.0<1>:uw acc0.0<16;16,1>:uw (abs)wDIFF(6)<16;16,1>
+ add (16) acc0.0<1>:uw acc0.0<16;16,1>:uw (abs)wDIFF(7)<16;16,1>
+ add (16) uwSOAD(0)<1> acc0.0<16;16,1>:uw (abs)wDIFF(8)<16;16,1>
+
+(f0.0) sel (16) uwCURBE_TEMP(0)<1> (abs)wDIFF(0)<16;16,1> (abs)wDIFF(1)<16;16,1>
+(f0.1) sel (16) uwCURBE_TEMP(1)<1> (abs)wDIFF(2)<16;16,1> (abs)wDIFF(3)<16;16,1>
+(f1.0) sel (16) uwCURBE_TEMP(2)<1> (abs)wDIFF(4)<16;16,1> (abs)wDIFF(5)<16;16,1>
+(f1.1) sel (16) uwCURBE_TEMP(3)<1> (abs)wDIFF(6)<16;16,1> (abs)wDIFF(7)<16;16,1>
+
+//------------
+ //DIFF(0-7) is not needed here. Populate it.
+ // first row - v0,v1,v2
+ add (16) wDIFF(0)<1> r[a0.0,0]<16;16,1>:ub -ubMEDIAN(1,0)<16;16,1>
+ add (16) wDIFF(1)<1> r[a0.0,2]<16;16,1>:ub -ubMEDIAN(1,0)<16;16,1>
+ add (16) wDIFF(2)<1> r[a0.0,4]<16;16,1>:ub -ubMEDIAN(1,0)<16;16,1>
+
+ // second row - v3,v4,v5
+ add (16) wDIFF(3)<1> r[a0.0,64]<16;16,1>:ub -ubMEDIAN(1,0)<16;16,1>
+ add (16) wDIFF(4)<1> r[a0.0,66]<16;16,1>:ub -ubMEDIAN(1,0)<16;16,1>
+ add (16) wDIFF(5)<1> r[a0.0,68]<16;16,1>:ub -ubMEDIAN(1,0)<16;16,1>
+
+ // third row - v6,v7
+ add (16) wDIFF(6)<1> r[a0.0,128]<16;16,1>:ub -ubMEDIAN(1,0)<16;16,1>
+ add (16) wDIFF(7)<1> r[a0.0,130]<16;16,1>:ub -ubMEDIAN(1,0)<16;16,1>
+//------------
+
+//Compare Max(0,1) - Max(2,3), Max(4,5) - Max(6,7)
+cmp.g.f0.0 (16) null:uw uwCURBE_TEMP(0)<16;16,1> uwCURBE_TEMP(1)<16;16,1>
+cmp.g.f0.1 (16) null:uw uwCURBE_TEMP(2)<16;16,1> uwCURBE_TEMP(3)<16;16,1>
+
+(f0.0)sel (16) uwCURBE_TEMP(0)<1> uwCURBE_TEMP(0)<16;16,1> uwCURBE_TEMP(1)<16;16,1>
+(f0.1)sel (16) uwCURBE_TEMP(2)<1> uwCURBE_TEMP(2)<16;16,1> uwCURBE_TEMP(3)<16;16,1>
+
+//Compare Max(0,1,2,3) - Max(4,5,6,7)
+cmp.g.f0.0 (16) null:uw uwCURBE_TEMP(0)<16;16,1> uwCURBE_TEMP(2)<16;16,1>
+(f0.0)sel (16) uwCURBE_TEMP(0)<1> uwCURBE_TEMP(0)<16;16,1> uwCURBE_TEMP(2)<16;16,1>
+
+//Compare Max(0,1,2,3,4,5,6,7) - 8
+cmp.g.f0.0 (16) null:uw uwCURBE_TEMP(0)<16;16,1> (abs)wDIFF(8)<16;16,1>
+(f0.0)sel (16) uwMAX_ABS_DIFF(0)<1> uwCURBE_TEMP(0)<16;16,1> (abs)wDIFF(8)<16;16,1>
+
+//------------
+ //Load v8 - DIFF(8)
+ add (16) wDIFF(8)<1> r[a0.0,132]<16;16,1>:ub -ubMEDIAN(1,0)<16;16,1>
+//------------
+//Compare 0-1, 2-3, 4-5, 6-7
+cmp.g.f0.0 (16) null:uw (abs)wDIFF(9)<16;16,1> (abs)wDIFF(10)<16;16,1>
+cmp.g.f0.1 (16) null:uw (abs)wDIFF(11)<16;16,1> (abs)wDIFF(12)<16;16,1>
+cmp.g.f1.0 (16) null:uw (abs)wDIFF(13)<16;16,1> (abs)wDIFF(14)<16;16,1>
+cmp.g.f1.1 (16) null:uw (abs)wDIFF(15)<16;16,1> (abs)wDIFF(16)<16;16,1>
+
+//Calculate SAD
+ add (16) acc0.0<1>:uw (abs)wDIFF(9)<16;16,1> (abs)wDIFF(10)<16;16,1>
+ add (16) acc0.0<1>:uw acc0.0<16;16,1>:uw (abs)wDIFF(11)<16;16,1>
+ add (16) acc0.0<1>:uw acc0.0<16;16,1>:uw (abs)wDIFF(12)<16;16,1>
+ add (16) acc0.0<1>:uw acc0.0<16;16,1>:uw (abs)wDIFF(13)<16;16,1>
+ add (16) acc0.0<1>:uw acc0.0<16;16,1>:uw (abs)wDIFF(14)<16;16,1>
+ add (16) acc0.0<1>:uw acc0.0<16;16,1>:uw (abs)wDIFF(15)<16;16,1>
+ add (16) acc0.0<1>:uw acc0.0<16;16,1>:uw (abs)wDIFF(16)<16;16,1>
+ add (16) uwSOAD(1)<1> acc0.0<16;16,1>:uw (abs)wDIFF(17)<16;16,1>
+
+(f0.0) sel (16) uwCURBE_TEMP(0)<1> (abs)wDIFF(9)<16;16,1> (abs)wDIFF(10)<16;16,1>
+(f0.1) sel (16) uwCURBE_TEMP(1)<1> (abs)wDIFF(11)<16;16,1> (abs)wDIFF(12)<16;16,1>
+(f1.0) sel (16) uwCURBE_TEMP(2)<1> (abs)wDIFF(13)<16;16,1> (abs)wDIFF(14)<16;16,1>
+(f1.1) sel (16) uwCURBE_TEMP(3)<1> (abs)wDIFF(15)<16;16,1> (abs)wDIFF(16)<16;16,1>
+
+//------------
+ //DIFF(9-16) is not needed here. Populate it.
+ // first row - v0,v1,v2
+ add (16) wDIFF(9)<1> r[a0.0,32]<16;16,1>:ub -ubMEDIAN(1,16)<16;16,1>
+ add (16) wDIFF(10)<1> r[a0.0,34]<16;16,1>:ub -ubMEDIAN(1,16)<16;16,1>
+ add (16) wDIFF(11)<1> r[a0.0,36]<16;16,1>:ub -ubMEDIAN(1,16)<16;16,1>
+
+ // second row - v3,v4,v5
+ add (16) wDIFF(12)<1> r[a0.0,96]<16;16,1>:ub -ubMEDIAN(1,16)<16;16,1>
+ add (16) wDIFF(13)<1> r[a0.0,98]<16;16,1>:ub -ubMEDIAN(1,16)<16;16,1>
+ add (16) wDIFF(14)<1> r[a0.0,100]<16;16,1>:ub -ubMEDIAN(1,16)<16;16,1>
+
+ // third row - v6,v7
+ add (16) wDIFF(15)<1> r[a0.0,160]<16;16,1>:ub -ubMEDIAN(1,16)<16;16,1>
+ add (16) wDIFF(16)<1> r[a0.0,162]<16;16,1>:ub -ubMEDIAN(1,16)<16;16,1>
+//------------
+
+//Compare Max(0,1) - Max(2,3), Max(4,5) - Max(6,7)
+cmp.g.f0.0 (16) null:uw uwCURBE_TEMP(0)<16;16,1> uwCURBE_TEMP(1)<16;16,1>
+cmp.g.f0.1 (16) null:uw uwCURBE_TEMP(2)<16;16,1> uwCURBE_TEMP(3)<16;16,1>
+
+(f0.0)sel (16) uwCURBE_TEMP(0)<1> uwCURBE_TEMP(0)<16;16,1> uwCURBE_TEMP(1)<16;16,1>
+(f0.1)sel (16) uwCURBE_TEMP(2)<1> uwCURBE_TEMP(2)<16;16,1> uwCURBE_TEMP(3)<16;16,1>
+
+//Compare Max(0,1,2,3) - Max(4,5,6,7)
+cmp.g.f0.0 (16) null:uw uwCURBE_TEMP(0)<16;16,1> uwCURBE_TEMP(2)<16;16,1>
+(f0.0)sel (16) uwCURBE_TEMP(0)<1> uwCURBE_TEMP(0)<16;16,1> uwCURBE_TEMP(2)<16;16,1>
+
+//Compare Max(0,1,2,3,4,5,6,7) - 8
+cmp.g.f0.0 (16) null:uw uwCURBE_TEMP(0)<16;16,1> (abs)wDIFF(17)<16;16,1>
+(f0.0)sel (16) uwMAX_ABS_DIFF(1)<1> uwCURBE_TEMP(0)<16;16,1> (abs)wDIFF(17)<16;16,1>
+
+//------------
+ //Load v8 - DIFF(17)
+ add (16) wDIFF(17)<1> r[a0.0,164]<16;16,1>:ub -ubMEDIAN(1,16)<16;16,1>
+//------------
+
+//if ((sobel_edge_measure < m_SobelEdgeThreshold) && ((block_max-block_min) < m_LocalDiffThreshold))
+// if (sigma_mb_min > sigma)
+// sigma_mb_min = sigma;
+
+//NOTE: block_min is always zero as the median is one of the values in the 3x3 block. So no need to calculate it.
+// So just do -
+//if ((sobel_edge_measure < m_SobelEdgeThreshold) && ((block_max) < m_LocalDiffThreshold) && ( sigma < sigma_mb_min))
+// sigma_mb_min = sigma;
+
+//We are processing 32 bytes of U and 32 bytes of V - each of size 8x4.
+//Compare first 8 bytes with max possible (255).
+//Start above condition from second 8 bytes.
+
+//TODO - Change Later - rT
+// mov (1) pCUR_MIN_SOAD_8x4:uw 1752:uw //r54.24:ub
+
+//First row of 8x4
+ cmp.l.f0.0 (16) null:uw uwSOBEL(0)<16;16,1> r55.30<0;2,1>:ub
+(f0.0) cmp.l.f0.0 (16) null:uw uwMAX_ABS_DIFF(0)<16;16,1> r55.28<0;2,1>:ub
+(f0.0) cmp.l.f0.0 (16) null:uw uwSOAD(0)<16;16,1> 255:uw
+(f0.0) sel (16) uwSOBEL(0)<1> uwSOAD(0)<16;16,1> 255:uw
+
+//Second row of 8x4
+ cmp.l.f0.0 (16) null:uw uwSOBEL(1)<16;16,1> r55.30<0;2,1>:ub
+(f0.0) cmp.l.f0.0 (16) null:uw uwMAX_ABS_DIFF(1)<16;16,1> r55.28<0;2,1>:ub
+(f0.0) cmp.l.f0.0 (16) null:uw uwSOAD(1)<16;16,1> uwSOBEL(0)<16;16,1>
+(f0.0) mov (16) uwSOBEL(0)<1> uwSOAD(1)<16;16,1>
+
+// Find sum of all absolute differences AND
+// maximum absolute difference for 16 U and 16 V here.
+//Second 2 rows of 8x4
+//Compare 0-1, 2-3, 4-5, 6-7
+cmp.g.f0.0 (16) null:uw (abs)wDIFF(0)<16;16,1> (abs)wDIFF(1)<16;16,1>
+cmp.g.f0.1 (16) null:uw (abs)wDIFF(2)<16;16,1> (abs)wDIFF(3)<16;16,1>
+cmp.g.f1.0 (16) null:uw (abs)wDIFF(4)<16;16,1> (abs)wDIFF(5)<16;16,1>
+cmp.g.f1.1 (16) null:uw (abs)wDIFF(6)<16;16,1> (abs)wDIFF(7)<16;16,1>
+
+//Calculate SAD
+ add (16) acc0.0<1>:uw (abs)wDIFF(0)<16;16,1> (abs)wDIFF(1)<16;16,1>
+ add (16) acc0.0<1>:uw acc0.0<16;16,1>:uw (abs)wDIFF(2)<16;16,1>
+ add (16) acc0.0<1>:uw acc0.0<16;16,1>:uw (abs)wDIFF(3)<16;16,1>
+ add (16) acc0.0<1>:uw acc0.0<16;16,1>:uw (abs)wDIFF(4)<16;16,1>
+ add (16) acc0.0<1>:uw acc0.0<16;16,1>:uw (abs)wDIFF(5)<16;16,1>
+ add (16) acc0.0<1>:uw acc0.0<16;16,1>:uw (abs)wDIFF(6)<16;16,1>
+ add (16) acc0.0<1>:uw acc0.0<16;16,1>:uw (abs)wDIFF(7)<16;16,1>
+ add (16) uwSOAD(0)<1> acc0.0<16;16,1>:uw (abs)wDIFF(8)<16;16,1>
+
+(f0.0) sel (16) uwCURBE_TEMP(0)<1> (abs)wDIFF(0)<16;16,1> (abs)wDIFF(1)<16;16,1>
+(f0.1) sel (16) uwCURBE_TEMP(1)<1> (abs)wDIFF(2)<16;16,1> (abs)wDIFF(3)<16;16,1>
+(f1.0) sel (16) uwCURBE_TEMP(2)<1> (abs)wDIFF(4)<16;16,1> (abs)wDIFF(5)<16;16,1>
+(f1.1) sel (16) uwCURBE_TEMP(3)<1> (abs)wDIFF(6)<16;16,1> (abs)wDIFF(7)<16;16,1>
+
+//Compare Max(0,1) - Max(2,3), Max(4,5) - Max(6,7)
+cmp.g.f0.0 (16) null:uw uwCURBE_TEMP(0)<16;16,1> uwCURBE_TEMP(1)<16;16,1>
+cmp.g.f0.1 (16) null:uw uwCURBE_TEMP(2)<16;16,1> uwCURBE_TEMP(3)<16;16,1>
+
+(f0.0)sel (16) uwCURBE_TEMP(0)<1> uwCURBE_TEMP(0)<16;16,1> uwCURBE_TEMP(1)<16;16,1>
+(f0.1)sel (16) uwCURBE_TEMP(2)<1> uwCURBE_TEMP(2)<16;16,1> uwCURBE_TEMP(3)<16;16,1>
+
+//Compare Max(0,1,2,3) - Max(4,5,6,7)
+cmp.g.f0.0 (16) null:uw uwCURBE_TEMP(0)<16;16,1> uwCURBE_TEMP(2)<16;16,1>
+(f0.0)sel (16) uwCURBE_TEMP(0)<1> uwCURBE_TEMP(0)<16;16,1> uwCURBE_TEMP(2)<16;16,1>
+
+//Compare Max(0,1,2,3,4,5,6,7) - 8
+cmp.g.f0.0 (16) null:uw uwCURBE_TEMP(0)<16;16,1> (abs)wDIFF(8)<16;16,1>
+(f0.0)sel (16) uwMAX_ABS_DIFF(0)<1> uwCURBE_TEMP(0)<16;16,1> (abs)wDIFF(8)<16;16,1>
+//Compare 0-1, 2-3, 4-5, 6-7
+cmp.g.f0.0 (16) null:uw (abs)wDIFF(9)<16;16,1> (abs)wDIFF(10)<16;16,1>
+cmp.g.f0.1 (16) null:uw (abs)wDIFF(11)<16;16,1> (abs)wDIFF(12)<16;16,1>
+cmp.g.f1.0 (16) null:uw (abs)wDIFF(13)<16;16,1> (abs)wDIFF(14)<16;16,1>
+cmp.g.f1.1 (16) null:uw (abs)wDIFF(15)<16;16,1> (abs)wDIFF(16)<16;16,1>
+
+//Calculate SAD
+ add (16) acc0.0<1>:uw (abs)wDIFF(9)<16;16,1> (abs)wDIFF(10)<16;16,1>
+ add (16) acc0.0<1>:uw acc0.0<16;16,1>:uw (abs)wDIFF(11)<16;16,1>
+ add (16) acc0.0<1>:uw acc0.0<16;16,1>:uw (abs)wDIFF(12)<16;16,1>
+ add (16) acc0.0<1>:uw acc0.0<16;16,1>:uw (abs)wDIFF(13)<16;16,1>
+ add (16) acc0.0<1>:uw acc0.0<16;16,1>:uw (abs)wDIFF(14)<16;16,1>
+ add (16) acc0.0<1>:uw acc0.0<16;16,1>:uw (abs)wDIFF(15)<16;16,1>
+ add (16) acc0.0<1>:uw acc0.0<16;16,1>:uw (abs)wDIFF(16)<16;16,1>
+ add (16) uwSOAD(1)<1> acc0.0<16;16,1>:uw (abs)wDIFF(17)<16;16,1>
+
+(f0.0) sel (16) uwCURBE_TEMP(0)<1> (abs)wDIFF(9)<16;16,1> (abs)wDIFF(10)<16;16,1>
+(f0.1) sel (16) uwCURBE_TEMP(1)<1> (abs)wDIFF(11)<16;16,1> (abs)wDIFF(12)<16;16,1>
+(f1.0) sel (16) uwCURBE_TEMP(2)<1> (abs)wDIFF(13)<16;16,1> (abs)wDIFF(14)<16;16,1>
+(f1.1) sel (16) uwCURBE_TEMP(3)<1> (abs)wDIFF(15)<16;16,1> (abs)wDIFF(16)<16;16,1>
+
+//Compare Max(0,1) - Max(2,3), Max(4,5) - Max(6,7)
+cmp.g.f0.0 (16) null:uw uwCURBE_TEMP(0)<16;16,1> uwCURBE_TEMP(1)<16;16,1>
+cmp.g.f0.1 (16) null:uw uwCURBE_TEMP(2)<16;16,1> uwCURBE_TEMP(3)<16;16,1>
+
+(f0.0)sel (16) uwCURBE_TEMP(0)<1> uwCURBE_TEMP(0)<16;16,1> uwCURBE_TEMP(1)<16;16,1>
+(f0.1)sel (16) uwCURBE_TEMP(2)<1> uwCURBE_TEMP(2)<16;16,1> uwCURBE_TEMP(3)<16;16,1>
+
+//Compare Max(0,1,2,3) - Max(4,5,6,7)
+cmp.g.f0.0 (16) null:uw uwCURBE_TEMP(0)<16;16,1> uwCURBE_TEMP(2)<16;16,1>
+(f0.0)sel (16) uwCURBE_TEMP(0)<1> uwCURBE_TEMP(0)<16;16,1> uwCURBE_TEMP(2)<16;16,1>
+
+//Compare Max(0,1,2,3,4,5,6,7) - 8
+cmp.g.f0.0 (16) null:uw uwCURBE_TEMP(0)<16;16,1> (abs)wDIFF(17)<16;16,1>
+(f0.0)sel (16) uwMAX_ABS_DIFF(1)<1> uwCURBE_TEMP(0)<16;16,1> (abs)wDIFF(17)<16;16,1>
+
+//Third row of 8x4
+ cmp.l.f0.0 (16) null:uw uwSOBEL(2)<16;16,1> r55.30<0;2,1>:ub
+(f0.0) cmp.l.f0.0 (16) null:uw uwMAX_ABS_DIFF(0)<16;16,1> r55.28<0;2,1>:ub
+(f0.0) cmp.l.f0.0 (16) null:uw uwSOAD(0)<16;16,1> uwSOBEL(0)<16;16,1>
+(f0.0) mov (16) uwSOBEL(0)<1> uwSOAD(0)<16;16,1>
+
+//Fourth row of 8x4
+ cmp.l.f0.0 (16) null:uw uwSOBEL(3)<16;16,1> r55.30<0;2,1>:ub
+(f0.0) cmp.l.f0.0 (16) null:uw uwMAX_ABS_DIFF(1)<16;16,1> r55.28<0;2,1>:ub
+(f0.0) cmp.l.f0.0 (16) null:uw uwSOAD(1)<16;16,1> uwSOBEL(0)<16;16,1>
+(f0.0) mov (16) uwSOBEL(0)<1> uwSOAD(1)<16;16,1>
+
+ cmp.l.f0.0 (8) null:uw uwSOBEL(0,0)<8;8,1> uwSOBEL(0,8)<8;8,1>
+(f0.0) sel (8) uwSOBEL(0)<1> uwSOBEL(0,0)<8;8,1> uwSOBEL(0,8)<8;8,1>
+
+ cmp.l.f0.0 (4) null:uw uwSOBEL(0,0)<4;4,1> uwSOBEL(0,4)<4;4,1>
+(f0.0) sel (4) uwSOBEL(0)<1> uwSOBEL(0,0)<4;4,1> uwSOBEL(0,4)<4;4,1>
+
+ cmp.l.f0.0 (2) null:uw uwSOBEL(0,0)<2;2,1> uwSOBEL(0,2)<2;2,1>
+(f0.0) sel (2) r[a0.1,0]<1>:uw uwSOBEL(0,0)<2;2,1> uwSOBEL(0,2)<2;2,1>
+
+
+
+
+
+
+// End of common.inc
+
+mov (1) ip:ud r7.7<0;1,0>:d
+
+
+.end_code
+.end_kernel
diff --git a/src/shaders/post_processing/gen7/NV12_DN_422CP.g4a b/src/shaders/post_processing/gen7/NV12_DN_422CP.g4a
new file mode 100644
index 0000000..926469e
--- /dev/null
+++ b/src/shaders/post_processing/gen7/NV12_DN_422CP.g4a
@@ -0,0 +1,539 @@
+/*
+ * Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// 113 // Total instruction count
+// 1 // Total kernel count
+
+.kernel NV12_DN_422CP
+.code
+
+
+
+// FileName: DN_PL_Core.asm
+// Author: Vivek Kumar
+// Description: Tasks for DN only case (16x8 block) for planar format
+
+
+
+// FileName: DN.asm
+// Author: Vivek Kumar
+// Description: Tasks for DN only case (16x8 block)
+
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+// Assembler defaults for this kernel: execution size 16 and register type :ub
+.default_execution_size (16)
+.default_register_type :ub
+// Register counts declared for this kernel: 128 in total, 7 for the payload
+.reg_count_total 128
+.reg_count_payload 7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later)
+// 0000,0000,0000
+// 0001(Spawn a root thread),0001 (Root thread spawn thread)
+// = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later)
+// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+// 0000,0000 (Binding table index, added later)
+// = 0x02000011
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+ // Message Descriptors
+ // = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+ // 1 (header present 1) 0 1010 (media block write) 000000
+ // 00000000 (binding table index - set later)
+ // = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+ // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT
+
+// Four typed views (ub, uw, ud, f) of the same message payload area, all based at r30
+.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+// All five declarations alias the same storage at r9.0; they differ only in element type and default regions
+.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+
+.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+// ub4ROBUF is the byte view with a stride of 4 on both source (<32;8,4>) and destination (<4>)
+.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+
+
+// End of common.inc
+
+
+// FileName: DNDI.inc
+// Author: Vivek Kumar
+// Description: Include file for DN, DI and DNDI
+// Inputs: DI_ENABLE, DN_ENABLE, DN_PLANAR, DN_PACKED
+
+
+
+
+// End of common.inc
+
+
+//Interface:
+//Static Parameters:
+//r1
+
+
+//====================== Binding table (Explicit To DNDI)=========================================
+
+
+.declare mudMSGHDR_DNDI Base=r18 ElementSize=4 Type=ud
+.declare mdMSGHDR_DNDI Base=r18 ElementSize=4 Type=d
+.declare mwMSGHDR_DNDI Base=r18 ElementSize=2 Type=w
+// Each group names a message-header area by its first register; variants within a group differ only in element type
+
+.declare mudMSGHDR_STMM Base=r20 ElementSize=4 Type=ud
+
+
+.declare mudMSGHDR_HIST Base=r22 ElementSize=4 Type=ud
+
+
+.declare mudMSGHDR_ENC_STATS Base=r24 ElementSize=4 Type=ud
+.declare muwMSGHDR_ENC_STATS Base=r24 ElementSize=2 Type=uw
+.declare mubMSGHDR_ENC_STATS Base=r24 ElementSize=1 Type=ub
+
+
+.declare mudMSGHDR_DN_OUT Base=r31.0 ElementSize=4 Type=ud
+.declare mdMSGHDR_DN_OUT Base=r31.0 ElementSize=4 Type=d
+.declare mubMSGHDR_DN_OUT Base=r31.0 ElementSize=1 Type=ub
+
+// UVCOPY and UCOPY share the base r36; VCOPY starts two registers later at r38
+.declare mudMSGHDR_UVCOPY Base=r36 ElementSize=4 Type=ud
+.declare mdMSGHDR_UVCOPY Base=r36 ElementSize=4 Type=d
+.declare mudMSGHDR_UCOPY Base=r36 ElementSize=4 Type=ud
+.declare mudMSGHDR_VCOPY Base=r38 ElementSize=4 Type=ud
+
+// DI_OUT1 is based at r18, the same register as the DNDI request header above
+.declare mudMSGHDR_DI_OUT1 Base=r18.0 ElementSize=4 Type=ud
+.declare mubMSGHDR_DI_OUT1 Base=r18.0 ElementSize=1 Type=ub
+
+// DI_OUT2 is based at r23
+.declare mudMSGHDR_DI_OUT2 Base=r23.0 ElementSize=4 Type=ud
+.declare mubMSGHDR_DI_OUT2 Base=r23.0 ElementSize=1 Type=ub
+
+//r45
+//Use r45 as message header, so no need to "mov" the data.
+// r45 sits immediately before the DNDI response area, which is declared at r46 below
+.declare mudDN_Y_OUT Base=r45.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+// Message response (Denoised & DI-ed pixels & statistics); Use buffer 5
+.declare udDNDI_RESP Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare uwDNDI_RESP Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare ubDNDI_RESP Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+// Message response (UV Copy); Use buffer 5
+.declare udDNDI_UV_RESP Base=r58.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare ubDNDI_UV_RESP Base=r58.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+// The temp area below starts at r10, inside the r9-r17 temp range noted in common.inc
+//Temp GRFs: For 42X to 422 Conversion
+.declare uwDNDI_UVCOPY_TEMP Base=r10.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw //8 GRFs
+.declare ubDNDI_UVCOPY_TEMP Base=r10.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub //8 GRFs
+//---------------------------------------------------------------------------
+// Message descriptors
+//---------------------------------------------------------------------------
+// Extended message descriptor
+ // Message descriptor for sampler read
+ // = 000 0010 (message len 2) 00000 (resp len - set later, 12 or 5 or 11)
+ // 1 (header present 1) 0 11 (SIMD32/64 mode)
+ // 1000 (message type) 0000 (DI state index)
+ // 00000000 (binding table index - set later)
+ // = 0x040b8000
+
+
+// Attention: The Message Length is The Number of GRFs with Data Only, without the Header
+
+
+//---------------------------------------------------------------------------
+// VDI Return Data format
+//---------------------------------------------------------------------------
+// Defines for DI enabled
+
+
+// Defines for DI disabled
+
+
+
+// FileName: DNDI_Command.asm
+// Author: Vivek Kumar
+// Description: Sends a message to the VDI to process one DN (16x8) or DNDI (16x4) block
+// Reads the block origin from words r7.0/r7.1 and leaves the reply in udDNDI_RESP (r46 onward)
+// Prepare the DNDI send command
+mov (8) mudMSGHDR_DNDI(0)<1> r0.0<8;8,1>:ud // message header
+mov (1) mwMSGHDR_DNDI(1,4)<1> r7.0<0;1,0>:w { NoDDClr } // horizontal origin -> word 4 of the second header register // Do we need to add offset here? -vK
+mov (1) mwMSGHDR_DNDI(1,12)<1> r7.1<0;1,0>:w { NoDDChk } // vertical origin -> word 12 of the second header register // Can these 2 be combined? - vK
+// r18 is the base of mudMSGHDR_DNDI, so the send starts at the header built above
+send (8) udDNDI_RESP(0)<1> r18 0x2 0x45E8003:ud
+
+// On Gen6, with VDI walker, use the XY pair returned rather than programmed above
+// VDI_RETURNED_XY is ordered XY in case of walker enabled and the same as programmed in case of walker disabled
+mov (2) r7.0<1>:w uwDNDI_RESP(4,14)<2;2,1> // horizontal/vertical origin in W.14 and W.15 of udDNDI_RESP(4); overwrites r7.0 and r7.1
+
+
+
+// FileName: DN_Hist_Save.asm
+// Author: Vivek Kumar
+// Description: Saves DN history data to statistics surface
+// The header is staged in r27 and copied to mudMSGHDR_HIST(0) (r22) just before the send
+// Write denoise history to memory
+mov (8) r27<1>:ud r0.0<8;8,1>:ud // message header
+
+// Payload: two dwords taken from the start of udDNDI_RESP(4), placed in the register after the header
+ mov (2) mudMSGHDR_HIST(1)<1> udDNDI_RESP(4,0)<2;2,1> // Move denoise history to MRF (4x2)
+
+// Header fields written below: dwords 0 and 1 = origin, dword 2 = block size
+shr (2) r27.0<1>:ud r7.0<2;2,1>:w 2:w // X,Y origin / 4
+add (1) r27.0<1>:ud r27.0<0;1,0>:ud r1.12<0;1,0>:uw { NoDDClr } // Add pitch to X origin
+mov (1) r27.2<1>:ud 0x10003:ud { NoDDChk } // block width and height
+// r27 keeps this header afterwards; the UV load section later edits r27 again without reloading r0
+mov (8) mudMSGHDR_HIST(0)<1> r27.0<8;8,1>:ud
+send (8) null<1>:d r22 0x5 0x40A8021:ud
+
+
+
+// FileName: DNDI_Enc_Stats_Save.asm
+// Author: Vivek Kumar
+// Description: Saves Encoder Statistics data to statistics surface in case of DI enabled (for 16x4 block)
+// Builds a header at r24 and a zero-initialised payload register after it, then fills selected fields from udDNDI_RESP(4)
+// Write encoder statistics to memory
+//Currently enable this only on Gen6 validation
+mov (8) mudMSGHDR_ENC_STATS(1)<1> 0x0:ud // Init payload MRF
+mov (8) mudMSGHDR_ENC_STATS(0)<1> r0.0<8;8,1>:ud // message header
+// Header origin: X = r7.0 >> 1, Y = (r7.1 * 3) >> 2, with the product held in acc0.1
+shr (1) mudMSGHDR_ENC_STATS(0,0)<1> r7.0<0;1,0>:w 1:w { NoDDClr } // X origin / 2 (original note: enable the flag { NoDDClr } after testing on si)
+mul (1) acc0.1<1>:ud r7.1<0;1,0>:w 3:w // Y origin * 3
+shr (1) mudMSGHDR_ENC_STATS(0,1)<1> acc0.1<0;1,0>:ud 2:w { NoDDClr, NoDDChk } // Y origin * 3/4 (original note: enable the flags { NoDDClr, NoDDChk } after testing on si)
+mov (1) mudMSGHDR_ENC_STATS(0,2)<1> 0x50003:ud { NoDDChk } // block width and height (8x3) (original note: enable the flag { NoDDChk } after testing on si)
+add (2) mudMSGHDR_ENC_STATS(0,0)<1> mudMSGHDR_ENC_STATS(0,0)<2;2,1> r1.12<2;2,1>:uw // Add pitch to X,Y origin
+// NOTE(review): 0x7000F is labelled (16x8) and 0x10003 (4x2) elsewhere in this kernel; read the same way 0x50003 would be 4x6, not 8x3 -- confirm
+
+ //Data block for Encoder Statistics
+ //----------------------------------------------------
+ //| 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | Bytes
+ //----------------------------------------------------
+ //| BNE | X | X | X | X |
+ //----------------------------------------------------
+ //| X | SVCM | X |
+ //----------------------------------------------------
+ //| SHCM | STAD | X |
+ //----------------------------------------------------
+ //| X | X |
+ //----------------------------------------------------
+ //| X | SVCM | X |
+ //----------------------------------------------------
+ //| SHCM | STAD | X |
+ //----------------------------------------------------
+ mov (1) mubMSGHDR_ENC_STATS(1,0)<1> ubDNDI_RESP(4,8)<0;1,0> { NoDDClr } // Move encoder statistics to MRF
+ mov (1) muwMSGHDR_ENC_STATS(1,3)<1> uwDNDI_RESP(4,11)<0;1,0> { NoDDClr, NoDDChk } // Move encoder statistics to MRF
+ mov (2) muwMSGHDR_ENC_STATS(1,4)<1> uwDNDI_RESP(4,12)<2;2,1> { NoDDClr, NoDDChk } // Move encoder statistics to MRF
+ mov (1) muwMSGHDR_ENC_STATS(1,9)<1> uwDNDI_RESP(4,8)<0;1,0> { NoDDClr, NoDDChk } // Move encoder statistics to MRF
+ mov (2) muwMSGHDR_ENC_STATS(1,10)<1> uwDNDI_RESP(4,9)<2;2,1> { NoDDChk } // Move encoder statistics to MRF
+// Only byte 0 and words 3-5 and 9-11 of the payload register are written above; the rest stays zero from the init
+
+send (8) null<1>:d r24 0x5 0x40A8021:ud
+
+
+
+// FileName: DN_Load_UV_NV12_16x8.asm
+// Author: Vivek Kumar
+// Description: Read UV for 16x8 block through DATAPORT
+
+
+
+// FileName: UVCopy_Load_16x8.asm
+// Author: Vivek Kumar
+// Description: Read UV for 16x8 block through DATAPORT
+
+//CHANGE : Read extra UV data to convert to 422. -rT
+//Extra data is read in ALL cases, whether or not upsampling is required later on, to keep things simple.
+// r27 is not reloaded from r0 here: dwords 3-7 still hold the r0 copy made in the DN history save section
+// Only dwords 0-2 (X, Y, block size) are rewritten before r27 is copied into the header at r36
+ add (2) r27.0<1>:d r7.0<2;2,1>:w r4.4<2;2,1>:w // Source Y Block origin
+ asr (1) r27.1<1>:d r27.1<0;1,0>:d 1:w { NoDDClr } // U/V block vertical origin should be half of Y's (only dword 1 is shifted)
+ mov (1) r27.2<1>:ud 0x4000F:ud { NoDDChk } // U/V block width and height (8x5), one row more than is written back
+ mov (8) mudMSGHDR_UVCOPY(0)<1> r27.0<8;8,1>:ud
+ send (8) udDNDI_UV_RESP(0)<1> r36 0x4 0x2390001:ud
+
+ //Update Header for Save
+ mov (1) mudMSGHDR_UVCOPY(0,2)<1> 0x3000F:ud // U/V block width and height (8x4)
+
+
+
+// FileName: DN_Save_Y_16x8.asm
+// Author: Vivek Kumar
+// Description: Save one 16x8 block of Y channel of DN output for reference
+// The header is built in place at r45 (mudDN_Y_OUT), directly ahead of the DNDI response area at r46
+
+mov (8) mudDN_Y_OUT(0,0)<1> r0<8;8,1>:ud // message header
+mov (2) mudDN_Y_OUT(0,0)<1> r7.0<2;2,1>:w { NoDDClr } // X and Y origin (words r7.0, r7.1 widened into dwords 0 and 1)
+mov (1) mudDN_Y_OUT(0,2)<1> 0x7000F:ud { NoDDChk } // block width and height (16x8)
+
+//send out data through data port
+send (8) null<1>:d mudDN_Y_OUT 0x5 0xA0A8018:ud
+
+
+
+// FileName: DN_Save_UV_NV12_16x8.asm
+// Author: Vivek Kumar
+// Description: Save UV for 16x8 block through DATAPORT
+
+
+
+// FileName: UVCopy_Save_16x8.asm
+// Author: Vivek Kumar
+// Description: Save UV for 16x8 block through DATAPORT
+
+
+//Reuse the header from Load component
+//Header is modified at the end of load - to be usable for save.
+// Payload: the first two UV response registers (r58, r59) are copied in behind the header at r36
+ mov (8) mudMSGHDR_UVCOPY(1)<1> udDNDI_UV_RESP(0)<8;8,1>
+ mov (8) mudMSGHDR_UVCOPY(2)<1> udDNDI_UV_RESP(1)<8;8,1>
+ send (8) null<1>:d r36 0x5 0x60A8019:ud
+
+
+
+// FileName: DN_Upsample_UV_NV12_16x8.asm
+// Author: Tatiya, Rupesh
+// Description: Upconvert 420 UV to 422
+
+
+
+// FileName: UVCopy_Upsample_UV_16x8.asm
+// Author: Tatiya, Rupesh
+// Description: Convert 42X UV to 422 - to be used for IECP.
+// Eight word-typed result rows are produced from five 16-byte input rows of ubDNDI_UV_RESP (offsets 0,16,32,48,64):
+// even results average a row with itself, odd results average a row with the row below it
+ avg.sat (16) uwDNDI_UVCOPY_TEMP(0) ubDNDI_UV_RESP(0,0)<16;16,1> ubDNDI_UV_RESP(0,0)<16;16,1>
+ avg.sat (16) uwDNDI_UVCOPY_TEMP(1) ubDNDI_UV_RESP(0,0)<16;16,1> ubDNDI_UV_RESP(0,16)<16;16,1>
+ avg.sat (16) uwDNDI_UVCOPY_TEMP(2) ubDNDI_UV_RESP(0,16)<16;16,1> ubDNDI_UV_RESP(0,16)<16;16,1>
+ avg.sat (16) uwDNDI_UVCOPY_TEMP(3) ubDNDI_UV_RESP(0,16)<16;16,1> ubDNDI_UV_RESP(0,32)<16;16,1>
+ avg.sat (16) uwDNDI_UVCOPY_TEMP(4) ubDNDI_UV_RESP(0,32)<16;16,1> ubDNDI_UV_RESP(0,32)<16;16,1>
+ avg.sat (16) uwDNDI_UVCOPY_TEMP(5) ubDNDI_UV_RESP(0,32)<16;16,1> ubDNDI_UV_RESP(0,48)<16;16,1>
+ avg.sat (16) uwDNDI_UVCOPY_TEMP(6) ubDNDI_UV_RESP(0,48)<16;16,1> ubDNDI_UV_RESP(0,48)<16;16,1>
+ avg.sat (16) uwDNDI_UVCOPY_TEMP(7) ubDNDI_UV_RESP(0,48)<16;16,1> ubDNDI_UV_RESP(0,64)<16;16,1>
+// Repack into ubDNDI_RESP(5): each mov reads every 4th byte from two TEMP registers; source offset 0 lands on odd bytes, offset 2 on even bytes
+ mov (16) ubDNDI_RESP(5,1)<2> ubDNDI_UVCOPY_TEMP(0,0)<32;8,4> { NoDDClr } //Copy U data
+ mov (16) ubDNDI_RESP(5,0)<2> ubDNDI_UVCOPY_TEMP(0,2)<32;8,4> { NoDDChk } //Copy V data
+ mov (16) ubDNDI_RESP(5,33)<2> ubDNDI_UVCOPY_TEMP(2,0)<32;8,4> { NoDDClr } //Copy U data
+ mov (16) ubDNDI_RESP(5,32)<2> ubDNDI_UVCOPY_TEMP(2,2)<32;8,4> { NoDDChk } //Copy V data
+ mov (16) ubDNDI_RESP(5,65)<2> ubDNDI_UVCOPY_TEMP(4,0)<32;8,4> { NoDDClr } //Copy U data
+ mov (16) ubDNDI_RESP(5,64)<2> ubDNDI_UVCOPY_TEMP(4,2)<32;8,4> { NoDDChk } //Copy V data
+ mov (16) ubDNDI_RESP(5,97)<2> ubDNDI_UVCOPY_TEMP(6,0)<32;8,4> { NoDDClr } //Copy U data
+ mov (16) ubDNDI_RESP(5,96)<2> ubDNDI_UVCOPY_TEMP(6,2)<32;8,4> { NoDDChk } //Copy V data
+
+
+
+// FileName: DN_Save_422CP_16x8.asm
+// Author: Vivek Kumar
+// Description: Save one 16x8 block of DN output to the color pipe in 4-2-2 format
+// The block is written as two messages: one headed at r31 (mudMSGHDR_DN_OUT) and one headed at r36 (mubMSGHDR_DN_OUT_2)
+// r36 is also the base of mudMSGHDR_UVCOPY, so the second message reuses the UV copy message registers
+.declare mubMSGHDR_DN_OUT_2 Base=r36.0 ElementSize=1 Type=ub
+
+
+mov (8) mudMSGHDR_DN_OUT(0)<1> r0<8;8,1>:ud // message header
+shl (1) mdMSGHDR_DN_OUT(0,0)<1> r7.0<0;1,0>:w 1:w { NoDDClr } // X origin * 2 (422 output)
+mov (1) mdMSGHDR_DN_OUT(0,1)<1> r7.1<0;1,0>:w { NoDDClr, NoDDChk } // Y origin
+mov (1) mudMSGHDR_DN_OUT(0,2)<1> 0x7000F:ud { NoDDClr, NoDDChk } // block width and height (16x8)
+
+//M0.3 - 0 - CP Enable, 1 - Area of Interest, 3:2 Message Format(TBD), 4:3 - Ignored, 31:5 CP state pointer
+//Compose area-of-interest bit + color pipe state pointer
+or (1) mudMSGHDR_DN_OUT(0,3)<1> r2.4<0;1,0>:ud r7.26<0;1,0>:b { NoDDChk }
+
+// First 8 x 8 Block
+ mov (8) mubMSGHDR_DN_OUT(1)<2> ubDNDI_RESP(0,0)<8;8,1> { NoDDClr } // copy line of Y directly to memory as optimization
+ mov (8) mubMSGHDR_DN_OUT(1,16)<2> ubDNDI_RESP(0,16)<8;8,1> { NoDDClr, NoDDChk } // copy line of Y directly to memory as optimization
+ mov (8) mubMSGHDR_DN_OUT(2)<2> ubDNDI_RESP(0,32)<8;8,1> { NoDDClr } // copy line of Y directly to memory as optimization
+ mov (8) mubMSGHDR_DN_OUT(2,16)<2> ubDNDI_RESP(0,48)<8;8,1> { NoDDClr, NoDDChk } // copy line of Y directly to memory as optimization
+ mov (8) mubMSGHDR_DN_OUT(3)<2> ubDNDI_RESP(0,64)<8;8,1> { NoDDClr } // copy line of Y directly to memory as optimization
+ mov (8) mubMSGHDR_DN_OUT(3,16)<2> ubDNDI_RESP(0,80)<8;8,1> { NoDDClr, NoDDChk } // copy line of Y directly to memory as optimization
+ mov (8) mubMSGHDR_DN_OUT(4)<2> ubDNDI_RESP(0,96)<8;8,1> { NoDDClr } // copy line of Y directly to memory as optimization
+ mov (8) mubMSGHDR_DN_OUT(4,16)<2> ubDNDI_RESP(0,112)<8;8,1> { NoDDClr, NoDDChk } // copy line of Y directly to memory as optimization
+// Y fills the even bytes of each 16-byte row above; below, U (odd bytes of ubDNDI_RESP(5)) goes to bytes 1,5,9,13 and V (even bytes) to bytes 3,7,11,15
+ mov (4) mubMSGHDR_DN_OUT(1,1)<4> ubDNDI_RESP(5,1)<8;4,2> { NoDDClr, NoDDChk } // copy line of U directly to memory as optimization
+ mov (4) mubMSGHDR_DN_OUT(1,17)<4> ubDNDI_RESP(5,17)<8;4,2> { NoDDClr, NoDDChk } // copy line of U directly to memory as optimization
+
+ mov (4) mubMSGHDR_DN_OUT(1,3)<4> ubDNDI_RESP(5,0)<8;4,2> { NoDDChk } // copy line of V directly to memory as optimization
+ mov (4) mubMSGHDR_DN_OUT(1,19)<4> ubDNDI_RESP(5,16)<8;4,2> { NoDDChk } // copy line of V directly to memory as optimization
+ mov (4) mubMSGHDR_DN_OUT(2,1)<4> ubDNDI_RESP(5,33)<8;4,2> { NoDDClr, NoDDChk } // copy line of U directly to memory as optimization
+ mov (4) mubMSGHDR_DN_OUT(2,17)<4> ubDNDI_RESP(5,49)<8;4,2> { NoDDClr, NoDDChk } // copy line of U directly to memory as optimization
+
+ mov (4) mubMSGHDR_DN_OUT(2,3)<4> ubDNDI_RESP(5,32)<8;4,2> { NoDDChk } // copy line of V directly to memory as optimization
+ mov (4) mubMSGHDR_DN_OUT(2,19)<4> ubDNDI_RESP(5,48)<8;4,2> { NoDDChk } // copy line of V directly to memory as optimization
+ mov (4) mubMSGHDR_DN_OUT(3,1)<4> ubDNDI_RESP(5,65)<8;4,2> { NoDDClr, NoDDChk } // copy line of U directly to memory as optimization
+ mov (4) mubMSGHDR_DN_OUT(3,17)<4> ubDNDI_RESP(5,81)<8;4,2> { NoDDClr, NoDDChk } // copy line of U directly to memory as optimization
+
+ mov (4) mubMSGHDR_DN_OUT(3,3)<4> ubDNDI_RESP(5,64)<8;4,2> { NoDDChk } // copy line of V directly to memory as optimization
+ mov (4) mubMSGHDR_DN_OUT(3,19)<4> ubDNDI_RESP(5,80)<8;4,2> { NoDDChk } // copy line of V directly to memory as optimization
+ mov (4) mubMSGHDR_DN_OUT(4,1)<4> ubDNDI_RESP(5,97)<8;4,2> { NoDDClr, NoDDChk } // copy line of U directly to memory as optimization
+ mov (4) mubMSGHDR_DN_OUT(4,17)<4> ubDNDI_RESP(5,113)<8;4,2> { NoDDClr, NoDDChk } // copy line of U directly to memory as optimization
+
+ mov (4) mubMSGHDR_DN_OUT(4,3)<4> ubDNDI_RESP(5,96)<8;4,2> { NoDDChk } // copy line of V directly to memory as optimization
+ mov (4) mubMSGHDR_DN_OUT(4,19)<4> ubDNDI_RESP(5,112)<8;4,2> { NoDDChk } // copy line of V directly to memory as optimization
+
+// Second 8 x 8 Block
+mov (8) r36.0<1>:ud r31.0<8;8,1>:ud // start from a copy of the first block's header (r31)
+add (1) r36.0<1>:ud r36.0<0;1,0>:w 0x10:w // X origin + 0x10 for this block
+// Same packing as the first block, but Y is read from byte offset 8 of each row and chroma from offsets 8/9 of each ubDNDI_RESP(5) row
+ mov (8) mubMSGHDR_DN_OUT_2(1)<2> ubDNDI_RESP(0,8)<8;8,1> { NoDDClr } // copy line of Y directly to memory as optimization
+ mov (8) mubMSGHDR_DN_OUT_2(1,16)<2> ubDNDI_RESP(0,24)<8;8,1> { NoDDClr, NoDDChk } // copy line of Y directly to memory as optimization
+ mov (8) mubMSGHDR_DN_OUT_2(2)<2> ubDNDI_RESP(0,40)<8;8,1> { NoDDClr } // copy line of Y directly to memory as optimization
+ mov (8) mubMSGHDR_DN_OUT_2(2,16)<2> ubDNDI_RESP(0,56)<8;8,1> { NoDDClr, NoDDChk } // copy line of Y directly to memory as optimization
+ mov (8) mubMSGHDR_DN_OUT_2(3)<2> ubDNDI_RESP(0,72)<8;8,1> { NoDDClr } // copy line of Y directly to memory as optimization
+ mov (8) mubMSGHDR_DN_OUT_2(3,16)<2> ubDNDI_RESP(0,88)<8;8,1> { NoDDClr, NoDDChk } // copy line of Y directly to memory as optimization
+ mov (8) mubMSGHDR_DN_OUT_2(4)<2> ubDNDI_RESP(0,104)<8;8,1> { NoDDClr } // copy line of Y directly to memory as optimization
+ mov (8) mubMSGHDR_DN_OUT_2(4,16)<2> ubDNDI_RESP(0,120)<8;8,1> { NoDDClr, NoDDChk } // copy line of Y directly to memory as optimization
+
+ mov (4) mubMSGHDR_DN_OUT_2(1,1)<4> ubDNDI_RESP(5,9)<8;4,2> { NoDDClr, NoDDChk } // copy line of U directly to memory as optimization
+ mov (4) mubMSGHDR_DN_OUT_2(1,17)<4> ubDNDI_RESP(5,25)<8;4,2> { NoDDClr, NoDDChk } // copy line of U directly to memory as optimization
+
+ mov (4) mubMSGHDR_DN_OUT_2(1,3)<4> ubDNDI_RESP(5,8)<8;4,2> { NoDDChk } // copy line of V directly to memory as optimization
+ mov (4) mubMSGHDR_DN_OUT_2(1,19)<4> ubDNDI_RESP(5,24)<8;4,2> { NoDDChk } // copy line of V directly to memory as optimization
+ mov (4) mubMSGHDR_DN_OUT_2(2,1)<4> ubDNDI_RESP(5,41)<8;4,2> { NoDDClr, NoDDChk } // copy line of U directly to memory as optimization
+ mov (4) mubMSGHDR_DN_OUT_2(2,17)<4> ubDNDI_RESP(5,57)<8;4,2> { NoDDClr, NoDDChk } // copy line of U directly to memory as optimization
+
+ mov (4) mubMSGHDR_DN_OUT_2(2,3)<4> ubDNDI_RESP(5,40)<8;4,2> { NoDDChk } // copy line of V directly to memory as optimization
+ mov (4) mubMSGHDR_DN_OUT_2(2,19)<4> ubDNDI_RESP(5,56)<8;4,2> { NoDDChk } // copy line of V directly to memory as optimization
+ mov (4) mubMSGHDR_DN_OUT_2(3,1)<4> ubDNDI_RESP(5,73)<8;4,2> { NoDDClr, NoDDChk } // copy line of U directly to memory as optimization
+ mov (4) mubMSGHDR_DN_OUT_2(3,17)<4> ubDNDI_RESP(5,89)<8;4,2> { NoDDClr, NoDDChk } // copy line of U directly to memory as optimization
+
+ mov (4) mubMSGHDR_DN_OUT_2(3,3)<4> ubDNDI_RESP(5,72)<8;4,2> { NoDDChk } // copy line of V directly to memory as optimization
+ mov (4) mubMSGHDR_DN_OUT_2(3,19)<4> ubDNDI_RESP(5,88)<8;4,2> { NoDDChk } // copy line of V directly to memory as optimization
+ mov (4) mubMSGHDR_DN_OUT_2(4,1)<4> ubDNDI_RESP(5,105)<8;4,2> { NoDDClr, NoDDChk } // copy line of U directly to memory as optimization
+ mov (4) mubMSGHDR_DN_OUT_2(4,17)<4> ubDNDI_RESP(5,121)<8;4,2> { NoDDClr, NoDDChk } // copy line of U directly to memory as optimization
+
+ mov (4) mubMSGHDR_DN_OUT_2(4,3)<4> ubDNDI_RESP(5,104)<8;4,2> { NoDDChk } // copy line of V directly to memory as optimization
+ mov (4) mubMSGHDR_DN_OUT_2(4,19)<4> ubDNDI_RESP(5,120)<8;4,2> { NoDDChk } // copy line of V directly to memory as optimization
+
+//send out data through data port: one message per 8x8 block, from the headers at r31 and r36, both with the same descriptor
+send (8) null<1>:d r31.0 0x5 0xA0A801B:ud
+send (8) null<1>:d r36.0 0x5 0xA0A801B:ud
+
+
+
+//End of Thread message
+// r0 is copied to r127, which is then used as the source register of the final send
+mov (8) r127<1>:ud r0.0<8;8,1>:ud
+ send (1) null<1>:d r127 0x27 0x02000010
+
+
+.end_code
+.end_kernel
diff --git a/src/shaders/post_processing/gen7/NV12_DN_NV12.g4a b/src/shaders/post_processing/gen7/NV12_DN_NV12.g4a
new file mode 100644
index 0000000..4c932b8
--- /dev/null
+++ b/src/shaders/post_processing/gen7/NV12_DN_NV12.g4a
@@ -0,0 +1,420 @@
+/*
+ * Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// 40 // Total instruction count
+// 1 // Total kernel count
+
+.kernel NV12_DN_NV12
+.code
+
+
+
+// FileName: DN_PL_Core.asm
+// Author: Vivek Kumar
+// Description: Tasks for DN only case (16x8 block) for planar format
+
+
+
+// FileName: DN.asm
+// Author: Vivek Kumar
+// Description: Tasks for DN only case (16x8 block)
+
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16)
+.default_register_type :ub
+
+.reg_count_total 128
+.reg_count_payload 7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later)
+// 0000,0000,0000
+// 0001(Spawn a root thread),0001 (Root thread spawn thread)
+// = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later)
+// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+// 0000,0000 (Binding table index, added later)
+// = 0x02000011
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+ // Message Descriptors
+ // = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+ // 1 (header present 1) 0 1010 (media block write) 000000
+ // 00000000 (binding table index - set later)
+ // = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+ // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT
+
+
+.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+
+.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+
+.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+
+
+// End of common.inc
+
+
+// FileName: DNDI.inc
+// Author: Vivek Kumar
+// Description: Include file for DN, DI and DNDI
+// Inputs: DI_ENABLE, DN_ENABLE, DN_PLANAR, DN_PACKED
+
+
+
+
+// End of common.inc
+
+
+//Interface:
+//Static Parameters:
+//r1
+
+
+//====================== Binding table (Explicit To DNDI)=========================================
+
+
+.declare mudMSGHDR_DNDI Base=r18 ElementSize=4 Type=ud
+.declare mdMSGHDR_DNDI Base=r18 ElementSize=4 Type=d
+.declare mwMSGHDR_DNDI Base=r18 ElementSize=2 Type=w
+
+
+.declare mudMSGHDR_STMM Base=r20 ElementSize=4 Type=ud
+
+
+.declare mudMSGHDR_HIST Base=r22 ElementSize=4 Type=ud
+
+
+.declare mudMSGHDR_ENC_STATS Base=r24 ElementSize=4 Type=ud
+.declare muwMSGHDR_ENC_STATS Base=r24 ElementSize=2 Type=uw
+.declare mubMSGHDR_ENC_STATS Base=r24 ElementSize=1 Type=ub
+
+
+.declare mudMSGHDR_DN_OUT Base=r31.0 ElementSize=4 Type=ud
+.declare mdMSGHDR_DN_OUT Base=r31.0 ElementSize=4 Type=d
+.declare mubMSGHDR_DN_OUT Base=r31.0 ElementSize=1 Type=ub
+
+
+.declare mudMSGHDR_UVCOPY Base=r36 ElementSize=4 Type=ud
+.declare mdMSGHDR_UVCOPY Base=r36 ElementSize=4 Type=d
+.declare mudMSGHDR_UCOPY Base=r36 ElementSize=4 Type=ud
+.declare mudMSGHDR_VCOPY Base=r38 ElementSize=4 Type=ud
+
+
+.declare mudMSGHDR_DI_OUT1 Base=r18.0 ElementSize=4 Type=ud
+.declare mubMSGHDR_DI_OUT1 Base=r18.0 ElementSize=1 Type=ub
+
+
+.declare mudMSGHDR_DI_OUT2 Base=r23.0 ElementSize=4 Type=ud
+.declare mubMSGHDR_DI_OUT2 Base=r23.0 ElementSize=1 Type=ub
+
+//r45
+//Use r45 as message header, so no need to "mov" the data.
+
+.declare mudDN_Y_OUT Base=r45.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+// Message response (Denoised & DI-ed pixels & statistics); Use buffer 5
+.declare udDNDI_RESP Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare uwDNDI_RESP Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare ubDNDI_RESP Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+// Message response (UV Copy); Use buffer 5
+.declare udDNDI_UV_RESP Base=r58.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare ubDNDI_UV_RESP Base=r58.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+//Temp GRFs: For 42X to 422 Conversion
+.declare uwDNDI_UVCOPY_TEMP Base=r10.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw //8 GRFs
+.declare ubDNDI_UVCOPY_TEMP Base=r10.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub //8 GRFs
+//---------------------------------------------------------------------------
+// Message descriptors
+//---------------------------------------------------------------------------
+// Extended message descriptor
+ // Message descriptor for sampler read
+ // = 000 0010 (message len 2) 00000 (resp len - set later, 12 or 5 or 11)
+ // 1 (header present 1) 0 11 (SIMD32/64 mode)
+ // 1000 (message type) 0000 (DI state index)
+ // 00000000 (binding table index - set later)
+ // = 0x040b8000
+
+
+// Attention: The Message Length is The Number of GRFs with Data Only, without the Header
+
+
+//---------------------------------------------------------------------------
+// VDI Return Data format
+//---------------------------------------------------------------------------
+// Defines for DI enabled
+
+
+// Defines for DI disabled
+
+
+
+// FileName: DNDI_Command.asm
+// Author: Vivek Kumar
+// Description: Sends a message to the VDI to process one DN (16x8) or DNDI (16x4) block
+
+// Prepare the DNDI send command
+mov (8) mudMSGHDR_DNDI(0)<1> r0.0<8;8,1>:ud // message header
+mov (1) mwMSGHDR_DNDI(1,4)<1> r7.0<0;1,0>:w { NoDDClr } // horizontal origin // Do we need to add offset here? -vK
+mov (1) mwMSGHDR_DNDI(1,12)<1> r7.1<0;1,0>:w { NoDDChk } // vertical origin // Can these 2 be combined? - vK
+
+send (8) udDNDI_RESP(0)<1> r18 0x2 0x45E8003:ud
+
+// On Gen6, with VDI walker, use the XY pair returned rather than programmed above
+// VDI_RETURNED_XY is ordered XY in case of walker enables and the same as programmed in case of walker disabled
+mov (2) r7.0<1>:w uwDNDI_RESP(4,14)<2;2,1> // horizontal/Vertial origin in W.14 and W.15
+
+
+
+// FileName: DN_Hist_Save.asm
+// Author: Vivek Kumar
+// Description: Saves DN history data to statistics surface
+
+// Write denoise history to memory
+mov (8) r27<1>:ud r0.0<8;8,1>:ud // message header
+
+
+ mov (2) mudMSGHDR_HIST(1)<1> udDNDI_RESP(4,0)<2;2,1> // Move denoise history to MRF (4x2)
+
+
+shr (2) r27.0<1>:ud r7.0<2;2,1>:w 2:w // X,Y origin / 4
+add (1) r27.0<1>:ud r27.0<0;1,0>:ud r1.12<0;1,0>:uw { NoDDClr } // Add pitch to X origin
+mov (1) r27.2<1>:ud 0x10003:ud { NoDDChk } // block width and height
+
+mov (8) mudMSGHDR_HIST(0)<1> r27.0<8;8,1>:ud
+send (8) null<1>:d r22 0x5 0x40A8021:ud
+
+
+
+// FileName: DNDI_Enc_Stats_Save.asm
+// Author: Vivek Kumar
+// Description: Saves Encoder Statistics data to statistics surface in case of DI enabled (for 16x4 block)
+
+// Write encoder statistics to memory: zero the payload, build the header from r0 and r7, copy selected response fields, then send
+//Currently enable this only on Gen6 validation
+mov (8) mudMSGHDR_ENC_STATS(1)<1> 0x0:ud // Init payload MRF
+mov (8) mudMSGHDR_ENC_STATS(0)<1> r0.0<8;8,1>:ud // message header
+
+shr (1) mudMSGHDR_ENC_STATS(0,0)<1> r7.0<0;1,0>:w 1:w { NoDDClr } //enable the flag after testing on si { NoDDClr } // X origin / 2
+mul (1) acc0.1<1>:ud r7.1<0;1,0>:w 3:w // Y origin * 3 (kept in acc0.1 for the shift below)
+shr (1) mudMSGHDR_ENC_STATS(0,1)<1> acc0.1<0;1,0>:ud 2:w { NoDDClr, NoDDChk } //enable the flag after testing on si { NoDDClr, NoDDChk } // Y origin * 3/4
+mov (1) mudMSGHDR_ENC_STATS(0,2)<1> 0x50003:ud { NoDDChk } //enable the flag after testing on si { NoDDChk } // block width and height (8x3) // NOTE(review): literal has 0x0005 in the high word and 0x0003 in the low word - confirm it matches the intended block size
+add (2) mudMSGHDR_ENC_STATS(0,0)<1> mudMSGHDR_ENC_STATS(0,0)<2;2,1> r1.12<2;2,1>:uw // Add pitch to X,Y origin (r1.12:uw and r1.13:uw)
+
+
+ //Data block for Encoder Statistics
+ //----------------------------------------------------
+ //| 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | Bytes
+ //----------------------------------------------------
+ //| BNE | X | X | X | X |
+ //----------------------------------------------------
+ //| X | SVCM | X |
+ //----------------------------------------------------
+ //| SHCM | STAD | X |
+ //----------------------------------------------------
+ //| X | X |
+ //----------------------------------------------------
+ //| X | SVCM | X |
+ //----------------------------------------------------
+ //| SHCM | STAD | X |
+ //----------------------------------------------------
+ mov (1) mubMSGHDR_ENC_STATS(1,0)<1> ubDNDI_RESP(4,8)<0;1,0> { NoDDClr } // Move encoder statistics to MRF: response byte 8 -> payload byte 0
+ mov (1) muwMSGHDR_ENC_STATS(1,3)<1> uwDNDI_RESP(4,11)<0;1,0> { NoDDClr, NoDDChk } // Move encoder statistics to MRF: response word 11 -> payload word 3
+ mov (2) muwMSGHDR_ENC_STATS(1,4)<1> uwDNDI_RESP(4,12)<2;2,1> { NoDDClr, NoDDChk } // Move encoder statistics to MRF: response words 12-13 -> payload words 4-5
+ mov (1) muwMSGHDR_ENC_STATS(1,9)<1> uwDNDI_RESP(4,8)<0;1,0> { NoDDClr, NoDDChk } // Move encoder statistics to MRF: response word 8 -> payload word 9
+ mov (2) muwMSGHDR_ENC_STATS(1,10)<1> uwDNDI_RESP(4,9)<2;2,1> { NoDDChk } // Move encoder statistics to MRF: response words 9-10 -> payload words 10-11
+
+
+send (8) null<1>:d r24 0x5 0x40A8021:ud // issue the write; destination is null, so no response data is kept
+
+
+
+// FileName: DN_Load_UV_NV12_16x8.asm
+// Author: Vivek Kumar
+// Description: Read UV for 16x8 block through DATAPORT
+
+
+
+// FileName: UVCopy_Load_16x8.asm
+// Author: Vivek Kumar
+// Description: Read UV for 16x8 block through DATAPORT
+
+//CHANGE : Read extra UV data to convert to 422. -rT
+//We read the extra data in ALL cases, whether or not upsampling is required later on, to keep things simple.
+
+
+ add (2) r27.0<1>:d r7.0<2;2,1>:w r4.4<2;2,1>:w // Source Y Block origin: r7.0/r7.1 plus the offsets in r4.4/r4.5
+ asr (1) r27.1<1>:d r27.1<0;1,0>:d 1:w { NoDDClr } // U/V block origin should be half of Y's (vertical origin only)
+ mov (1) r27.2<1>:ud 0x4000F:ud { NoDDChk } // U/V block width and height (8x5)
+ mov (8) mudMSGHDR_UVCOPY(0)<1> r27.0<8;8,1>:ud // copy the finished header into the message register
+ send (8) udDNDI_UV_RESP(0)<1> r36 0x4 0x2390001:ud // issue the read; data is returned starting at udDNDI_UV_RESP(0)
+
+ //Update Header for Save: only the block-size dword changes, so the save can reuse this header
+ mov (1) mudMSGHDR_UVCOPY(0,2)<1> 0x3000F:ud // U/V block width and height (8x4)
+
+
+
+// FileName: DN_Save_Y_16x8.asm
+// Author: Vivek Kumar
+// Description: Save one 16x8 blocks of Y channel of DN output for reference
+
+
+mov (8) mudDN_Y_OUT(0,0)<1> r0<8;8,1>:ud // message header (copy of r0)
+mov (2) mudDN_Y_OUT(0,0)<1> r7.0<2;2,1>:w { NoDDClr } // X,Y origin (two words from r7.0/r7.1 into header dwords 0 and 1)
+mov (1) mudDN_Y_OUT(0,2)<1> 0x7000F:ud { NoDDChk } // block width and height (16x8)
+
+//send out data through data port; destination is null, so no response data is kept
+send (8) null<1>:d mudDN_Y_OUT 0x5 0xA0A8018:ud
+
+
+
+// FileName: DN_Save_UV_NV12_16x8.asm
+// Author: Vivek Kumar
+// Description: Save UV for 16x8 block through DATAPORT
+
+
+
+// FileName: UVCopy_Save_16x8.asm
+// Author: Vivek Kumar
+// Description: Save UV for 16x8 block through DATAPORT
+
+
+//Reuse the header from the Load component.
+//The header is modified at the end of the load so that it is usable for the save.
+
+ mov (8) mudMSGHDR_UVCOPY(1)<1> udDNDI_UV_RESP(0)<8;8,1> // payload register 1 <- first register of the UV read response
+ mov (8) mudMSGHDR_UVCOPY(2)<1> udDNDI_UV_RESP(1)<8;8,1> // payload register 2 <- second register of the UV read response
+ send (8) null<1>:d r36 0x5 0x60A8019:ud // issue the write from r36; destination is null, so no response data is kept
+
+
+
+//End of Thread message: copy r0 into r127 and send it with a null destination
+
+mov (8) r127<1>:ud r0.0<8;8,1>:ud // message payload is a copy of r0
+ send (1) null<1>:d r127 0x27 0x02000010 // end-of-thread send; nothing is returned
+
+
+.end_code
+.end_kernel
diff --git a/src/shaders/post_processing/gen7/PA_AVS_Buf_0.g4a b/src/shaders/post_processing/gen7/PA_AVS_Buf_0.g4a
new file mode 100644
index 0000000..5a1c4b1
--- /dev/null
+++ b/src/shaders/post_processing/gen7/PA_AVS_Buf_0.g4a
@@ -0,0 +1,539 @@
+/*
+ * Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// 39 // Total instruction count
+// 1 // Total kernel count
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16)
+.default_register_type :ub
+
+.reg_count_total 128
+.reg_count_payload 7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later)
+// 0000,0000,0000
+// 0001(Spawn a root thread),0001 (Root thread spawn thread)
+// = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later)
+// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+// 0000,0000 (Binding table index, added later)
+// = 0x02000011
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+ // Message Descriptors
+ // = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+ // 1 (header present 1) 0 1010 (media block write) 000000
+ // 00000000 (binding table index - set later)
+ // = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+ // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT
+
+
+.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub // byte view of the message payload at r30
+.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw // word view of the same registers
+.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud // dword view of the same registers
+.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f // float view of the same registers
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+
+.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f // float view of the rotation temp space at r9
+
+.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud // dword view of the same registers
+
+.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw // word view of the same registers
+
+.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub // byte view of the same registers
+
+.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub // byte view with a stride of 4 (every 4th byte)
+
+
+// End of common.inc
+
+
+// FileName: PA_AVS_Buf_0.asm
+// Author: Vivek Kumar
+// Description: Loads 8x8 AVS/IEF Packed data into Buffer 0
+
+
+
+// FileName : PA_AVS_Buf.asm
+// Author : Tatiya, Rupesh
+// Description : Loads 8x8 AVS/IEF Packed data into Buffer N
+
+//On IVB, for AVS module - set buffer pointers offset according to AVS Layout.
+//Change it to Sample Unorm layout in Shuffle modules.
+
+
+
+// Module name: Scaling.inc
+
+
+
+
+// Description: Includes all definitions explicit to Fast Composite.
+
+
+
+
+// End of common.inc
+
+
+//========== GRF partition ==========
+ // r0 header : r0 (1 GRF)
+ // Static parameters : r1 - r6 (6 GRFS)
+ // Inline parameters : r7 - r8 (2 GRFs)
+ // MSGSRC : r27 (1 GRF)
+//===================================
+
+//Interface:
+//========== Static Parameters (Explicit To Fast Composite) ==========
+//r1
+//CSC Set 0
+
+
+.declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud // dword view of r1 (static parameters, "CSC Set 0")
+
+//Constant alpha
+
+
+//r2
+
+
+// Gen7 AVS WA
+
+
+// WiDi Definitions
+
+
+//Colorfill
+
+
+ // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise.
+
+.declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub // byte view starting at byte 20 of r2; scalar (<0;1,0>) source region
+
+//r3
+//Normalised Ratio of Horizontal step size with main video for all layers
+
+
+ //Normalised Ratio of Horizontal step size with main video for all layers becomes
+ //Normalised Horizontal step size for all layers in VP_Setup.asm
+
+
+//r4
+//Normalised Vertical step size for all layers
+
+
+//r5
+//Normalised Vertical Frame Origin for all layers
+
+
+//r6
+//Normalised Horizontal Frame Origin for all layers
+
+
+//========== Inline Parameters (Explicit To Fast Composite) ==========
+
+
+//Main video Step X
+
+
+//====================== Binding table (Explicit To Fast Composite)=========================================
+
+
+//Used by Interlaced Scaling Kernels
+
+
+//========== Sampler State Table Index (Explicit To Fast Composite)==========
+//Sampler Index for AVS/IEF messages
+
+
+//Sampler Index for SIMD16 sampler messages
+
+
+//=============================================================================
+
+.declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f // float views of buffers 0-5 (bases r64, r80, r96, r112, r28, r46)
+.declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+
+.declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud // dword views of the same six buffers
+.declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+.declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw // word views of the same six buffers
+.declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_4 Base=r28.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_5 Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+.declare ubBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub // byte views of the same six buffers
+.declare ubBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+.declare ub4BUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub // byte views with a stride of 4 (every 4th byte)
+.declare ub4BUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+
+//Pointer to mask reg
+
+
+//r18
+
+
+//Always keep Channel Pointers and Offsets in the same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+.declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF: dword view of r18
+
+//r19
+
+
+.declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF: dword view of r19
+
+
+//r20
+
+.declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF: word view of r20
+
+//r21
+
+.declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF: word view of r21
+
+//r22
+
+
+//Always keep Channel Pointers and Offsets in the same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as
+//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT
+
+//r23
+
+
+//Lumakey
+
+
+//r24
+
+
+//r25
+
+
+//r26
+
+
+//defines to generate LABELS during compile time.
+
+
+ // Message Header
+ // m0.7 31:0 Debug
+ // m0.6 31:0 Debug
+ // m0.5 31:0 Ignored
+ // m0.4 31:0 Ignored
+ // m0.3 31:0 Ignored
+ // m0.2 31:16 Ignored
+ // 15 Alpha Write Channel Mask enable=0, disable=1
+ // 14 Blue Write Channel Mask (U)
+ // 13 Green Write Channel Mask (Y)
+ // 12 Red Write Channel Mask (V)
+ // 11:0 Ignored
+ // m0.1 Ignored
+ // m0.0 Ignored
+
+
+ // AVS payload
+ // m1.7 Group ID Number
+ // m1.6 U 2nd Derivative ---> NLAS dx
+ // m1.5 Delta V ---> Step Y
+ // m1.4 Delta U ---> Step X
+ // m1.3 Pixel 0 V Address ---> ORIY (Y0)
+ // m1.2 Pixel 0 U Address ---> ORIX (X0)
+ // m1.1 Vertical Block Number
+ // m1.0 Reserved
+
+ // Sampler Message Descriptor
+ // 31:29 Reserved 000
+ // 28:25 Message length 0010
+ // 24:20 Response length xxxxx ---> 4GRFs for each enabled channel (AVS), 2GRFs for each enabled channel (sample unorm)
+ // 19 Header Present 1
+ // 18:17 SIMD Mode 11 ---> SIMD32/64
+ // 16:12 Message Type xxxxx ---> 01011 sample_8x8, 01100 (sample_unorm), 01010 (sample_unorm+killpix)
+ // 11:8 Sampler Index xxxx
+ // 7:0 Binding Table Index xxxxxxxx
+
+
+ // Msg Header M0.2
+ // 15:15 Alpha Write Channel Mask, 0: written back, 1: not written back
+ // 14:14 Blue Write Channel Mask
+ // 13:13 Green Write Channel Mask
+ // 12:12 Red Write Channel Mask
+
+
+//By design, Buffer 0,1,2,3 always have Layer 0 and Buffer 4,5 always have L1-L7
+
+
+//used to generate LABELS at compile time.
+
+
+ // 18:17 SIMD Mode 10 ---> SIMD16
+ // 16:12 Message Type xxxxx ---> 00000 (SIMD16)
+
+
+//r10-17 - 8 GRFs to load SIMD16 data (upto 4 channels)
+//r18-19 - 2 GRFs to store sampler ramp.
+
+ .declare mfSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f // float view of the scaling payload at r14
+ .declare muwSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw // word view of the same registers
+ .declare mudCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud // dword view; NOTE(review): name looks like a typo for mudSCALING_..., kept because users are not visible here
+ .declare mubCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=1 SrcRegion=<32;32,1> DstRegion=<1> Type=ub // byte view; NOTE(review): name looks like a typo for mubSCALING_..., kept because users are not visible here
+
+
+ .declare fSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f // float view of the scaling temp space at r9
+ .declare udSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud // dword view of the same registers
+ .declare ub4SCALING_0X_34X_TEMP Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<1> Type=ub // byte view reading every 4th byte
+ .declare uwSCALING_0X_34X_TEMP Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw // word view of the same registers
+
+ // Sampler ramp is used for Scaling 0X_0.34X
+ .declare fSAMPLER_RAMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> Type=f // 1 GRF, 8 elements; shares its base (r9) with the *_TEMP views above
+
+
+ //#define rMSGDSC_UV r23.0
+
+
+//End of _SCALING_
+
+
+ //NOTE: We need offsets for the second half of LAYER 0 - even if we do not load it.
+ //Update the channel offset in the buffers for the lower 8x4 data for BUFFER_0.
+ mov (1) r22.4<1>:ud 0x400040:ud // written before the skip test below, so it is set even when the load is skipped
+
+
+ //Check if layer is to be skipped
+
+
+ // f0.1 pre-computed in Set_Layer_0
+ (-f0.1) jmpi (1) SKIP_AVS_LOAD_L0_0_ // f0.1 clear -> skip the whole load
+
+
+ //AVS_PAYLOAD already has all the data loaded at this point
+ add (1) a0.0:ud r23.5<0;1,0>:ud 0x50EB400:ud //msg desc: constant part plus the value held in r23.5, placed in a0.0 for the send
+
+ mov (1) r16.2:ud 0x00000000:ud // Enable ARGB channels (dword 2 of the header register r16; all channel-mask bits cleared)
+
+
+ //OPT: rAVS_PAYLOAD.1 and .7 --> use NODDCLR, NODDCHK -rT
+ mov (1) r25.7<1>:ud r7.7:ud { NoDDClr } // payload dword 7 (Group ID Number in the AVS payload layout) <- r7.7
+ mov (1) r25.1<1>:ud r7.12:uw { NoDDChk } // payload dword 1 (Vertical Block Number in the AVS payload layout) <- r7.12:uw
+
+
+ // set the vertical block number
+
+
+ mov (8) r17.0:ud r25.0<8;8,1>:ud // copy the AVS payload from r25 into r17, the register after the header at r16
+
+ // Gen7 AVS WA: only for YUV packed surfaces, NV12, and the Y channel of planar surfaces
+ // if ((int)(u_left*width + 5.0/256) > (int)(u_left*width))
+ // {
+ // modified_u_coord = u_coord - 5.0/(256*width); //floating point
+ // }
+ // else if ((int)(u_left*width + 255.0/256) == (int)(u_left*width))
+ // {
+ // modified_u_coord = u_coord + 1.0/(256*width); //floating point
+ // }
+ // else {
+ // modified_u_coord = u_coord;
+ // }
+ // where u_left = u - 2*du + 3*ddu when IEF is on,
+ // and u_left = u when IEF is off.
+ //
+
+ // check whether Gen7 AVS WA is enabled (bit 1 of r2.3:uw); f0.0 is saved first because it is reused as scratch below
+ mov (1) r14.8:uw f0.0:uw // save f0.0
+
+ and.nz.f0.0 (1) null<1>:uw r2.3:uw 0x2:uw // f0.0 = (r2.3:uw & 0x2) != 0
+ (-f0.0)jmpi (1) GEN7_AVS_WA_DONE_L0_0_ // bit clear -> workaround disabled, go straight to the send
+
+
+ // Gen7 AVS WA, check if IEF is ON for choosing Gen7 AVS WA formula
+
+ and.nz.f0.0 (8) null<1>:uw r2.3<0;1,0>:uw 0x4:uw // f0.0 = (r2.3:uw & 0x4) != 0, i.e. IEF on
+ (f0.0)mov (8) acc0.0:f r17.2<0;1,0>:f // acc = u (payload dword 2)
+ (f0.0)mac (8) acc0.0:f r17.4<0;1,0>:f -2.0:f // acc += -2 * du (payload dword 4)
+ (f0.0)mac (8) acc0.0:f r17.6<0;1,0>:f 3.0:f // acc += 3 * ddu (payload dword 6)
+ (f0.0)mov (1) r14.2:f acc0:f // IEF ON, rTEMP3.2 = u_left
+ (-f0.0)mov (1) r14.2:f r17.2<0;1,0>:f // IEF OFF, rTEMP3.2 = u_left
+
+ and (1) r14.1:ud r2.3:uw 0xFFF8:uw // keep bits 15:3 of r2.3:uw ...
+ asr (1) r14.1:ud r14.1:ud 3:d // ... shift them down to bit 0 ...
+ mov (1) r14.1:f r14.1:ud // ... and convert to float; used as "width" in the products below
+
+ // Gen7 AVS WA, if (int)(u_left*width + 5.0/256) > (int)(u_left*width)
+ mul (1) r14.0:f r14.2:f r14.1:f // rTEMP3.0 = u_left*width
+ add (1) r14.2:f r14.0:f 0.01953125:f // rTEMP3.2 = u_left*width + 5.0/256
+ add (1) r14.3:f r14.0:f 0.99609375:f // rTEMP3.3 = u_left*width + 255.0/256
+
+ //Check if the values are < 0 and account for (int) cast of negative numbers
+
+ //(int)(u_left*width)
+ cmp.l.f0.0 (1) null<1>:f r14.0:f 0.00000000:f // f0.0 = value < 0 (tested on the float before it is overwritten)
+ mov (1) r14.0:d r14.0:f // float -> int conversion in place
+ (f0.0)add (1) r14.0:d r14.0<0;1,0>:d -1:d // negative input: subtract 1 from the converted value
+
+ //(int)(u_left*width + 5.0/256)
+ cmp.l.f0.0 (1) null<1>:f r14.2:f 0.00000000:f // f0.0 = value < 0
+ mov (1) r14.2:d r14.2:f // float -> int conversion in place
+ (f0.0)add (1) r14.2:d r14.2<0;1,0>:d -1:d // negative input: subtract 1 from the converted value
+
+ //(int)(u_left*width + 255.0/256)
+ cmp.l.f0.0 (1) null<1>:f r14.3:f 0.00000000:f // f0.0 = value < 0
+ mov (1) r14.3:d r14.3:f // float -> int conversion in place
+ (f0.0)add (1) r14.3:d r14.3<0;1,0>:d -1:d // negative input: subtract 1 from the converted value
+
+ mov (1) f0.0:uw 0:uw // clear flag so the final add cannot fire when the predicated cmp.e below is skipped
+ //if ((int)(u_left*width + 5.0/256) > (int)(u_left*width)); result must land in f1.0, which predicates the next two instructions
+ cmp.g.f1.0 (1) null<1>:d r14.2:d r14.0:d
+ // modified_u_coord = u_coord - 5.0/(256*width); //floating point (the correction is read from r2.3:f)
+ (f1.0) add (1) r17.2:f r17.2<0;1,0>:f -r2.3:f
+ //else if ((int)(u_left*width + 255.0/256) == (int)(u_left*width)); only evaluated when the first test failed
+ (-f1.0) cmp.e.f0.0 (1) null<1>:d r14.3:d r14.0:d
+ // modified_u_coord = u_coord + 1.0/(256*width); //floating point (the correction is read from r2.2:f)
+ (f0.0) add (1) r17.2:f r17.2<0;1,0>:f r2.2:f
+
+
+GEN7_AVS_WA_DONE_L0_0_:
+ mov (1) f0.0:uw r14.8:uw // restore f0.0 (saved into r14.8:uw before the workaround test)
+
+ send (1) uwBUFFER_0(0)<1> r16 0x2 a0.0:ud // message starts at r16 (header, then the payload in r17); descriptor comes from a0.0; response lands in BUFFER_0
+ // Returns packed data in 16 GRFs in scrambled order
+
+SKIP_AVS_LOAD_L0_0_:
+ nop // landing pad for the layer-skip jump
+
+
diff --git a/src/shaders/post_processing/gen7/PA_AVS_Buf_1.g4a b/src/shaders/post_processing/gen7/PA_AVS_Buf_1.g4a
new file mode 100644
index 0000000..208f16d
--- /dev/null
+++ b/src/shaders/post_processing/gen7/PA_AVS_Buf_1.g4a
@@ -0,0 +1,531 @@
+/*
+ * Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// 37 // Total instruction count
+// 1 // Total kernel count
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16)
+.default_register_type :ub
+
+.reg_count_total 128
+.reg_count_payload 7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later)
+// 0000,0000,0000
+// 0001(Spawn a root thread),0001 (Root thread spawn thread)
+// = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later)
+// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+// 0000,0000 (Binding table index, added later)
+// = 0x02000011
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+ // Message Descriptors
+ // = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+ // 1 (header present 1) 0 1010 (media block write) 000000
+ // 00000000 (binding table index - set later)
+ // = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+ // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT
+
+
+.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub // byte view of the message payload at r30
+.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw // word view of the same registers
+.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud // dword view of the same registers
+.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f // float view of the same registers
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+
+.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f // float view of the rotation temp space at r9
+
+.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud // dword view of the same registers
+
+.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw // word view of the same registers
+
+.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub // byte view of the same registers
+
+.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub // byte view with a stride of 4 (every 4th byte)
+
+
+// End of common.inc
+
+
+// FileName: PA_AVS_Buf_1.asm
+// Author: Vivek Kumar
+// Description: Loads 8x8 AVS/IEF Packed data into Buffer 1
+
+
+
+// FileName : PA_AVS_Buf.asm
+// Author : Tatiya, Rupesh
+// Description : Loads 8x8 AVS/IEF Packed data into Buffer N
+
+//On IVB, for AVS module - set buffer pointers offset according to AVS Layout.
+//Change it to Sample Unorm layout in Shuffle modules.
+
+
+
+// Module name: Scaling.inc
+
+
+
+
+// Description: Includes all definitions explicit to Fast Composite.
+
+
+
+
+// End of common.inc
+
+
+//========== GRF partition ==========
+ // r0 header : r0 (1 GRF)
+ // Static parameters : r1 - r6 (6 GRFS)
+ // Inline parameters : r7 - r8 (2 GRFs)
+ // MSGSRC : r27 (1 GRF)
+//===================================
+
+//Interface:
+//========== Static Parameters (Explicit To Fast Composite) ==========
+//r1
+//CSC Set 0
+
+
+.declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud // dword view of r1 (static parameters, "CSC Set 0")
+
+//Constant alpha
+
+
+//r2
+
+
+// Gen7 AVS WA
+
+
+// WiDi Definitions
+
+
+//Colorfill
+
+
+ // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise.
+
+.declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub // byte view starting at byte 20 of r2; scalar (<0;1,0>) source region
+
+//r3
+//Normalised Ratio of Horizontal step size with main video for all layers
+
+
+ //Normalised Ratio of Horizontal step size with main video for all layers becomes
+ //Normalised Horizontal step size for all layers in VP_Setup.asm
+
+
+//r4
+//Normalised Vertical step size for all layers
+
+
+//r5
+//Normalised Vertical Frame Origin for all layers
+
+
+//r6
+//Normalised Horizontal Frame Origin for all layers
+
+
+//========== Inline Parameters (Explicit To Fast Composite) ==========
+
+
+//Main video Step X
+
+
+//====================== Binding table (Explicit To Fast Composite)=========================================
+
+
+//Used by Interlaced Scaling Kernels
+
+
+//========== Sampler State Table Index (Explicit To Fast Composite)==========
+//Sampler Index for AVS/IEF messages
+
+
+//Sampler Index for SIMD16 sampler messages
+
+
+//=============================================================================
+
+.declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f // float views of buffers 0-5 (bases r64, r80, r96, r112, r28, r46)
+.declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+
+.declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud // dword views of the same six buffers
+.declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+.declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw // word views of the same six buffers
+.declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_4 Base=r28.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_5 Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+.declare ubBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub // byte views of the same six buffers
+.declare ubBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+.declare ub4BUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub // byte views with a stride of 4 (every 4th byte)
+.declare ub4BUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+
+//Pointer to mask reg
+
+
+//r18
+
+
+//Always keep Channel Pointers and Offsets in the same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+.declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF: dword view of r18
+
+//r19
+
+
+.declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF: dword view of r19
+
+
+//r20
+
+.declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF: word view of r20
+
+//r21
+
+.declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF: word view of r21
+
+//r22
+
+
+//Always keep Channel Pointers and Offsets in the same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as
+//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT
+
+//r23
+
+
+//Lumakey
+
+
+//r24
+
+
+//r25
+
+
+//r26
+
+
+//defines to generate LABELS during compile time.
+
+
+ // Message Header
+ // m0.7 31:0 Debug
+ // m0.6 31:0 Debug
+ // m0.5 31:0 Ignored
+ // m0.4 31:0 Ignored
+ // m0.3 31:0 Ignored
+ // m0.2 31:16 Ignored
+ // 15 Alpha Write Channel Mask enable=0, disable=1
+ // 14 Blue Write Channel Mask (U)
+ // 13 Green Write Channel Mask (Y)
+ // 12 Red Write Channel Mask (V)
+ // 11:0 Ignored
+ // m0.1 Ignored
+ // m0.0 Ignored
+
+
+ // AVS payload
+ // m1.7 Group ID Number
+ // m1.6 U 2nd Derivative ---> NLAS dx
+ // m1.5 Delta V ---> Step Y
+ // m1.4 Delta U ---> Step X
+ // m1.3 Pixel 0 V Address ---> ORIY (Y0)
+ // m1.2 Pixel 0 U Address ---> ORIX (X0)
+ // m1.1 Vertical Block Number
+ // m1.0 Reserved
+
+ // Sampler Message Descriptor
+ // 31:29 Reserved 000
+ // 28:25 Message length 0010
+ // 24:20 Response length xxxxx ---> 4GRFs for each enabled channel (AVS), 2GRFs for each enabled channel (sample unorm)
+ // 19 Header Present 1
+ // 18:17 SIMD Mode 11 ---> SIMD32/64
+ // 16:12 Message Type xxxxx ---> 01011 sample_8x8, 01100 (sample_unorm), 01010 (sample_unorm+killpix)
+ // 11:8 Sampler Index xxxx
+ // 7:0 Binding Table Index xxxxxxxx
+
+
+ // Msg Header M0.2
+ // 15:15 Alpha Write Channel Mask, 0: written back, 1: not written back
+ // 14:14 Blue Write Channel Mask
+ // 13:13 Green Write Channel Mask
+ // 12:12 Red Write Channel Mask
+
+
+//By design, Buffer 0,1,2,3 always have Layer 0 and Buffer 4,5 always have L1-L7
+
+
+//used to generate LABELS at compile time.
+
+
+ // 18:17 SIMD Mode 10 ---> SIMD16
+ // 16:12 Message Type xxxxx ---> 00000 (SIMD16)
+
+
+//r10-17 - 8 GRFs to load SIMD16 data (upto 4 channels)
+//r18-19 - 2 GRFs to store sampler ramp.
+
+ .declare mfSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f // float view of the scaling payload at r14
+ .declare muwSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw // word view of the same registers
+ .declare mudCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud // dword view; NOTE(review): name looks like a typo for mudSCALING_..., kept because users are not visible here
+ .declare mubCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=1 SrcRegion=<32;32,1> DstRegion=<1> Type=ub // byte view; NOTE(review): name looks like a typo for mubSCALING_..., kept because users are not visible here
+
+
+ .declare fSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f // float view of the scaling temp space at r9
+ .declare udSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud // dword view of the same registers
+ .declare ub4SCALING_0X_34X_TEMP Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<1> Type=ub // byte view reading every 4th byte
+ .declare uwSCALING_0X_34X_TEMP Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw // word view of the same registers
+
+ // Sampler ramp is used for Scaling 0X_0.34X
+ .declare fSAMPLER_RAMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> Type=f // 1 GRF, 8 elements; shares its base (r9) with the *_TEMP views above
+
+
+ //#define rMSGDSC_UV r23.0
+
+
+//End of _SCALING_
+
+
+ //Check if layer is to be skipped
+
+
+ // f0.1 pre-computed in Set_Layer_0
+ (-f0.1) jmpi (1) SKIP_AVS_LOAD_L0_1_ // f0.1 clear -> skip the whole load
+
+
+ //AVS_PAYLOAD already has all the data loaded at this point
+ add (1) a0.0:ud r23.5<0;1,0>:ud 0x50EB400:ud //msg desc: constant part plus the value held in r23.5, placed in a0.0 for the send
+
+ mov (1) r16.2:ud 0x00000000:ud // Enable ARGB channels (dword 2 of the header register r16; all channel-mask bits cleared)
+
+
+ // set the vertical block number: payload dword 1 = r7.12:uw + 1 for this buffer
+
+ add (1) r25.1<1>:ud r7.12:uw 1:ud
+
+
+ mov (8) r17.0:ud r25.0<8;8,1>:ud // copy the AVS payload from r25 into r17, the register after the header at r16
+
+ // Gen7 AVS WA: only for YUV packed surfaces, NV12, and the Y channel of planar surfaces
+ // if ((int)(u_left*width + 5.0/256) > (int)(u_left*width))
+ // {
+ // modified_u_coord = u_coord - 5.0/(256*width); //floating point
+ // }
+ // else if ((int)(u_left*width + 255.0/256) == (int)(u_left*width))
+ // {
+ // modified_u_coord = u_coord + 1.0/(256*width); //floating point
+ // }
+ // else {
+ // modified_u_coord = u_coord;
+ // }
+ // where u_left = u - 2*du + 3*ddu when IEF is on,
+ // and u_left = u when IEF is off.
+ //
+
+ // check whether Gen7 AVS WA is enabled (bit 1 of r2.3:uw); f0.0 is saved first because it is reused as scratch below
+ mov (1) r14.8:uw f0.0:uw // save f0.0
+
+ and.nz.f0.0 (1) null<1>:uw r2.3:uw 0x2:uw // f0.0 = (r2.3:uw & 0x2) != 0
+ (-f0.0)jmpi (1) GEN7_AVS_WA_DONE_L0_1_ // bit clear -> workaround disabled, go straight to the send
+
+
+ // Gen7 AVS WA, check if IEF is ON for choosing Gen7 AVS WA formula
+
+ and.nz.f0.0 (8) null<1>:uw r2.3<0;1,0>:uw 0x4:uw // f0.0 = (r2.3:uw & 0x4) != 0, i.e. IEF on
+ (f0.0)mov (8) acc0.0:f r17.2<0;1,0>:f // acc = u (payload dword 2)
+ (f0.0)mac (8) acc0.0:f r17.4<0;1,0>:f -2.0:f // acc += -2 * du (payload dword 4)
+ (f0.0)mac (8) acc0.0:f r17.6<0;1,0>:f 3.0:f // acc += 3 * ddu (payload dword 6)
+ (f0.0)mov (1) r14.2:f acc0:f // IEF ON, rTEMP3.2 = u_left
+ (-f0.0)mov (1) r14.2:f r17.2<0;1,0>:f // IEF OFF, rTEMP3.2 = u_left
+
+ and (1) r14.1:ud r2.3:uw 0xFFF8:uw // keep bits 15:3 of r2.3:uw ...
+ asr (1) r14.1:ud r14.1:ud 3:d // ... shift them down to bit 0 ...
+ mov (1) r14.1:f r14.1:ud // ... and convert to float; used as "width" in the products below
+
+ // Gen7 AVS WA, if (int)(u_left*width + 5.0/256) > (int)(u_left*width)
+ mul (1) r14.0:f r14.2:f r14.1:f // rTEMP3.0 = u_left*width
+ add (1) r14.2:f r14.0:f 0.01953125:f // rTEMP3.2 = u_left*width + 5.0/256
+ add (1) r14.3:f r14.0:f 0.99609375:f // rTEMP3.3 = u_left*width + 255.0/256
+
+ //Check if the values are < 0 and account for (int) cast of negative numbers
+
+ //(int)(u_left*width)
+ cmp.l.f0.0 (1) null<1>:f r14.0:f 0.00000000:f // f0.0 = value < 0 (tested on the float before it is overwritten)
+ mov (1) r14.0:d r14.0:f // float -> int conversion in place
+ (f0.0)add (1) r14.0:d r14.0<0;1,0>:d -1:d // negative input: subtract 1 from the converted value
+
+ //(int)(u_left*width + 5.0/256)
+ cmp.l.f0.0 (1) null<1>:f r14.2:f 0.00000000:f // f0.0 = value < 0
+ mov (1) r14.2:d r14.2:f // float -> int conversion in place
+ (f0.0)add (1) r14.2:d r14.2<0;1,0>:d -1:d // negative input: subtract 1 from the converted value
+
+ //(int)(u_left*width + 255.0/256)
+ cmp.l.f0.0 (1) null<1>:f r14.3:f 0.00000000:f // f0.0 = value < 0
+ mov (1) r14.3:d r14.3:f // float -> int conversion in place
+ (f0.0)add (1) r14.3:d r14.3<0;1,0>:d -1:d // negative input: subtract 1 from the converted value
+
+ mov (1) f0.0:uw 0:uw // clear flag so the final add cannot fire when the predicated cmp.e below is skipped
+ //if ((int)(u_left*width + 5.0/256) > (int)(u_left*width)); result lands in f1.0, which predicates the next two instructions
+ cmp.g.f1.0 (1) null<1>:d r14.2:d r14.0:d
+ // modified_u_coord = u_coord - 5.0/(256*width); //floating point (the correction is read from r2.3:f)
+ (f1.0) add (1) r17.2:f r17.2<0;1,0>:f -r2.3:f
+ //else if ((int)(u_left*width + 255.0/256) == (int)(u_left*width)); only evaluated when the first test failed
+ (-f1.0) cmp.e.f0.0 (1) null<1>:d r14.3:d r14.0:d
+ // modified_u_coord = u_coord + 1.0/(256*width); //floating point (the correction is read from r2.2:f)
+ (f0.0) add (1) r17.2:f r17.2<0;1,0>:f r2.2:f
+
+
+GEN7_AVS_WA_DONE_L0_1_:
+ mov (1) f0.0:uw r14.8:uw // restore f0.0 (saved into r14.8:uw before the workaround test)
+
+ send (1) uwBUFFER_1(0)<1> r16 0x2 a0.0:ud // message starts at r16 (header, then the payload in r17); descriptor comes from a0.0; response lands in BUFFER_1
+ // Returns packed data in 16 GRFs in scrambled order
+
+SKIP_AVS_LOAD_L0_1_:
+ nop // landing pad for the layer-skip jump
+
+
diff --git a/src/shaders/post_processing/gen7/PA_AVS_Buf_2.g4a b/src/shaders/post_processing/gen7/PA_AVS_Buf_2.g4a
new file mode 100644
index 0000000..f9ac1a7
--- /dev/null
+++ b/src/shaders/post_processing/gen7/PA_AVS_Buf_2.g4a
@@ -0,0 +1,532 @@
+/*
+ * Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// 37 // Total instruction count
+// 1 // Total kernel count
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16)
+.default_register_type :ub
+
+.reg_count_total 128
+.reg_count_payload 7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later)
+// 0000,0000,0000
+// 0001(Spawn a root thread),0001 (Root thread spawn thread)
+// = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later)
+// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+// 0000,0000 (Binding table index, added later)
+// = 0x0C098700
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+ // Message Descriptors
+ // = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+ // 1 (header present 1) 0 1010 (media block write) 000000
+ // 00000000 (binding table index - set later)
+ // = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+ // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT
+
+
+.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+
+.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+
+.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+
+
+// End of common.inc
+
+
+// FileName: PA_AVS_Buf_2.asm
+// Author: Vivek Kumar
+// Description: Loads 8x8 AVS/IEF Packed data into Buffer 2
+
+
+
+// FileName : PA_AVS_Buf.asm
+// Author : Tatiya, Rupesh
+// Description : Loads 8x8 AVS/IEF Packed data into Buffer N
+
+//On IVB, for AVS module - set buffer pointers offset according to AVS Layout.
+//Change it to Sample Unorm layout in Shuffle modules.
+
+
+
+// Module name: Scaling.inc
+
+
+
+
+// Description: Includes all definitions explicit to Fast Composite.
+
+
+
+
+// End of common.inc
+
+
+//========== GRF partition ==========
+ // r0 header : r0 (1 GRF)
+ // Static parameters : r1 - r6 (6 GRFS)
+ // Inline parameters : r7 - r8 (2 GRFs)
+ // MSGSRC : r27 (1 GRF)
+//===================================
+
+//Interface:
+//========== Static Parameters (Explicit To Fast Composite) ==========
+//r1
+//CSC Set 0
+
+
+.declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud
+
+//Constant alpha
+
+
+//r2
+
+
+// Gen7 AVS WA
+
+
+// WiDi Definitions
+
+
+//Colorfill
+
+
+ // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise.
+
+.declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub
+
+//r3
+//Normalised Ratio of Horizontal step size with main video for all layers
+
+
+ //Normalised Ratio of Horizontal step size with main video for all layers becomes
+ //Normalised Horizontal step size for all layers in VP_Setup.asm
+
+
+//r4
+//Normalised Vertical step size for all layers
+
+
+//r5
+//Normalised Vertical Frame Origin for all layers
+
+
+//r6
+//Normalised Horizontal Frame Origin for all layers
+
+
+//========== Inline Parameters (Explicit To Fast Composite) ==========
+
+
+//Main video Step X
+
+
+//====================== Binding table (Explicit To Fast Composite)=========================================
+
+
+//Used by Interlaced Scaling Kernels
+
+
+//========== Sampler State Table Index (Explicit To Fast Composite)==========
+//Sampler Index for AVS/IEF messages
+
+
+//Sampler Index for SIMD16 sampler messages
+
+
+//=============================================================================
+
+.declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+
+.declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+.declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_4 Base=r28.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_5 Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+.declare ubBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+.declare ub4BUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+
+//Pointer to mask reg
+
+
+//r18
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+.declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF
+
+//r19
+
+
+.declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF
+
+
+//r20
+
+.declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF
+
+//r21
+
+.declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF
+
+//r22
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as
+//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT
+
+//r23
+
+
+//Lumakey
+
+
+//r24
+
+
+//r25
+
+
+//r26
+
+
+//defines to generate LABELS during compile time.
+
+
+ // Message Header
+ // m0.7 31:0 Debug
+ // m0.6 31:0 Debug
+ // m0.5 31:0 Ignored
+ // m0.4 31:0 Ignored
+ // m0.3 31:0 Ignored
+ // m0.2 31:16 Ignored
+ // 15 Alpha Write Channel Mask enable=0, disable=1
+ // 14 Blue Write Channel Mask (U)
+ // 13 Green Write Channel Mask (Y)
+ // 12 Red Write Channel Mask (V)
+ // 11:0 Ignored
+ // m0.1 Ignored
+ // m0.0 Ignored
+
+
+ // AVS payload
+ // m1.7 Group ID Number
+ // m1.6 U 2nd Derivative ---> NLAS dx
+ // m1.5 Delta V ---> Step Y
+ // m1.4 Delta U ---> Step X
+ // m1.3 Pixel 0 V Address ---> ORIY (Y0)
+ // m1.2 Pixel 0 U Address ---> ORIX (X0)
+ // m1.1 Vertical Block Number
+ // m1.0 Reserved
+
+ // Sampler Message Descriptor
+ // 31:29 Reserved 000
+ // 28:25 Message length 0010
+ // 24:20 Response length xxxxx ---> 4GRFs for each enabled channel (AVS), 2GRFs for each enabled channel (sample unorm)
+ // 19 Header Present 1
+ // 18:17 SIMD Mode 11 ---> SIMD32/64
+ // 16:12 Message Type xxxxx ---> 01011 sample_8x8, 01100 (sample_unorm), 01010 (sample_unorm+killpix)
+ // 11:8 Sampler Index xxxx
+ // 7:0 Binding Table Index xxxxxxxx
+
+
+ // Msg Header M0.2
+ // 15:15 Alpha Write Channel Mask, 0: written back, 1: not written back
+ // 14:14 Blue Write Channel Mask
+ // 13:13 Green Write Channel Mask
+ // 12:12 Red Write Channel Mask
+
+
+//By design, Buffer 0,1,2,3 always have Layer 0 and Buffer 4,5 always have L1-L7
+
+
+//used to generate LABELS at compile time.
+
+
+ // 18:17 SIMD Mode 10 ---> SIMD16
+ // 16:12 Message Type xxxxx ---> 00000 (SIMD16)
+
+
+//r10-17 - 8 GRFs to load SIMD16 data (up to 4 channels)
+//r18-19 - 2 GRFs to store sampler ramp.
+
+ .declare mfSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+ .declare muwSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+ .declare mudCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+ .declare mubCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=1 SrcRegion=<32;32,1> DstRegion=<1> Type=ub
+
+
+ .declare fSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+ .declare udSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+ .declare ub4SCALING_0X_34X_TEMP Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<1> Type=ub
+ .declare uwSCALING_0X_34X_TEMP Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+ // Sampler ramp is used for Scaling 0X_0.34X
+ .declare fSAMPLER_RAMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> Type=f // 1 GRFs, 8 elements
+
+
+ //#define rMSGDSC_UV r23.0
+
+
+//End of _SCALING_
+
+
+ //Check if layer is to be skipped
+
+
+ // f0.1 pre-computed in Set_Layer_0
+ (-f0.1) jmpi (1) SKIP_AVS_LOAD_L0_2_
+
+
+ //AVS_PAYLOAD already has all the data loaded at this point
+ add (1) a0.0:ud r23.5<0;1,0>:ud 0x50EB400:ud //msg desc
+
+ mov (1) r16.2:ud 0x00000000:ud // Enable ARGB channels
+
+
+ // set the vertical block number
+
+
+ add (1) r25.1<1>:ud r7.12:uw 2:ud
+
+
+ mov (8) r17.0:ud r25.0<8;8,1>:ud
+
+ // Gen7 AVS WA Only for YUV packed surfaces, NV12 and Y-channel only for Planar surfaces
+ // if (((int)(u_left*width + 5.0/256) > (int)(u_left*width))
+ // {
+ // modified_u_coord = u_coord - 5.0/(256*width); //floating point
+ // }
+ // else if(((int)(u_left*width + 255.0/256) == (int)(u_left*width))
+ // {
+ // modified_u_coord = u_coord + 1.0/(256*width); //floating point
+ // }
+ // else{
+ // modified_u_coord = u_coord;
+ // }
+ // Where u_left = u - 2*du + 3*ddu for IEF On
+ // And u_left = u for IEF Off case
+ //
+
+ // check whether Gen7 AVS WA is enabled,
+ mov (1) r14.8:uw f0.0:uw // save f0.0
+
+ and.nz.f0.0 (1) null<1>:uw r2.3:uw 0x2:uw
+ (-f0.0)jmpi (1) GEN7_AVS_WA_DONE_L0_2_
+
+
+ // Gen7 AVS WA, check if IEF is ON for choosing Gen7 AVS WA formula
+
+ and.nz.f0.0 (8) null<1>:uw r2.3<0;1,0>:uw 0x4:uw
+ (f0.0)mov (8) acc0.0:f r17.2<0;1,0>:f
+ (f0.0)mac (8) acc0.0:f r17.4<0;1,0>:f -2.0:f
+ (f0.0)mac (8) acc0.0:f r17.6<0;1,0>:f 3.0:f
+ (f0.0)mov (1) r14.2:f acc0:f // IEF ON, rTEMP3.2 = u_left
+ (-f0.0)mov (1) r14.2:f r17.2<0;1,0>:f // IEF OFF, rTEMP3.2 = u_left
+
+ and (1) r14.1:ud r2.3:uw 0xFFF8:uw
+ asr (1) r14.1:ud r14.1:ud 3:d
+ mov (1) r14.1:f r14.1:ud
+
+ // Gen7 AVS WA, if (int)(u_left*width + 5.0/256) > (int)(u_left*width)
+ mul (1) r14.0:f r14.2:f r14.1:f // rTEMP3.0 = u_left*width
+ add (1) r14.2:f r14.0:f 0.01953125:f // rTEMP3.2 = u_left*width + 5.0/256
+ add (1) r14.3:f r14.0:f 0.99609375:f // rTEMP3.3 = u_left*width + 255.0/256
+
+ //Check if the values are < 0 and account for (int) cast of negative numbers
+
+ //(int)(u_left*width)
+ cmp.l.f0.0 (1) null<1>:f r14.0:f 0.00000000:f
+ mov (1) r14.0:d r14.0:f
+ (f0.0)add (1) r14.0:d r14.0<0;1,0>:d -1:d
+
+ //(int)(u_left*width + 5.0/256)
+ cmp.l.f0.0 (1) null<1>:f r14.2:f 0.00000000:f
+ mov (1) r14.2:d r14.2:f
+ (f0.0)add (1) r14.2:d r14.2<0;1,0>:d -1:d
+
+ //(int)(u_left*width + 255.0/256)
+ cmp.l.f0.0 (1) null<1>:f r14.3:f 0.00000000:f
+ mov (1) r14.3:d r14.3:f
+ (f0.0)add (1) r14.3:d r14.3<0;1,0>:d -1:d
+
+ mov (1) f0.0:uw 0:uw // clear flag
+ //if (((int)(u_left*width + 5.0/256) > (int)(u_left*width))
+ cmp.g.f1.0 (1) null<1>:d r14.2:d r14.0:d
+ // modified_u_coord = u_coord - 5.0/(256*width); //floating point
+ (f1.0) add (1) r17.2:f r17.2<0;1,0>:f -r2.3:f
+ //else if(((int)(u_left*width + 255.0/256) == (int)(u_left*width))
+ (-f1.0) cmp.e.f0.0 (1) null<1>:d r14.3:d r14.0:d
+ // modified_u_coord = u_coord + 1.0/(256*width); //floating point
+ (f0.0) add (1) r17.2:f r17.2<0;1,0>:f r2.2:f
+
+
+GEN7_AVS_WA_DONE_L0_2_:
+ mov (1) f0.0:uw r14.8:uw // restore f0.0
+
+ send (1) uwBUFFER_2(0)<1> r16 0x2 a0.0:ud
+ // Returns packed data in 16 GRFs in scrambled order
+
+SKIP_AVS_LOAD_L0_2_:
+ nop
+
+
diff --git a/src/shaders/post_processing/gen7/PA_AVS_Buf_3.g4a b/src/shaders/post_processing/gen7/PA_AVS_Buf_3.g4a
new file mode 100644
index 0000000..b4aec0a
--- /dev/null
+++ b/src/shaders/post_processing/gen7/PA_AVS_Buf_3.g4a
@@ -0,0 +1,532 @@
+/*
+ * Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// 37 // Total instruction count
+// 1 // Total kernel count
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16)
+.default_register_type :ub
+
+.reg_count_total 128
+.reg_count_payload 7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later)
+// 0000,0000,0000
+// 0001(Spawn a root thread),0001 (Root thread spawn thread)
+// = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later)
+// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+// 0000,0000 (Binding table index, added later)
+// = 0x0C098700
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+ // Message Descriptors
+ // = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+ // 1 (header present 1) 0 1010 (media block write) 000000
+ // 00000000 (binding table index - set later)
+ // = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+ // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT
+
+
+.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+
+.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+
+.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+
+
+// End of common.inc
+
+
+// FileName: PA_AVS_Buf_3.asm
+// Author: Vivek Kumar
+// Description: Loads 8x8 AVS/IEF Packed data into Buffer 3
+
+
+
+// FileName : PA_AVS_Buf.asm
+// Author : Tatiya, Rupesh
+// Description : Loads 8x8 AVS/IEF Packed data into Buffer N
+
+//On IVB, for AVS module - set buffer pointers offset according to AVS Layout.
+//Change it to Sample Unorm layout in Shuffle modules.
+
+
+
+// Module name: Scaling.inc
+
+
+
+
+// Description: Includes all definitions explicit to Fast Composite.
+
+
+
+
+// End of common.inc
+
+
+//========== GRF partition ==========
+ // r0 header : r0 (1 GRF)
+ // Static parameters : r1 - r6 (6 GRFS)
+ // Inline parameters : r7 - r8 (2 GRFs)
+ // MSGSRC : r27 (1 GRF)
+//===================================
+
+//Interface:
+//========== Static Parameters (Explicit To Fast Composite) ==========
+//r1
+//CSC Set 0
+
+
+.declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud
+
+//Constant alpha
+
+
+//r2
+
+
+// Gen7 AVS WA
+
+
+// WiDi Definitions
+
+
+//Colorfill
+
+
+ // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise.
+
+.declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub
+
+//r3
+//Normalised Ratio of Horizontal step size with main video for all layers
+
+
+ //Normalised Ratio of Horizontal step size with main video for all layers becomes
+ //Normalised Horizontal step size for all layers in VP_Setup.asm
+
+
+//r4
+//Normalised Vertical step size for all layers
+
+
+//r5
+//Normalised Vertical Frame Origin for all layers
+
+
+//r6
+//Normalised Horizontal Frame Origin for all layers
+
+
+//========== Inline Parameters (Explicit To Fast Composite) ==========
+
+
+//Main video Step X
+
+
+//====================== Binding table (Explicit To Fast Composite)=========================================
+
+
+//Used by Interlaced Scaling Kernels
+
+
+//========== Sampler State Table Index (Explicit To Fast Composite)==========
+//Sampler Index for AVS/IEF messages
+
+
+//Sampler Index for SIMD16 sampler messages
+
+
+//=============================================================================
+
+.declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+
+.declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+.declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_4 Base=r28.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_5 Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+.declare ubBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+.declare ub4BUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+
+//Pointer to mask reg
+
+
+//r18
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+.declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF
+
+//r19
+
+
+.declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF
+
+
+//r20
+
+.declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF
+
+//r21
+
+.declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF
+
+//r22
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as
+//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT
+
+//r23
+
+
+//Lumakey
+
+
+//r24
+
+
+//r25
+
+
+//r26
+
+
+//defines to generate LABELS during compile time.
+
+
+ // Message Header
+ // m0.7 31:0 Debug
+ // m0.6 31:0 Debug
+ // m0.5 31:0 Ignored
+ // m0.4 31:0 Ignored
+ // m0.3 31:0 Ignored
+ // m0.2 31:16 Ignored
+ // 15 Alpha Write Channel Mask enable=0, disable=1
+ // 14 Blue Write Channel Mask (U)
+ // 13 Green Write Channel Mask (Y)
+ // 12 Red Write Channel Mask (V)
+ // 11:0 Ignored
+ // m0.1 Ignored
+ // m0.0 Ignored
+
+
+ // AVS payload
+ // m1.7 Group ID Number
+ // m1.6 U 2nd Derivative ---> NLAS dx
+ // m1.5 Delta V ---> Step Y
+ // m1.4 Delta U ---> Step X
+ // m1.3 Pixel 0 V Address ---> ORIY (Y0)
+ // m1.2 Pixel 0 U Address ---> ORIX (X0)
+ // m1.1 Vertical Block Number
+ // m1.0 Reserved
+
+ // Sampler Message Descriptor
+ // 31:29 Reserved 000
+ // 28:25 Message length 0010
+ // 24:20 Response length xxxxx ---> 4GRFs for each enabled channel (AVS), 2GRFs for each enabled channel (sample unorm)
+ // 19 Header Present 1
+ // 18:17 SIMD Mode 11 ---> SIMD32/64
+ // 16:12 Message Type xxxxx ---> 01011 sample_8x8, 01100 (sample_unorm), 01010 (sample_unorm+killpix)
+ // 11:8 Sampler Index xxxx
+ // 7:0 Binding Table Index xxxxxxxx
+
+
+ // Msg Header M0.2
+ // 15:15 Alpha Write Channel Mask, 0: written back, 1: not written back
+ // 14:14 Blue Write Channel Mask
+ // 13:13 Green Write Channel Mask
+ // 12:12 Red Write Channel Mask
+
+
+//By design, Buffer 0,1,2,3 always have Layer 0 and Buffer 4,5 always have L1-L7
+
+
+//used to generate LABELS at compile time.
+
+
+ // 18:17 SIMD Mode 10 ---> SIMD16
+ // 16:12 Message Type xxxxx ---> 00000 (SIMD16)
+
+
+//r10-17 - 8 GRFs to load SIMD16 data (up to 4 channels)
+//r18-19 - 2 GRFs to store sampler ramp.
+
+ .declare mfSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+ .declare muwSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+ .declare mudCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+ .declare mubCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=1 SrcRegion=<32;32,1> DstRegion=<1> Type=ub
+
+
+ .declare fSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+ .declare udSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+ .declare ub4SCALING_0X_34X_TEMP Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<1> Type=ub
+ .declare uwSCALING_0X_34X_TEMP Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+ // Sampler ramp is used for Scaling 0X_0.34X
+ .declare fSAMPLER_RAMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> Type=f // 1 GRFs, 8 elements
+
+
+ //#define rMSGDSC_UV r23.0
+
+
+//End of _SCALING_
+
+
+ //Check if layer is to be skipped
+
+
+ // f0.1 pre-computed in Set_Layer_0
+ (-f0.1) jmpi (1) SKIP_AVS_LOAD_L0_3_
+
+
+ //AVS_PAYLOAD already has all the data loaded at this point
+ add (1) a0.0:ud r23.5<0;1,0>:ud 0x50EB400:ud //msg desc
+
+ mov (1) r16.2:ud 0x00000000:ud // Enable ARGB channels
+
+
+ // set the vertical block number
+
+
+ add (1) r25.1<1>:ud r7.12:uw 3:ud
+
+
+ mov (8) r17.0:ud r25.0<8;8,1>:ud
+
+ // Gen7 AVS WA Only for YUV packed surfaces, NV12 and Y-channel only for Planar surfaces
+ // if (((int)(u_left*width + 5.0/256) > (int)(u_left*width))
+ // {
+ // modified_u_coord = u_coord - 5.0/(256*width); //floating point
+ // }
+ // else if(((int)(u_left*width + 255.0/256) == (int)(u_left*width))
+ // {
+ // modified_u_coord = u_coord + 1.0/(256*width); //floating point
+ // }
+ // else{
+ // modified_u_coord = u_coord;
+ // }
+ // Where u_left = u - 2*du + 3*ddu for IEF On
+ // And u_left = u for IEF Off case
+ //
+
+ // check whether Gen7 AVS WA is enabled,
+ mov (1) r14.8:uw f0.0:uw // save f0.0
+
+ and.nz.f0.0 (1) null<1>:uw r2.3:uw 0x2:uw
+ (-f0.0)jmpi (1) GEN7_AVS_WA_DONE_L0_3_
+
+
+ // Gen7 AVS WA, check if IEF is ON for choosing Gen7 AVS WA formula
+
+ and.nz.f0.0 (8) null<1>:uw r2.3<0;1,0>:uw 0x4:uw
+ (f0.0)mov (8) acc0.0:f r17.2<0;1,0>:f
+ (f0.0)mac (8) acc0.0:f r17.4<0;1,0>:f -2.0:f
+ (f0.0)mac (8) acc0.0:f r17.6<0;1,0>:f 3.0:f
+ (f0.0)mov (1) r14.2:f acc0:f // IEF ON, rTEMP3.2 = u_left
+ (-f0.0)mov (1) r14.2:f r17.2<0;1,0>:f // IEF OFF, rTEMP3.2 = u_left
+
+ and (1) r14.1:ud r2.3:uw 0xFFF8:uw
+ asr (1) r14.1:ud r14.1:ud 3:d
+ mov (1) r14.1:f r14.1:ud
+
+ // Gen7 AVS WA, if (int)(u_left*width + 5.0/256) > (int)(u_left*width)
+ mul (1) r14.0:f r14.2:f r14.1:f // rTEMP3.0 = u_left*width
+ add (1) r14.2:f r14.0:f 0.01953125:f // rTEMP3.2 = u_left*width + 5.0/256
+ add (1) r14.3:f r14.0:f 0.99609375:f // rTEMP3.3 = u_left*width + 255.0/256
+
+ //Check if the values are < 0 and account for (int) cast of negative numbers
+
+ //(int)(u_left*width)
+ cmp.l.f0.0 (1) null<1>:f r14.0:f 0.00000000:f
+ mov (1) r14.0:d r14.0:f
+ (f0.0)add (1) r14.0:d r14.0<0;1,0>:d -1:d
+
+ //(int)(u_left*width + 5.0/256)
+ cmp.l.f0.0 (1) null<1>:f r14.2:f 0.00000000:f
+ mov (1) r14.2:d r14.2:f
+ (f0.0)add (1) r14.2:d r14.2<0;1,0>:d -1:d
+
+ //(int)(u_left*width + 255.0/256)
+ cmp.l.f0.0 (1) null<1>:f r14.3:f 0.00000000:f
+ mov (1) r14.3:d r14.3:f
+ (f0.0)add (1) r14.3:d r14.3<0;1,0>:d -1:d
+
+ mov (1) f0.0:uw 0:uw // clear flag
+ //if (((int)(u_left*width + 5.0/256) > (int)(u_left*width))
+ cmp.g.f1.0 (1) null<1>:d r14.2:d r14.0:d
+ // modified_u_coord = u_coord - 5.0/(256*width); //floating point
+ (f1.0) add (1) r17.2:f r17.2<0;1,0>:f -r2.3:f
+ //else if(((int)(u_left*width + 255.0/256) == (int)(u_left*width))
+ (-f1.0) cmp.e.f0.0 (1) null<1>:d r14.3:d r14.0:d
+ // modified_u_coord = u_coord + 1.0/(256*width); //floating point
+ (f0.0) add (1) r17.2:f r17.2<0;1,0>:f r2.2:f
+
+
+GEN7_AVS_WA_DONE_L0_3_:
+ mov (1) f0.0:uw r14.8:uw // restore f0.0
+
+ send (1) uwBUFFER_3(0)<1> r16 0x2 a0.0:ud
+ // Returns packed data in 16 GRFs in scrambled order
+
+SKIP_AVS_LOAD_L0_3_:
+ nop
+
+
diff --git a/src/shaders/post_processing/gen7/PA_DI_422CP.g4a b/src/shaders/post_processing/gen7/PA_DI_422CP.g4a
new file mode 100644
index 0000000..9f9bed0
--- /dev/null
+++ b/src/shaders/post_processing/gen7/PA_DI_422CP.g4a
@@ -0,0 +1,461 @@
+/*
+ * Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// 87 // Total instruction count
+// 1 // Total kernel count
+
+.kernel PA_DI_422CP
+.code
+
+
+
+// FileName: DI.asm
+// Author: Vivek Kumar
+// Description: Tasks for DI only case (16x4 block)
+
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16)
+.default_register_type :ub
+
+.reg_count_total 128
+.reg_count_payload 7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later)
+// 0000,0000,0000
+// 0001(Spawn a root thread),0001 (Root thread spawn thread)
+// = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later)
+// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+// 0000,0000 (Binding table index, added later)
+// = 0x0C098700
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+ // Message Descriptors
+ // = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+ // 1 (header present 1) 0 1010 (media block write) 000000
+ // 00000000 (binding table index - set later)
+ // = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+ // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT
+
+
+.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub // byte view of the message payload area based at r30
+.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw // word view aliasing the same r30 base
+.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud // dword view aliasing the same r30 base
+.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f // float view aliasing the same r30 base
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// Temp space for rotation: five typed views that all alias the same base register r9.0
+
+.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f // float view
+
+.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud // unsigned dword view
+
+.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw // unsigned word view
+
+.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub // unsigned byte view, contiguous
+
+.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub // unsigned byte view reading/writing every 4th byte
+
+
+// End of common.inc
+
+
+// FileName: DNDI.inc
+// Author: Vivek Kumar
+// Description: Include file for DN, DI and DNDI
+// Inputs: DI_ENABLE, DN_ENABLE, DN_PLANAR, DN_PACKED
+
+
+
+
+// End of common.inc
+
+
+//Interface:
+//Static Parameters:
+//r1
+
+
+//====================== Binding table (Explicit To DNDI)=========================================
+
+
+.declare mudMSGHDR_DNDI Base=r18 ElementSize=4 Type=ud // DNDI request message, starts at r18 (unsigned dword view)
+.declare mdMSGHDR_DNDI Base=r18 ElementSize=4 Type=d // same r18 message, signed dword view
+.declare mwMSGHDR_DNDI Base=r18 ElementSize=2 Type=w // same r18 message, signed word view (used to place the block origin)
+
+
+.declare mudMSGHDR_STMM Base=r20 ElementSize=4 Type=ud // STMM write message, starts at r20
+
+
+.declare mudMSGHDR_HIST Base=r22 ElementSize=4 Type=ud // histogram message base at r22; not referenced by this kernel's instructions
+
+
+.declare mudMSGHDR_ENC_STATS Base=r24 ElementSize=4 Type=ud // encoder statistics write message, starts at r24 (dword view)
+.declare muwMSGHDR_ENC_STATS Base=r24 ElementSize=2 Type=uw // same r24 message, word view
+.declare mubMSGHDR_ENC_STATS Base=r24 ElementSize=1 Type=ub // same r24 message, byte view
+
+
+.declare mudMSGHDR_DN_OUT Base=r31.0 ElementSize=4 Type=ud // denoise output message base at r31; not referenced by this kernel's instructions
+.declare mdMSGHDR_DN_OUT Base=r31.0 ElementSize=4 Type=d // same r31 base, signed dword view
+.declare mubMSGHDR_DN_OUT Base=r31.0 ElementSize=1 Type=ub // same r31 base, byte view
+
+
+.declare mudMSGHDR_UVCOPY Base=r36 ElementSize=4 Type=ud // UV copy message base at r36; not referenced by this kernel's instructions
+.declare mdMSGHDR_UVCOPY Base=r36 ElementSize=4 Type=d // same r36 base, signed dword view
+.declare mudMSGHDR_UCOPY Base=r36 ElementSize=4 Type=ud // U copy message, aliases the r36 UVCOPY base
+.declare mudMSGHDR_VCOPY Base=r38 ElementSize=4 Type=ud // V copy message base at r38
+
+
+.declare mudMSGHDR_DI_OUT1 Base=r18.0 ElementSize=4 Type=ud // DI output message 1; shares r18 with the DNDI request header
+.declare mubMSGHDR_DI_OUT1 Base=r18.0 ElementSize=1 Type=ub // same r18 base, byte view
+
+
+.declare mudMSGHDR_DI_OUT2 Base=r23.0 ElementSize=4 Type=ud // DI output message 2 base at r23
+.declare mubMSGHDR_DI_OUT2 Base=r23.0 ElementSize=1 Type=ub // same r23 base, byte view
+
+//r45
+//r45 is used directly as the message header, so the data does not need to be moved there first.
+
+.declare mudDN_Y_OUT Base=r45.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud // dword view starting at r45
+
+// Message response (Denoised & DI-ed pixels & statistics); Use buffer 5
+.declare udDNDI_RESP Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud // DNDI response starting at r46, dword view
+.declare uwDNDI_RESP Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw // same r46 response, word view
+.declare ubDNDI_RESP Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub // same r46 response, byte view
+
+// Message response (UV Copy); Use buffer 5
+.declare udDNDI_UV_RESP Base=r58.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud // UV copy response starting at r58, dword view
+.declare ubDNDI_UV_RESP Base=r58.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub // same r58 response, byte view
+
+//Temp GRFs: For 42X to 422 Conversion
+.declare uwDNDI_UVCOPY_TEMP Base=r10.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw //8 GRFs starting at r10, word view
+.declare ubDNDI_UVCOPY_TEMP Base=r10.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub //8 GRFs starting at r10, byte view
+//---------------------------------------------------------------------------
+// Message descriptors
+//---------------------------------------------------------------------------
+// Extended message descriptor
+ // Message descriptor for sampler read
+ // = 000 0010 (message len 2) 00000 (resp len - set later, 12 or 5 or 11)
+ // 1 (header present 1) 0 11 (SIMD32/64 mode)
+ // 1000 (message type) 0000 (DI state index)
+ // 00000000 (binding table index - set later)
+ // = 0x040b8000
+
+
+// Attention: The Message Length is The Number of GRFs with Data Only, without the Header
+
+
+//---------------------------------------------------------------------------
+// VDI Return Data format
+//---------------------------------------------------------------------------
+// Defines for DI enabled
+
+
+// Defines for DI disabled
+
+
+
+// FileName: DNDI_Command.asm
+// Author: Vivek Kumar
+// Description: Sends a message to the VDI to process one DN (16x8) or DNDI (16x4) block
+
+// Prepare the DNDI send command: header in r18 (copy of r0), block origin in the following register
+mov (8) mudMSGHDR_DNDI(0)<1> r0.0<8;8,1>:ud // message header = copy of r0
+mov (1) mwMSGHDR_DNDI(1,4)<1> r7.0<0;1,0>:w { NoDDClr } // horizontal origin (from r7.0) // Do we need to add offset here? -vK
+mov (1) mwMSGHDR_DNDI(1,12)<1> r7.1<0;1,0>:w { NoDDChk } // vertical origin (from r7.1) // Can these 2 be combined? - vK
+
+send (8) udDNDI_RESP(0)<1> r18 0x2 0x4AE8003:ud // response lands at udDNDI_RESP (r46); per the descriptor layout above: message len 2, response len 10, binding table index 3
+
+// On Gen6, with VDI walker, use the XY pair returned rather than the one programmed above
+// VDI_RETURNED_XY is ordered XY when the walker is enabled and is the same as programmed when the walker is disabled
+mov (2) r7.0<1>:w uwDNDI_RESP(9,14)<2;2,1> // overwrite r7.0/r7.1 with the horizontal/vertical origin returned in W.14 and W.15 of response register 9
+
+
+
+// FileName: DI_STMM_Save.asm
+// Author: Vivek Kumar
+// Description: Saves DI STMM Data to statistics surface in case of DI enabled (for 16x4 block)
+
+// Write STMM to memory: header in r20, one payload register copied from the DNDI response
+mov (8) mudMSGHDR_STMM(0)<1> r0.0<8;8,1>:ud // message header = copy of r0
+mov (8) mudMSGHDR_STMM(1)<1> udDNDI_RESP(8,0) // Move STMM (response register 8) into the payload register after the header
+
+shr (1) mudMSGHDR_STMM(0,0)<1> r7.0<0;1,0>:w 1:w { NoDDClr } // header dword 0 = X origin / 2
+mov (1) mudMSGHDR_STMM(0,1)<1> r7.1<0;1,0>:w { NoDDClr, NoDDChk } // header dword 1 = Y origin
+mov (1) mudMSGHDR_STMM(0,2)<1> 0x30007:ud { NoDDChk } // header dword 2 = block width and height (8x4), each stored as size-1
+
+send (8) null<1>:d r20 0x5 0x40A8021:ud // no response (null dst); per the descriptor layout above: message len 2, media block write, binding table index 0x21
+
+
+
+// FileName: DNDI_Enc_Stats_Save.asm
+// Author: Vivek Kumar
+// Description: Saves Encoder Statistics data to statistics surface in case of DI enabled (for 16x4 block)
+
+// Write encoder statistics to memory: header in r24, one payload register built from response register 9
+//Currently enable this only on Gen6 validation
+mov (8) mudMSGHDR_ENC_STATS(1)<1> 0x0:ud // Init payload register to zero
+mov (8) mudMSGHDR_ENC_STATS(0)<1> r0.0<8;8,1>:ud // message header = copy of r0
+
+shr (1) mudMSGHDR_ENC_STATS(0,0)<1> r7.0<0;1,0>:w 1:w { NoDDClr } // X origin / 2 (original note: enable the { NoDDClr } flag after testing on si)
+mul (1) acc0.1<1>:ud r7.1<0;1,0>:w 3:w // Y origin * 3, kept in the accumulator
+shr (1) mudMSGHDR_ENC_STATS(0,1)<1> acc0.1<0;1,0>:ud 2:w { NoDDClr, NoDDChk } // Y origin * 3/4 (original note: enable the { NoDDClr, NoDDChk } flags after testing on si)
+mov (1) mudMSGHDR_ENC_STATS(0,2)<1> 0x20007:ud { NoDDChk } // block width and height (8x3), each stored as size-1 (original note: enable the { NoDDChk } flag after testing on si)
+add (2) mudMSGHDR_ENC_STATS(0,0)<1> mudMSGHDR_ENC_STATS(0,0)<2;2,1> r1.12<2;2,1>:uw // Add pitch to X,Y origin (words r1.12 and r1.13 are added to header dwords 0 and 1)
+
+
+ //Data block for Encoder Statistics
+ //----------------------------------------------------
+ //| 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | Bytes
+ //----------------------------------------------------
+ //| BNE | MCNT | FCNT | TCNT | X | X | X | X |
+ //----------------------------------------------------
+ //| DcTpT | SVCM | DcBpT | DcTpB |
+ //----------------------------------------------------
+ //| SHCM | STAD | DcTcB | DcBpB |
+ //----------------------------------------------------
+ mov (1) mudMSGHDR_ENC_STATS(1,0)<1> udDNDI_RESP(9,1)<0;1,0> { NoDDClr } // payload dword 0 <- response register 9, dword 1
+ mov (2) mudMSGHDR_ENC_STATS(1,3)<2> udDNDI_RESP(9,3)<2;2,1> { NoDDClr, NoDDChk } // payload dwords 3 and 5 <- response register 9, dwords 3 and 4
+ mov (2) mudMSGHDR_ENC_STATS(1,2)<2> udDNDI_RESP(9,5)<2;2,1> { NoDDChk } // payload dwords 2 and 4 <- response register 9, dwords 5 and 6
+
+
+send (8) null<1>:d r24 0x5 0x40A8021:ud // same descriptor as the STMM write: message len 2, media block write, binding table index 0x21
+
+
+
+// FileName: DI_Save_422CP_16x4.asm
+// Author: Vivek Kumar
+// Description: Save two 16x4 blocks of DI output in 422 format to Color Pipe (IECP)
+
+
+.declare mubMSGHDR_DI_OUT1_1 Base=r18.0 ElementSize=1 Type=ub // byte view of output message based at r18 (2nd field, first 8x4 block)
+
+
+.declare mubMSGHDR_DI_OUT1_2 Base=r21.0 ElementSize=1 Type=ub // byte view of output message based at r21 (2nd field, second 8x4 block)
+
+
+.declare mubMSGHDR_DI_OUT2_1 Base=r24.0 ElementSize=1 Type=ub // byte view of output message based at r24 (1st field, first 8x4 block)
+
+
+.declare mubMSGHDR_DI_OUT2_2 Base=r27.0 ElementSize=1 Type=ub // byte view of output message based at r27 (1st field, second 8x4 block)
+
+
+mov (8) r27.0<1>:ud r0.0<8;8,1>:ud // build a template header in r27 from r0; r27 is reused later as the 4th message header
+shl (1) r27.0<1>:ud r7.0<0;1,0>:w 1:w { NoDDClr } // H. block origin need to be doubled
+mov (1) r27.1<1>:ud r7.1<0;1,0>:w { NoDDClr, NoDDChk } // Block origin
+mov (1) r27.2<1>:ud 0x3000F:ud { NoDDClr, NoDDChk } // Block width and height (16x4), each stored as size-1 like the 0x30007 = 8x4 header above
+
+//M0.3 - 0 - CP Enable, 1 - Area of Interest, 3:2 Message Format(TBD), 4:3 - Ignored, 31:5 CP state pointer
+//Compose area-of-interest bit + color pipe state pointer
+or (1) r27.3<1>:ud r2.4<0;1,0>:ud r7.26<0;1,0>:b { NoDDChk } // header dword 3 = r2.4 OR'ed with byte r7.26
+
+//prepare the message headers: copy the r27 template into the r18 and r24 headers
+mov (8) r18.0<1>:ud r27<8;8,1>:ud
+mov (8) r24.0<1>:ud r27<8;8,1>:ud
+
+
+// Pack 2nd field Y; First 8x4 block (Y goes to every other byte of the two payload registers after the r18 header)
+ mov (8) mubMSGHDR_DI_OUT1_1(1)<2> ubDNDI_RESP(0,0)<8;8,1> { NoDDClr } // Y from response byte offset 0 -> payload register 1, bytes 0,2,..,14
+ mov (8) mubMSGHDR_DI_OUT1_1(1,16)<2> ubDNDI_RESP(0,16)<8;8,1> { NoDDClr, NoDDChk } // Y from response byte offset 16 -> payload register 1, bytes 16,18,..,30
+ mov (8) mubMSGHDR_DI_OUT1_1(2)<2> ubDNDI_RESP(0,32)<8;8,1> { NoDDClr } // Y from response byte offset 32 -> payload register 2, bytes 0,2,..,14
+ mov (8) mubMSGHDR_DI_OUT1_1(2,16)<2> ubDNDI_RESP(0,48)<8;8,1> { NoDDClr, NoDDChk } // Y from response byte offset 48 -> payload register 2, bytes 16,18,..,30
+
+// Pack 2nd field U, V; First 8x4 block (U taken from odd response bytes -> byte 1 of each 4-byte group; V from even bytes -> byte 3)
+ mov (4) mubMSGHDR_DI_OUT1_1(1,1)<4> ubDNDI_RESP(2,1)<8;4,2> { NoDDClr, NoDDChk } // U, chroma offset 1 -> payload register 1, bytes 1,5,9,13
+ mov (4) mubMSGHDR_DI_OUT1_1(1,17)<4> ubDNDI_RESP(2,17)<8;4,2> { NoDDClr, NoDDChk } // U, chroma offset 17 -> payload register 1, bytes 17,21,25,29
+
+ mov (4) mubMSGHDR_DI_OUT1_1(1,3)<4> ubDNDI_RESP(2,0)<8;4,2> { NoDDChk } // V, chroma offset 0 -> payload register 1, bytes 3,7,11,15
+ mov (4) mubMSGHDR_DI_OUT1_1(1,19)<4> ubDNDI_RESP(2,16)<8;4,2> { NoDDChk } // V, chroma offset 16 -> payload register 1, bytes 19,23,27,31
+ mov (4) mubMSGHDR_DI_OUT1_1(2,1)<4> ubDNDI_RESP(2,33)<8;4,2> { NoDDClr, NoDDChk } // U, chroma offset 33 -> payload register 2, bytes 1,5,9,13
+ mov (4) mubMSGHDR_DI_OUT1_1(2,17)<4> ubDNDI_RESP(2,49)<8;4,2> { NoDDClr, NoDDChk } // U, chroma offset 49 -> payload register 2, bytes 17,21,25,29
+
+ mov (4) mubMSGHDR_DI_OUT1_1(2,3)<4> ubDNDI_RESP(2,32)<8;4,2> { NoDDChk } // V, chroma offset 32 -> payload register 2, bytes 3,7,11,15
+ mov (4) mubMSGHDR_DI_OUT1_1(2,19)<4> ubDNDI_RESP(2,48)<8;4,2> { NoDDChk } // V, chroma offset 48 -> payload register 2, bytes 19,23,27,31
+
+
+// Pack 2nd field Y; Second 8x4 block (same layout as above, source offsets shifted by 8 bytes)
+mov (8) r21.0<1>:ud r18.0<8;8,1>:ud // header for the second block = copy of the r18 header
+add (1) r21.0<1>:ud r21.0<0;1,0>:w 0x10:w // move the X origin 16 bytes to the right
+
+ mov (8) mubMSGHDR_DI_OUT1_2(1)<2> ubDNDI_RESP(0,8)<8;8,1> { NoDDClr } // Y from response byte offset 8 -> payload register 1, bytes 0,2,..,14
+ mov (8) mubMSGHDR_DI_OUT1_2(1,16)<2> ubDNDI_RESP(0,24)<8;8,1> { NoDDClr, NoDDChk } // Y from response byte offset 24 -> payload register 1, bytes 16,18,..,30
+ mov (8) mubMSGHDR_DI_OUT1_2(2)<2> ubDNDI_RESP(0,40)<8;8,1> { NoDDClr } // Y from response byte offset 40 -> payload register 2, bytes 0,2,..,14
+ mov (8) mubMSGHDR_DI_OUT1_2(2,16)<2> ubDNDI_RESP(0,56)<8;8,1> { NoDDClr, NoDDChk } // Y from response byte offset 56 -> payload register 2, bytes 16,18,..,30
+
+// Pack 2nd field U, V; Second 8x4 block
+ mov (4) mubMSGHDR_DI_OUT1_2(1,1)<4> ubDNDI_RESP(2,9)<8;4,2> { NoDDClr, NoDDChk } // U, chroma offset 9 -> payload register 1, bytes 1,5,9,13
+ mov (4) mubMSGHDR_DI_OUT1_2(1,17)<4> ubDNDI_RESP(2,25)<8;4,2> { NoDDClr, NoDDChk } // U, chroma offset 25 -> payload register 1, bytes 17,21,25,29
+
+ mov (4) mubMSGHDR_DI_OUT1_2(1,3)<4> ubDNDI_RESP(2,8)<8;4,2> { NoDDChk } // V, chroma offset 8 -> payload register 1, bytes 3,7,11,15
+ mov (4) mubMSGHDR_DI_OUT1_2(1,19)<4> ubDNDI_RESP(2,24)<8;4,2> { NoDDChk } // V, chroma offset 24 -> payload register 1, bytes 19,23,27,31
+ mov (4) mubMSGHDR_DI_OUT1_2(2,1)<4> ubDNDI_RESP(2,41)<8;4,2> { NoDDClr, NoDDChk } // U, chroma offset 41 -> payload register 2, bytes 1,5,9,13
+ mov (4) mubMSGHDR_DI_OUT1_2(2,17)<4> ubDNDI_RESP(2,57)<8;4,2> { NoDDClr, NoDDChk } // U, chroma offset 57 -> payload register 2, bytes 17,21,25,29
+
+ mov (4) mubMSGHDR_DI_OUT1_2(2,3)<4> ubDNDI_RESP(2,40)<8;4,2> { NoDDChk } // V, chroma offset 40 -> payload register 2, bytes 3,7,11,15
+ mov (4) mubMSGHDR_DI_OUT1_2(2,19)<4> ubDNDI_RESP(2,56)<8;4,2> { NoDDChk } // V, chroma offset 56 -> payload register 2, bytes 19,23,27,31
+
+send (8) null<1>:d r18.0 0x5 0x60A801B:ud // write first block; per the descriptor layout above: message len 3, media block write, binding table index 0x1B
+send (8) null<1>:d r21.0 0x5 0x60A801B:ud // write second block to the same binding table index 0x1B
+
+// Pack 1st field Y; 1st 8x4 block (source is response register 4 onward; destination is the message based at r24)
+ mov (8) mubMSGHDR_DI_OUT2_1(1)<2> ubDNDI_RESP(4,0)<8;8,1> { NoDDClr } // Y from response byte offset 0 -> payload register 1, bytes 0,2,..,14
+ mov (8) mubMSGHDR_DI_OUT2_1(1,16)<2> ubDNDI_RESP(4,16)<8;8,1> { NoDDClr, NoDDChk } // Y from response byte offset 16 -> payload register 1, bytes 16,18,..,30
+ mov (8) mubMSGHDR_DI_OUT2_1(2)<2> ubDNDI_RESP(4,32)<8;8,1> { NoDDClr } // Y from response byte offset 32 -> payload register 2, bytes 0,2,..,14
+ mov (8) mubMSGHDR_DI_OUT2_1(2,16)<2> ubDNDI_RESP(4,48)<8;8,1> { NoDDClr, NoDDChk } // Y from response byte offset 48 -> payload register 2, bytes 16,18,..,30
+
+// Pack 1st field U,V; 1st 8x4 block (U taken from odd response bytes -> byte 1 of each 4-byte group; V from even bytes -> byte 3)
+ mov (4) mubMSGHDR_DI_OUT2_1(1,1)<4> ubDNDI_RESP(6,1)<8;4,2> { NoDDClr, NoDDChk } // U, chroma offset 1 -> payload register 1, bytes 1,5,9,13
+ mov (4) mubMSGHDR_DI_OUT2_1(1,17)<4> ubDNDI_RESP(6,17)<8;4,2> { NoDDClr, NoDDChk } // U, chroma offset 17 -> payload register 1, bytes 17,21,25,29
+
+ mov (4) mubMSGHDR_DI_OUT2_1(1,3)<4> ubDNDI_RESP(6,0)<8;4,2> { NoDDChk } // V, chroma offset 0 -> payload register 1, bytes 3,7,11,15
+ mov (4) mubMSGHDR_DI_OUT2_1(1,19)<4> ubDNDI_RESP(6,16)<8;4,2> { NoDDChk } // V, chroma offset 16 -> payload register 1, bytes 19,23,27,31
+ mov (4) mubMSGHDR_DI_OUT2_1(2,1)<4> ubDNDI_RESP(6,33)<8;4,2> { NoDDClr, NoDDChk } // U, chroma offset 33 -> payload register 2, bytes 1,5,9,13
+ mov (4) mubMSGHDR_DI_OUT2_1(2,17)<4> ubDNDI_RESP(6,49)<8;4,2> { NoDDClr, NoDDChk } // U, chroma offset 49 -> payload register 2, bytes 17,21,25,29
+
+ mov (4) mubMSGHDR_DI_OUT2_1(2,3)<4> ubDNDI_RESP(6,32)<8;4,2> { NoDDChk } // V, chroma offset 32 -> payload register 2, bytes 3,7,11,15
+ mov (4) mubMSGHDR_DI_OUT2_1(2,19)<4> ubDNDI_RESP(6,48)<8;4,2> { NoDDChk } // V, chroma offset 48 -> payload register 2, bytes 19,23,27,31
+
+// Pack 1st field Y; 2nd 8x4 block (same layout as above, source offsets shifted by 8 bytes)
+mov (8) r27.0<1>:ud r24.0<8;8,1>:ud // header for the second block = copy of the r24 header (overwrites the r27 template)
+add (1) r27.0<1>:ud r27.0<0;1,0>:w 0x10:w // move the X origin 16 bytes to the right
+
+ mov (8) mubMSGHDR_DI_OUT2_2(1)<2> ubDNDI_RESP(4,8)<8;8,1> { NoDDClr } // Y from response byte offset 8 -> payload register 1, bytes 0,2,..,14
+ mov (8) mubMSGHDR_DI_OUT2_2(1,16)<2> ubDNDI_RESP(4,24)<8;8,1> { NoDDClr, NoDDChk } // Y from response byte offset 24 -> payload register 1, bytes 16,18,..,30
+ mov (8) mubMSGHDR_DI_OUT2_2(2)<2> ubDNDI_RESP(4,40)<8;8,1> { NoDDClr } // Y from response byte offset 40 -> payload register 2, bytes 0,2,..,14
+ mov (8) mubMSGHDR_DI_OUT2_2(2,16)<2> ubDNDI_RESP(4,56)<8;8,1> { NoDDClr, NoDDChk } // Y from response byte offset 56 -> payload register 2, bytes 16,18,..,30
+
+// Pack 1st field U, V; 2nd 8x4 block
+ mov (4) mubMSGHDR_DI_OUT2_2(1,1)<4> ubDNDI_RESP(6,9)<8;4,2> { NoDDClr, NoDDChk } // U, chroma offset 9 -> payload register 1, bytes 1,5,9,13
+ mov (4) mubMSGHDR_DI_OUT2_2(1,17)<4> ubDNDI_RESP(6,25)<8;4,2> { NoDDClr, NoDDChk } // U, chroma offset 25 -> payload register 1, bytes 17,21,25,29
+
+ mov (4) mubMSGHDR_DI_OUT2_2(1,3)<4> ubDNDI_RESP(6,8)<8;4,2> { NoDDChk } // V, chroma offset 8 -> payload register 1, bytes 3,7,11,15
+ mov (4) mubMSGHDR_DI_OUT2_2(1,19)<4> ubDNDI_RESP(6,24)<8;4,2> { NoDDChk } // V, chroma offset 24 -> payload register 1, bytes 19,23,27,31
+ mov (4) mubMSGHDR_DI_OUT2_2(2,1)<4> ubDNDI_RESP(6,41)<8;4,2> { NoDDClr, NoDDChk } // U, chroma offset 41 -> payload register 2, bytes 1,5,9,13
+ mov (4) mubMSGHDR_DI_OUT2_2(2,17)<4> ubDNDI_RESP(6,57)<8;4,2> { NoDDClr, NoDDChk } // U, chroma offset 57 -> payload register 2, bytes 17,21,25,29
+
+ mov (4) mubMSGHDR_DI_OUT2_2(2,3)<4> ubDNDI_RESP(6,40)<8;4,2> { NoDDChk } // V, chroma offset 40 -> payload register 2, bytes 3,7,11,15
+ mov (4) mubMSGHDR_DI_OUT2_2(2,19)<4> ubDNDI_RESP(6,56)<8;4,2> { NoDDChk } // V, chroma offset 56 -> payload register 2, bytes 19,23,27,31
+
+send (8) null<1>:d r24.0 0x5 0x60A801E:ud // write first block; per the descriptor layout above: message len 3, media block write, binding table index 0x1E
+send (8) null<1>:d r27.0 0x5 0x60A801E:ud // write second block to the same binding table index 0x1E
+
+
+
+//End of Thread message: copy r0 into r127 and send it as the final message of the kernel
+
+mov (8) r127<1>:ud r0.0<8;8,1>:ud // payload = copy of r0
+ send (1) null<1>:d r127 0x27 0x02000010 // descriptor has message len 1 per the layout above; 0x27 presumably targets the thread spawner with end-of-thread set -- TODO confirm against the PRM
+
+
+.end_code
+.end_kernel
diff --git a/src/shaders/post_processing/gen7/PA_DI_PA.g4a b/src/shaders/post_processing/gen7/PA_DI_PA.g4a
new file mode 100644
index 0000000..f7a70f4
--- /dev/null
+++ b/src/shaders/post_processing/gen7/PA_DI_PA.g4a
@@ -0,0 +1,399 @@
+/*
+ * Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// 57 // Total instruction count
+// 1 // Total kernel count
+
+.kernel PA_DI_PA
+.code
+
+
+
+// FileName: DI.asm
+// Author: Vivek Kumar
+// Description: Tasks for DI only case (16x4 block)
+
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16)
+.default_register_type :ub
+
+.reg_count_total 128
+.reg_count_payload 7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later)
+// 0000,0000,0000
+// 0001(Spawn a root thread),0001 (Root thread spawn thread)
+// = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later)
+// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+// 0000,0000 (Binding table index, added later)
+// = 0x0C098700
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+ // Message Descriptors
+ // = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+ // 1 (header present 1) 0 1010 (media block write) 000000
+ // 00000000 (binding table index - set later)
+ // = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+ // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT
+
+
+.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub // byte view of the message payload area based at r30
+.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw // word view aliasing the same r30 base
+.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud // dword view aliasing the same r30 base
+.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f // float view aliasing the same r30 base
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// Temp space for rotation: five typed views that all alias the same base register r9.0
+
+.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f // float view
+
+.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud // unsigned dword view
+
+.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw // unsigned word view
+
+.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub // unsigned byte view, contiguous
+
+.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub // unsigned byte view reading/writing every 4th byte
+
+
+// End of common.inc
+
+
+// FileName: DNDI.inc
+// Author: Vivek Kumar
+// Description: Include file for DN, DI and DNDI
+// Inputs: DI_ENABLE, DN_ENABLE, DN_PLANAR, DN_PACKED
+
+
+
+
+// End of common.inc
+
+
+//Interface:
+//Static Parameters:
+//r1
+
+
+//====================== Binding table (Explicit To DNDI)=========================================
+
+
+.declare mudMSGHDR_DNDI Base=r18 ElementSize=4 Type=ud // DNDI request message, starts at r18 (unsigned dword view)
+.declare mdMSGHDR_DNDI Base=r18 ElementSize=4 Type=d // same r18 message, signed dword view
+.declare mwMSGHDR_DNDI Base=r18 ElementSize=2 Type=w // same r18 message, signed word view (used to place the block origin)
+
+
+.declare mudMSGHDR_STMM Base=r20 ElementSize=4 Type=ud // STMM write message, starts at r20
+
+
+.declare mudMSGHDR_HIST Base=r22 ElementSize=4 Type=ud // histogram message base at r22; not referenced by this kernel's instructions
+
+
+.declare mudMSGHDR_ENC_STATS Base=r24 ElementSize=4 Type=ud // encoder statistics write message, starts at r24 (dword view)
+.declare muwMSGHDR_ENC_STATS Base=r24 ElementSize=2 Type=uw // same r24 message, word view
+.declare mubMSGHDR_ENC_STATS Base=r24 ElementSize=1 Type=ub // same r24 message, byte view
+
+
+.declare mudMSGHDR_DN_OUT Base=r31.0 ElementSize=4 Type=ud // denoise output message base at r31; not referenced by this kernel's instructions
+.declare mdMSGHDR_DN_OUT Base=r31.0 ElementSize=4 Type=d // same r31 base, signed dword view
+.declare mubMSGHDR_DN_OUT Base=r31.0 ElementSize=1 Type=ub // same r31 base, byte view
+
+
+.declare mudMSGHDR_UVCOPY Base=r36 ElementSize=4 Type=ud // UV copy message base at r36; not referenced by this kernel's instructions
+.declare mdMSGHDR_UVCOPY Base=r36 ElementSize=4 Type=d // same r36 base, signed dword view
+.declare mudMSGHDR_UCOPY Base=r36 ElementSize=4 Type=ud // U copy message, aliases the r36 UVCOPY base
+.declare mudMSGHDR_VCOPY Base=r38 ElementSize=4 Type=ud // V copy message base at r38
+
+
+.declare mudMSGHDR_DI_OUT1 Base=r18.0 ElementSize=4 Type=ud // DI output message 1; shares r18 with the DNDI request header
+.declare mubMSGHDR_DI_OUT1 Base=r18.0 ElementSize=1 Type=ub // same r18 base, byte view
+
+
+.declare mudMSGHDR_DI_OUT2 Base=r23.0 ElementSize=4 Type=ud // DI output message 2 base at r23
+.declare mubMSGHDR_DI_OUT2 Base=r23.0 ElementSize=1 Type=ub // same r23 base, byte view
+
+//r45
+//r45 is used directly as the message header, so the data does not need to be moved there first.
+
+.declare mudDN_Y_OUT Base=r45.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud // dword view starting at r45
+
+// Message response (Denoised & DI-ed pixels & statistics); Use buffer 5
+.declare udDNDI_RESP Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud // DNDI response starting at r46, dword view
+.declare uwDNDI_RESP Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw // same r46 response, word view
+.declare ubDNDI_RESP Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub // same r46 response, byte view
+
+// Message response (UV Copy); Use buffer 5
+.declare udDNDI_UV_RESP Base=r58.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud // UV copy response starting at r58, dword view
+.declare ubDNDI_UV_RESP Base=r58.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub // same r58 response, byte view
+
+//Temp GRFs: For 42X to 422 Conversion
+.declare uwDNDI_UVCOPY_TEMP Base=r10.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw //8 GRFs starting at r10, word view
+.declare ubDNDI_UVCOPY_TEMP Base=r10.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub //8 GRFs starting at r10, byte view
+//---------------------------------------------------------------------------
+// Message descriptors
+//---------------------------------------------------------------------------
+// Extended message descriptor
+ // Message descriptor for sampler read
+ // = 000 0010 (message len 2) 00000 (resp len - set later, 12 or 5 or 11)
+ // 1 (header present 1) 0 11 (SIMD32/64 mode)
+ // 1000 (message type) 0000 (DI state index)
+ // 00000000 (binding table index - set later)
+ // = 0x040b8000
+
+
+// Attention: The Message Length is The Number of GRFs with Data Only, without the Header
+
+
+//---------------------------------------------------------------------------
+// VDI Return Data format
+//---------------------------------------------------------------------------
+// Defines for DI enabled
+
+
+// Defines for DI disabled
+
+
+
+// FileName: DNDI_Command.asm
+// Author: Vivek Kumar
+// Description: Sends a message to the VDI to process one DN (16x8) or DNDI (16x4) block
+
+// Prepare the DNDI send command: header in r18 (copy of r0), block origin in the following register
+mov (8) mudMSGHDR_DNDI(0)<1> r0.0<8;8,1>:ud // message header = copy of r0
+mov (1) mwMSGHDR_DNDI(1,4)<1> r7.0<0;1,0>:w { NoDDClr } // horizontal origin (from r7.0) // Do we need to add offset here? -vK
+mov (1) mwMSGHDR_DNDI(1,12)<1> r7.1<0;1,0>:w { NoDDChk } // vertical origin (from r7.1) // Can these 2 be combined? - vK
+
+send (8) udDNDI_RESP(0)<1> r18 0x2 0x4AE8003:ud // response lands at udDNDI_RESP (r46); per the descriptor layout above: message len 2, response len 10, binding table index 3
+
+// On Gen6, with VDI walker, use the XY pair returned rather than the one programmed above
+// VDI_RETURNED_XY is ordered XY when the walker is enabled and is the same as programmed when the walker is disabled
+mov (2) r7.0<1>:w uwDNDI_RESP(9,14)<2;2,1> // overwrite r7.0/r7.1 with the horizontal/vertical origin returned in W.14 and W.15 of response register 9
+
+
+
+// FileName: DI_STMM_Save.asm
+// Author: Vivek Kumar
+// Description: Saves DI STMM Data to statistics surface in case of DI enabled (for 16x4 block)
+
+// Write STMM to memory: header in r20, one payload register copied from the DNDI response
+mov (8) mudMSGHDR_STMM(0)<1> r0.0<8;8,1>:ud // message header = copy of r0
+mov (8) mudMSGHDR_STMM(1)<1> udDNDI_RESP(8,0) // Move STMM (response register 8) into the payload register after the header
+
+shr (1) mudMSGHDR_STMM(0,0)<1> r7.0<0;1,0>:w 1:w { NoDDClr } // header dword 0 = X origin / 2
+mov (1) mudMSGHDR_STMM(0,1)<1> r7.1<0;1,0>:w { NoDDClr, NoDDChk } // header dword 1 = Y origin
+mov (1) mudMSGHDR_STMM(0,2)<1> 0x30007:ud { NoDDChk } // header dword 2 = block width and height (8x4), each stored as size-1
+
+send (8) null<1>:d r20 0x5 0x40A8021:ud // no response (null dst); per the descriptor layout above: message len 2, media block write, binding table index 0x21
+
+
+
+// FileName: DNDI_Enc_Stats_Save.asm
+// Author: Vivek Kumar
+// Description: Saves Encoder Statistics data to statistics surface in case of DI enabled (for 16x4 block)
+
+// Write encoder statistics to memory: header in r24, one payload register built from response register 9
+//Currently enable this only on Gen6 validation
+mov (8) mudMSGHDR_ENC_STATS(1)<1> 0x0:ud // Init payload register to zero
+mov (8) mudMSGHDR_ENC_STATS(0)<1> r0.0<8;8,1>:ud // message header = copy of r0
+
+shr (1) mudMSGHDR_ENC_STATS(0,0)<1> r7.0<0;1,0>:w 1:w { NoDDClr } // X origin / 2 (original note: enable the { NoDDClr } flag after testing on si)
+mul (1) acc0.1<1>:ud r7.1<0;1,0>:w 3:w // Y origin * 3, kept in the accumulator
+shr (1) mudMSGHDR_ENC_STATS(0,1)<1> acc0.1<0;1,0>:ud 2:w { NoDDClr, NoDDChk } // Y origin * 3/4 (original note: enable the { NoDDClr, NoDDChk } flags after testing on si)
+mov (1) mudMSGHDR_ENC_STATS(0,2)<1> 0x20007:ud { NoDDChk } // block width and height (8x3), each stored as size-1 (original note: enable the { NoDDChk } flag after testing on si)
+add (2) mudMSGHDR_ENC_STATS(0,0)<1> mudMSGHDR_ENC_STATS(0,0)<2;2,1> r1.12<2;2,1>:uw // Add pitch to X,Y origin (words r1.12 and r1.13 are added to header dwords 0 and 1)
+
+
+ //Data block for Encoder Statistics
+ //----------------------------------------------------
+ //| 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | Bytes
+ //----------------------------------------------------
+ //| BNE | MCNT | FCNT | TCNT | X | X | X | X |
+ //----------------------------------------------------
+ //| DcTpT | SVCM | DcBpT | DcTpB |
+ //----------------------------------------------------
+ //| SHCM | STAD | DcTcB | DcBpB |
+ //----------------------------------------------------
+ mov (1) mudMSGHDR_ENC_STATS(1,0)<1> udDNDI_RESP(9,1)<0;1,0> { NoDDClr } // payload dword 0 <- response register 9, dword 1
+ mov (2) mudMSGHDR_ENC_STATS(1,3)<2> udDNDI_RESP(9,3)<2;2,1> { NoDDClr, NoDDChk } // payload dwords 3 and 5 <- response register 9, dwords 3 and 4
+ mov (2) mudMSGHDR_ENC_STATS(1,2)<2> udDNDI_RESP(9,5)<2;2,1> { NoDDChk } // payload dwords 2 and 4 <- response register 9, dwords 5 and 6
+
+
+send (8) null<1>:d r24 0x5 0x40A8021:ud // same descriptor as the STMM write: message len 2, media block write, binding table index 0x21
+
+
+
+// FileName: DI_Save_PA_16x4.asm
+// Author: Vivek Kumar
+// Description: Save two 16x4 blocks of DI output in Packed format
+
+
+add (4) a0.4<1>:uw r2.28<4;4,1>:ub 608:w // a0.4..a0.7 = bytes r2.28..r2.31 + 608: initial Y,U,V byte offsets in the YUV422 block (608 = 19*32, i.e. the register after the r18 header assuming 32-byte GRFs); original note says it starts at m20
+
+mov (8) r27.0<1>:ud r0.0<8;8,1>:ud // build a template header in r27 from r0
+shl (1) r27.0<1>:d r7.0<0;1,0>:w 1:w { NoDDClr } // H. block origin need to be doubled
+mov (1) r27.1<1>:d r7.1<0;1,0>:w { NoDDClr, NoDDChk } // Block origin
+mov (1) r27.2<1>:ud 0x3001F:ud { NoDDChk } // Block width and height (32x4), each stored as size-1 like the 0x30007 = 8x4 header above
+
+//prepare the message headers: copy the r27 template into the r18 and r23 headers
+mov (8) r18.0<1>:ud r27<8;8,1>:ud
+mov (8) r23.0<1>:ud r27<8;8,1>:ud
+
+// Pack 2nd field Y (indirect destination through a0.4, every other byte; one 32-byte output row per move)
+ mov (16) r[a0.4, 0]<2> ubDNDI_RESP(0,0) { NoDDClr } // Y from response byte offset 0 -> output row at +0
+ mov (16) r[a0.4, 32]<2> ubDNDI_RESP(0,16) { NoDDClr } // Y from response byte offset 16 -> output row at +32
+ mov (16) r[a0.4, 64]<2> ubDNDI_RESP(0,32) { NoDDClr } // Y from response byte offset 32 -> output row at +64
+ mov (16) r[a0.4, 96]<2> ubDNDI_RESP(0,48) { NoDDClr } // Y from response byte offset 48 -> output row at +96
+// Pack 2nd field U (indirect destination through a0.5, every 4th byte; source is the odd bytes of response register 2 onward)
+ mov (8) r[a0.5, 0]<4> ubDNDI_RESP(2,1)<16;8,2> { NoDDClr, NoDDChk } //U pixels, output row at +0
+ mov (8) r[a0.5, 32]<4> ubDNDI_RESP(2,17)<16;8,2> { NoDDClr, NoDDChk } //U pixels, output row at +32
+ mov (8) r[a0.5, 64]<4> ubDNDI_RESP(2,33)<16;8,2> { NoDDClr, NoDDChk } //U pixels, output row at +64
+ mov (8) r[a0.5, 96]<4> ubDNDI_RESP(2,49)<16;8,2> { NoDDClr, NoDDChk } //U pixels, output row at +96
+// Pack 2nd field V (indirect destination through a0.6, every 4th byte; source is the even bytes of response register 2 onward)
+ mov (8) r[a0.6, 0]<4> ubDNDI_RESP(2,0)<16;8,2> { NoDDChk } //V pixels, output row at +0
+ mov (8) r[a0.6, 32]<4> ubDNDI_RESP(2,16)<16;8,2> { NoDDChk } //V pixels, output row at +32
+ mov (8) r[a0.6, 64]<4> ubDNDI_RESP(2,32)<16;8,2> { NoDDChk } //V pixels, output row at +64
+ mov (8) r[a0.6, 96]<4> ubDNDI_RESP(2,48)<16;8,2> { NoDDChk } //V pixels, output row at +96
+
+// Pack 1st field Y (same layout, destination offsets +160..+256 skip past the second message header; source is response register 4 onward)
+ mov (16) r[a0.4, 160]<2> ubDNDI_RESP(4,0) { NoDDClr } // Y from response byte offset 0 -> output row at +160
+ mov (16) r[a0.4, 192]<2> ubDNDI_RESP(4,16) { NoDDClr } // Y from response byte offset 16 -> output row at +192
+ mov (16) r[a0.4, 224]<2> ubDNDI_RESP(4,32) { NoDDClr } // Y from response byte offset 32 -> output row at +224
+ mov (16) r[a0.4, 256]<2> ubDNDI_RESP(4,48) { NoDDClr } // Y from response byte offset 48 -> output row at +256
+// Pack 1st field U (odd bytes of response register 6 onward)
+ mov (8) r[a0.5, 160]<4> ubDNDI_RESP(6,1)<16;8,2> { NoDDClr, NoDDChk } //U pixels, output row at +160
+ mov (8) r[a0.5, 192]<4> ubDNDI_RESP(6,17)<16;8,2> { NoDDClr, NoDDChk } //U pixels, output row at +192
+ mov (8) r[a0.5, 224]<4> ubDNDI_RESP(6,33)<16;8,2> { NoDDClr, NoDDChk } //U pixels, output row at +224
+ mov (8) r[a0.5, 256]<4> ubDNDI_RESP(6,49)<16;8,2> { NoDDClr, NoDDChk } //U pixels, output row at +256
+// Pack 1st field V (even bytes of response register 6 onward)
+ mov (8) r[a0.6, 160]<4> ubDNDI_RESP(6,0)<16;8,2> { NoDDChk } //V pixels, output row at +160
+ mov (8) r[a0.6, 192]<4> ubDNDI_RESP(6,16)<16;8,2> { NoDDChk } //V pixels, output row at +192
+ mov (8) r[a0.6, 224]<4> ubDNDI_RESP(6,32)<16;8,2> { NoDDChk } //V pixels, output row at +224
+ mov (8) r[a0.6, 256]<4> ubDNDI_RESP(6,48)<16;8,2> { NoDDChk } //V pixels, output row at +256
+
+//save the previous frame (message headed by r18; per the descriptor layout above: message len 5, media block write, binding table index 0x1B)
+send (8) null<1>:d r18.0 0x5 0xA0A801B:ud
+
+//save the current frame (message headed by r23; same descriptor with binding table index 0x1E)
+send (8) null<1>:d r23.0 0x5 0xA0A801E:ud
+
+
+
+//End of Thread message: copy r0 into r127 and send it as the final message of the kernel
+
+mov (8) r127<1>:ud r0.0<8;8,1>:ud // payload = copy of r0
+ send (1) null<1>:d r127 0x27 0x02000010 // descriptor has message len 1 per the layout above; 0x27 presumably targets the thread spawner with end-of-thread set -- TODO confirm against the PRM
+
+
+.end_code
+.end_kernel
diff --git a/src/shaders/post_processing/gen7/PA_DNDI_422CP.g4a b/src/shaders/post_processing/gen7/PA_DNDI_422CP.g4a
new file mode 100644
index 0000000..13302e8
--- /dev/null
+++ b/src/shaders/post_processing/gen7/PA_DNDI_422CP.g4a
@@ -0,0 +1,537 @@
+/*
+ * Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// 127 // Total instruction count
+// 1 // Total kernel count
+
+.kernel PA_DNDI_422CP
+.code
+
+
+
+// FileName: DNDI_PL_Core.asm
+// Author: Tatiya, Rupesh
+
+
+
+// FileName: DNDI_Core.asm
+// Author: Vivek Kumar
+// Description: Tasks for DN+DI case (16x4 block)
+
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16)
+.default_register_type :ub
+
+.reg_count_total 128
+.reg_count_payload 7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later)
+// 0000,0000,0000
+// 0001(Spawn a root thread),0001 (Root thread spawn thread)
+// = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later)
+// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+// 0000,0000 (Binding table index, added later)
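+// = 0x0C098700 (value implied by the bit fields listed above; this line previously read 0x02000011, apparently copied from the thread-spawner descriptor)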
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+ // Message Descriptors
+ // = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+ // 1 (header present 1) 0 1010 (media block write) 000000
+ // 00000000 (binding table index - set later)
+ // = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+ // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT
+
+
+.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+
+.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+
+.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+
+
+// End of common.inc
+
+
+// FileName: DNDI.inc
+// Author: Vivek Kumar
+// Description: Include file for DN, DI and DNDI
+// Inputs: DI_ENABLE, DN_ENABLE, DN_PLANAR, DN_PACKED
+
+
+
+
+// End of common.inc
+
+
+//Interface:
+//Static Parameters:
+//r1
+
+
+//====================== Binding table (Explicit To DNDI)=========================================
+
+
+.declare mudMSGHDR_DNDI Base=r18 ElementSize=4 Type=ud
+.declare mdMSGHDR_DNDI Base=r18 ElementSize=4 Type=d
+.declare mwMSGHDR_DNDI Base=r18 ElementSize=2 Type=w
+
+
+.declare mudMSGHDR_STMM Base=r20 ElementSize=4 Type=ud
+
+
+.declare mudMSGHDR_HIST Base=r22 ElementSize=4 Type=ud
+
+
+.declare mudMSGHDR_ENC_STATS Base=r24 ElementSize=4 Type=ud
+.declare muwMSGHDR_ENC_STATS Base=r24 ElementSize=2 Type=uw
+.declare mubMSGHDR_ENC_STATS Base=r24 ElementSize=1 Type=ub
+
+
+.declare mudMSGHDR_DN_OUT Base=r31.0 ElementSize=4 Type=ud
+.declare mdMSGHDR_DN_OUT Base=r31.0 ElementSize=4 Type=d
+.declare mubMSGHDR_DN_OUT Base=r31.0 ElementSize=1 Type=ub
+
+
+.declare mudMSGHDR_UVCOPY Base=r36 ElementSize=4 Type=ud
+.declare mdMSGHDR_UVCOPY Base=r36 ElementSize=4 Type=d
+.declare mudMSGHDR_UCOPY Base=r36 ElementSize=4 Type=ud
+.declare mudMSGHDR_VCOPY Base=r38 ElementSize=4 Type=ud
+
+
+.declare mudMSGHDR_DI_OUT1 Base=r18.0 ElementSize=4 Type=ud
+.declare mubMSGHDR_DI_OUT1 Base=r18.0 ElementSize=1 Type=ub
+
+
+.declare mudMSGHDR_DI_OUT2 Base=r23.0 ElementSize=4 Type=ud
+.declare mubMSGHDR_DI_OUT2 Base=r23.0 ElementSize=1 Type=ub
+
+//r45
+//Use r45 as message header, so no need to "mov" the data.
+
+.declare mudDN_Y_OUT Base=r45.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+// Message response (Denoised & DI-ed pixels & statistics); Use buffer 5
+.declare udDNDI_RESP Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare uwDNDI_RESP Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare ubDNDI_RESP Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+// Message response (UV Copy); Use buffer 5
+.declare udDNDI_UV_RESP Base=r58.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare ubDNDI_UV_RESP Base=r58.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+//Temp GRFs: For 42X to 422 Conversion
+.declare uwDNDI_UVCOPY_TEMP Base=r10.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw //8 GRFs
+.declare ubDNDI_UVCOPY_TEMP Base=r10.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub //8 GRFs
+//---------------------------------------------------------------------------
+// Message descriptors
+//---------------------------------------------------------------------------
+// Extended message descriptor
+ // Message descriptor for sampler read
+ // = 000 0010 (message len 2) 00000 (resp len - set later, 12 or 5 or 11)
+ // 1 (header present 1) 0 11 (SIMD32/64 mode)
+ // 1000 (message type) 0000 (DI state index)
+ // 00000000 (binding table index - set later)
+ // = 0x040b8000
+
+
+// Attention: The Message Length is The Number of GRFs with Data Only, without the Header
+
+
+//---------------------------------------------------------------------------
+// VDI Return Data format
+//---------------------------------------------------------------------------
+// Defines for DI enabled
+
+
+// Defines for DI disabled
+
+
+
+// FileName: DNDI_Command.asm
+// Author: Vivek Kumar
+// Description: Sends a message to the VDI to process one DN (16x8) or DNDI (16x4) block
+
+// Prepare the DNDI send command: two-register request in r18-r19 (r18 = copy of r0, r19 carries the block origin)
+mov (8) mudMSGHDR_DNDI(0)<1> r0.0<8;8,1>:ud // message header (r18) starts as a copy of r0
+mov (1) mwMSGHDR_DNDI(1,4)<1> r7.0<0;1,0>:w { NoDDClr } // horizontal origin (r7.0:w) -> word 4 of r19 // Do we need to add offset here? -vK
+mov (1) mwMSGHDR_DNDI(1,12)<1> r7.1<0;1,0>:w { NoDDChk } // vertical origin (r7.1:w) -> word 12 of r19 // Can these 2 be combined? - vK
+// Descriptor 0x4CE8003: message length 2, response length 12 (same field layout as the sampler-read descriptor note in DNDI.inc), low byte 0x03 = binding table index
+send (8) udDNDI_RESP(0)<1> r18 0x2 0x4CE8003:ud
+// The response is written starting at udDNDI_RESP(0), which is declared at r46
+// On Gen6, with VDI walker, use the XY pair returned in the response rather than the one programmed above.
+// VDI_RETURNED_XY is ordered X,Y when the walker is enabled, and equals the programmed origin when the walker is disabled.
+mov (2) r7.0<1>:w uwDNDI_RESP(9,14)<2;2,1> // horizontal/vertical origin are returned in words 14 and 15 of response register 9
+
+
+
+// FileName: DI_STMM_Save.asm
+// Author: Vivek Kumar
+// Description: Saves DI STMM Data to statistics surface in case of DI enabled (for 16x4 block)
+
+// Write STMM to memory: header in r20, one payload register in r21
+mov (8) mudMSGHDR_STMM(0)<1> r0.0<8;8,1>:ud // message header starts as a copy of r0
+mov (8) mudMSGHDR_STMM(1)<1> udDNDI_RESP(8,0) // Copy response register 8 (STMM) into the payload register r21
+// Override header dwords 0-2 with the destination block position and size
+shr (1) mudMSGHDR_STMM(0,0)<1> r7.0<0;1,0>:w 1:w { NoDDClr } // X origin / 2
+mov (1) mudMSGHDR_STMM(0,1)<1> r7.1<0;1,0>:w { NoDDClr, NoDDChk } // Y origin
+mov (1) mudMSGHDR_STMM(0,2)<1> 0x30007:ud { NoDDChk } // block width and height (8x4): low half = width-1, high half = height-1
+// Descriptor 0x40A8021: message length 2 (header + payload), same 0x0A8000 control bits as the media block write descriptor documented in common.inc, binding table index 0x21
+send (8) null<1>:d r20 0x5 0x40A8021:ud
+
+
+
+// FileName: DN_Hist_Save.asm
+// Author: Vivek Kumar
+// Description: Saves DN history data to statistics surface
+
+// Write denoise history to memory; the header is staged in r27 and copied into r22 just before the send
+mov (8) r27<1>:ud r0.0<8;8,1>:ud // staged message header starts as a copy of r0
+
+ mov (1) mudMSGHDR_HIST(1)<1> udDNDI_RESP(9,0)<0;1,0> // Copy dword 0 of response register 9 (denoise history) into payload register r23 (4x1 block)
+
+
+shr (2) r27.0<1>:ud r7.0<2;2,1>:w 2:w // X,Y origin / 4, written to staged header dwords 0 and 1
+add (1) r27.0<1>:ud r27.0<0;1,0>:ud r1.12<0;1,0>:uw { NoDDClr } // Add r1.12:uw to the X origin (described by the author as adding the pitch)
+mov (1) r27.2<1>:ud 0x3:ud { NoDDChk } // block width and height: width-1 = 3, height-1 = 0, i.e. 4x1
+
+mov (8) mudMSGHDR_HIST(0)<1> r27.0<8;8,1>:ud // move the finished header into r22
+send (8) null<1>:d r22 0x5 0x40A8021:ud // same descriptor as the STMM write: message length 2, binding table index 0x21
+
+
+
+// FileName: DNDI_Enc_Stats_Save.asm
+// Author: Vivek Kumar
+// Description: Saves Encoder Statistics data to statistics surface in case of DI enabled (for 16x4 block)
+
+// Write encoder statistics to memory: header in r24, one payload register in r25
+//Currently enable this only on Gen6 validation
+mov (8) mudMSGHDR_ENC_STATS(1)<1> 0x0:ud // Zero the payload register (r25) so unused dwords are written as 0
+mov (8) mudMSGHDR_ENC_STATS(0)<1> r0.0<8;8,1>:ud // message header starts as a copy of r0
+// The three header writes below already carry their dependency hints; the author's "enable the flag after testing on si" notes predate that
+shr (1) mudMSGHDR_ENC_STATS(0,0)<1> r7.0<0;1,0>:w 1:w { NoDDClr } // X origin / 2 (author note: enable the flag after testing on si)
+mul (1) acc0.1<1>:ud r7.1<0;1,0>:w 3:w // Y origin * 3, kept in the accumulator
+shr (1) mudMSGHDR_ENC_STATS(0,1)<1> acc0.1<0;1,0>:ud 2:w { NoDDClr, NoDDChk } // Y origin * 3/4 (author note: enable the flag after testing on si)
+mov (1) mudMSGHDR_ENC_STATS(0,2)<1> 0x20007:ud { NoDDChk } // block width and height (8x3): width-1 = 7, height-1 = 2 (author note: enable the flag after testing on si)
+add (2) mudMSGHDR_ENC_STATS(0,0)<1> mudMSGHDR_ENC_STATS(0,0)<2;2,1> r1.12<2;2,1>:uw // Add r1.12:uw and r1.13:uw to the X and Y origin (described by the author as adding the pitch)
+
+
+ //Data block for Encoder Statistics
+ //----------------------------------------------------
+ //| 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | Bytes
+ //----------------------------------------------------
+ //| BNE | MCNT | FCNT | TCNT | X | X | X | X |
+ //----------------------------------------------------
+ //| DcTpT | SVCM | DcBpT | DcTpB |
+ //----------------------------------------------------
+ //| SHCM | STAD | DcTcB | DcBpB |
+ //----------------------------------------------------
+ mov (1) mudMSGHDR_ENC_STATS(1,0)<1> udDNDI_RESP(9,1)<0;1,0> { NoDDClr } // response reg 9 dword 1 -> payload dword 0
+ mov (2) mudMSGHDR_ENC_STATS(1,3)<2> udDNDI_RESP(9,3)<2;2,1> { NoDDClr, NoDDChk } // response reg 9 dwords 3,4 -> payload dwords 3 and 5 (destination stride 2)
+ mov (2) mudMSGHDR_ENC_STATS(1,2)<2> udDNDI_RESP(9,5)<2;2,1> { NoDDChk } // response reg 9 dwords 5,6 -> payload dwords 2 and 4 (destination stride 2)
+
+// Same media block write descriptor as the STMM and history saves: message length 2, binding table index 0x21
+send (8) null<1>:d r24 0x5 0x40A8021:ud
+
+
+
+// FileName: DN_Save_PA_16x4.asm
+// Author: Vivek Kumar
+// Description: Save one 16x4 blocks of DN output in Packed format for reference
+
+
+ // check top/bottom field first: f0.0 is set when the byte at r1.28 equals 1
+cmp.e.f0.0 (1) null<1>:w r1.28<0;1,0>:ub 1:w
+// a0.4..a0.7 = bytes r4.0..r4.3 + 1024; a0.4/a0.5/a0.6 are used below as the Y/U/V write pointers
+add (4) a0.4<1>:uw r4.0<4;4,1>:ub 1024:w // Initial Y,U,V offset in YUV422 block; 1024 bytes = r32, the register right after the r31 header (the original note said "m14", apparently a leftover MRF name)
+// Header in r31: copy of r0 with X, Y and block size overridden in dwords 0-2
+mov (8) mudMSGHDR_DN_OUT(0)<1> r0<8;8,1>:ud // message header
+shl (1) mdMSGHDR_DN_OUT(0,0)<1> r7.0<0;1,0>:w 1:w { NoDDClr } // X origin * 2 (422 output)
+mov (1) mdMSGHDR_DN_OUT(0,1)<1> r7.1<0;1,0>:w { NoDDClr, NoDDChk } // Y origin
+mov (1) mudMSGHDR_DN_OUT(0,2)<1> 0x3001F:ud { NoDDChk } // block width and height: 0x1F = width-1, 0x3 = height-1, i.e. 32x4 by the same encoding as 0x30007 = 8x4 elsewhere in this kernel (the original note said 32x8)
+// Taken when r1.28 == 1; otherwise fall through to the bottom-field-first packing
+(f0.0) jmpi (1) TOP_FIELD_FIRST
+// Both paths write four 32-byte output rows: Y to every 2nd byte, U and V to every 4th byte; they differ only in which field supplies which row
+BOTTOM_FIELD_FIRST:
+ mov (16) r[a0.4, 0]<2>:ub ubDNDI_RESP(10,0) { NoDDClr } // 2nd field luma from current frame (line 0,2)
+ mov (16) r[a0.4, 32]<2>:ub ubDNDI_RESP(4,16) { NoDDClr } // 1st field luma from current frame (line 1,3)
+ mov (16) r[a0.4, 64]<2>:ub ubDNDI_RESP(10,16) { NoDDClr } // 2nd field luma from current frame (line 0,2)
+ mov (16) r[a0.4, 96]<2>:ub ubDNDI_RESP(5,16) { NoDDClr } // 1st field luma from current frame (line 1,3)
+ mov (8) r[a0.5, 0]<4>:ub ubDNDI_RESP(11,1)<16;8,2> { NoDDClr, NoDDChk } // 2nd field U from current frame (line 0,2)
+ mov (8) r[a0.5, 32]<4>:ub ubDNDI_RESP(6,17)<16;8,2> { NoDDClr, NoDDChk } // 1st field U from current frame (line 1,3)
+ mov (8) r[a0.5, 64]<4>:ub ubDNDI_RESP(11,17)<16;8,2> { NoDDClr, NoDDChk } // 2nd field U from current frame (line 0,2)
+ mov (8) r[a0.5, 96]<4>:ub ubDNDI_RESP(7,17)<16;8,2> { NoDDClr, NoDDChk } // 1st field U from current frame (line 1,3)
+ mov (8) r[a0.6, 0]<4>:ub ubDNDI_RESP(11,0)<16;8,2> { NoDDChk } // 2nd field V from current frame (line 0,2)
+ mov (8) r[a0.6, 32]<4>:ub ubDNDI_RESP(6,16)<16;8,2> { NoDDChk } // 1st field V from current frame (line 1,3)
+ mov (8) r[a0.6, 64]<4>:ub ubDNDI_RESP(11,16)<16;8,2> { NoDDChk } // 2nd field V from current frame (line 0,2)
+ mov (8) r[a0.6, 96]<4>:ub ubDNDI_RESP(7,16)<16;8,2> { NoDDChk } // 1st field V from current frame (line 1,3)
+jmpi (1) SAVE_DN_CURR
+
+TOP_FIELD_FIRST:
+ mov (16) r[a0.4, 0]<2>:ub ubDNDI_RESP(4,0) { NoDDClr } // 1st field luma from current frame (line 0,2)
+ mov (16) r[a0.4, 32]<2>:ub ubDNDI_RESP(10,0) { NoDDClr } // 2nd field luma from current frame (line 1,3)
+ mov (16) r[a0.4, 64]<2>:ub ubDNDI_RESP(5,0) { NoDDClr } // 1st field luma from current frame (line 0,2)
+ mov (16) r[a0.4, 96]<2>:ub ubDNDI_RESP(10,16) { NoDDClr } // 2nd field luma from current frame (line 1,3)
+ mov (8) r[a0.5, 0]<4>:ub ubDNDI_RESP(6,1)<16;8,2> { NoDDClr, NoDDChk } // 1st field U from current frame (line 0,2)
+ mov (8) r[a0.5, 32]<4>:ub ubDNDI_RESP(11,1)<16;8,2> { NoDDClr, NoDDChk } // 2nd field U from current frame (line 1,3)
+ mov (8) r[a0.5, 64]<4>:ub ubDNDI_RESP(7,1)<16;8,2> { NoDDClr, NoDDChk } // 1st field U from current frame (line 0,2)
+ mov (8) r[a0.5, 96]<4>:ub ubDNDI_RESP(11,17)<16;8,2> { NoDDClr, NoDDChk } // 2nd field U from current frame (line 1,3)
+ mov (8) r[a0.6, 0]<4>:ub ubDNDI_RESP(6,0)<16;8,2> { NoDDChk } // 1st field V from current frame (line 0,2)
+ mov (8) r[a0.6, 32]<4>:ub ubDNDI_RESP(11,0)<16;8,2> { NoDDChk } // 2nd field V from current frame (line 1,3)
+ mov (8) r[a0.6, 64]<4>:ub ubDNDI_RESP(7,0)<16;8,2> { NoDDChk } // 1st field V from current frame (line 0,2)
+ mov (8) r[a0.6, 96]<4>:ub ubDNDI_RESP(11,16)<16;8,2> { NoDDChk } // 2nd field V from current frame (line 1,3)
+
+SAVE_DN_CURR:
+//send out data through data port: descriptor 0xA0A8018 has message length 5 (header r31 + four packed 32-byte rows), binding table index 0x18
+send (8) null<1>:d r31.0 0x5 0xA0A8018:ud
+
+
+
+// FileName: DI_Save_422CP_16x4.asm
+// Author: Vivek Kumar
+// Description: Save two 16x4 blocks of DI output in 422 format to Color Pipe (IECP)
+
+
+.declare mubMSGHDR_DI_OUT1_1 Base=r18.0 ElementSize=1 Type=ub
+// Four messages are built below, each one header register followed by two payload registers:
+// OUT1_1 = r18-r20, OUT1_2 = r21-r23 (2nd field), OUT2_1 = r24-r26, OUT2_2 = r27-r29 (1st field)
+.declare mubMSGHDR_DI_OUT1_2 Base=r21.0 ElementSize=1 Type=ub
+
+
+.declare mubMSGHDR_DI_OUT2_1 Base=r24.0 ElementSize=1 Type=ub
+
+
+.declare mubMSGHDR_DI_OUT2_2 Base=r27.0 ElementSize=1 Type=ub
+
+// Stage a template header in r27 (it is overwritten later, when r27 becomes the OUT2_2 header)
+mov (8) r27.0<1>:ud r0.0<8;8,1>:ud
+shl (1) r27.0<1>:ud r7.0<0;1,0>:w 1:w { NoDDClr } // Horizontal block origin is doubled (X origin * 2)
+mov (1) r27.1<1>:ud r7.1<0;1,0>:w { NoDDClr, NoDDChk } // Vertical block origin
+mov (1) r27.2<1>:ud 0x3000F:ud { NoDDClr, NoDDChk } // Block width and height: 0xF = width-1, 0x3 = height-1, i.e. 16x4 by the same encoding as 0x30007 = 8x4 elsewhere in this kernel (the original note said 16x8)
+
+//M0.3 - 0 - CP Enable, 1 - Area of Interest, 3:2 Message Format(TBD), 4:3 - Ignored, 31:5 CP state pointer
+//Compose area-of-interest bit + color pipe state pointer: header dword 3 = r2.4:ud OR the byte at r7.26
+or (1) r27.3<1>:ud r2.4<0;1,0>:ud r7.26<0;1,0>:b { NoDDChk }
+
+//prepare the message headers: r18 (OUT1_1) and r24 (OUT2_1) both start from the template
+mov (8) r18.0<1>:ud r27<8;8,1>:ud
+mov (8) r24.0<1>:ud r27<8;8,1>:ud
+
+
+// Pack 2nd field Y; First 8x4 block (Y goes to every 2nd byte of the payload, starting at byte 0)
+ mov (8) mubMSGHDR_DI_OUT1_1(1)<2> ubDNDI_RESP(0,0)<8;8,1> { NoDDClr } // Y line 0, left 8 pixels -> first half of r19 (written straight into the message payload)
+ mov (8) mubMSGHDR_DI_OUT1_1(1,16)<2> ubDNDI_RESP(0,16)<8;8,1> { NoDDClr, NoDDChk } // Y line 1, left 8 pixels -> second half of r19
+ mov (8) mubMSGHDR_DI_OUT1_1(2)<2> ubDNDI_RESP(0,32)<8;8,1> { NoDDClr } // Y line 2, left 8 pixels -> first half of r20
+ mov (8) mubMSGHDR_DI_OUT1_1(2,16)<2> ubDNDI_RESP(0,48)<8;8,1> { NoDDClr, NoDDChk } // Y line 3, left 8 pixels -> second half of r20
+
+// Pack 2nd field U, V; First 8x4 block (U from odd source bytes to payload byte 1 + 4k, V from even source bytes to payload byte 3 + 4k)
+ mov (4) mubMSGHDR_DI_OUT1_1(1,1)<4> ubDNDI_RESP(2,1)<8;4,2> { NoDDClr, NoDDChk } // U line 0 -> first half of r19
+ mov (4) mubMSGHDR_DI_OUT1_1(1,17)<4> ubDNDI_RESP(2,17)<8;4,2> { NoDDClr, NoDDChk } // U line 1 -> second half of r19
+
+ mov (4) mubMSGHDR_DI_OUT1_1(1,3)<4> ubDNDI_RESP(2,0)<8;4,2> { NoDDChk } // V line 0 -> first half of r19
+ mov (4) mubMSGHDR_DI_OUT1_1(1,19)<4> ubDNDI_RESP(2,16)<8;4,2> { NoDDChk } // V line 1 -> second half of r19
+ mov (4) mubMSGHDR_DI_OUT1_1(2,1)<4> ubDNDI_RESP(2,33)<8;4,2> { NoDDClr, NoDDChk } // U line 2 -> first half of r20
+ mov (4) mubMSGHDR_DI_OUT1_1(2,17)<4> ubDNDI_RESP(2,49)<8;4,2> { NoDDClr, NoDDChk } // U line 3 -> second half of r20
+
+ mov (4) mubMSGHDR_DI_OUT1_1(2,3)<4> ubDNDI_RESP(2,32)<8;4,2> { NoDDChk } // V line 2 -> first half of r20
+ mov (4) mubMSGHDR_DI_OUT1_1(2,19)<4> ubDNDI_RESP(2,48)<8;4,2> { NoDDChk } // V line 3 -> second half of r20
+
+
+// Pack 2nd field Y; Second 8x4 block: header r21 = header r18 with the X origin advanced by 0x10 bytes
+mov (8) r21.0<1>:ud r18.0<8;8,1>:ud
+add (1) r21.0<1>:ud r21.0<0;1,0>:w 0x10:w
+
+ mov (8) mubMSGHDR_DI_OUT1_2(1)<2> ubDNDI_RESP(0,8)<8;8,1> { NoDDClr } // Y line 0, right 8 pixels -> first half of r22
+ mov (8) mubMSGHDR_DI_OUT1_2(1,16)<2> ubDNDI_RESP(0,24)<8;8,1> { NoDDClr, NoDDChk } // Y line 1, right 8 pixels -> second half of r22
+ mov (8) mubMSGHDR_DI_OUT1_2(2)<2> ubDNDI_RESP(0,40)<8;8,1> { NoDDClr } // Y line 2, right 8 pixels -> first half of r23
+ mov (8) mubMSGHDR_DI_OUT1_2(2,16)<2> ubDNDI_RESP(0,56)<8;8,1> { NoDDClr, NoDDChk } // Y line 3, right 8 pixels -> second half of r23
+
+// Pack 2nd field U, V; Second 8x4 block (same layout as the first block, source offsets advanced by 8 bytes)
+ mov (4) mubMSGHDR_DI_OUT1_2(1,1)<4> ubDNDI_RESP(2,9)<8;4,2> { NoDDClr, NoDDChk } // U line 0 -> first half of r22
+ mov (4) mubMSGHDR_DI_OUT1_2(1,17)<4> ubDNDI_RESP(2,25)<8;4,2> { NoDDClr, NoDDChk } // U line 1 -> second half of r22
+
+ mov (4) mubMSGHDR_DI_OUT1_2(1,3)<4> ubDNDI_RESP(2,8)<8;4,2> { NoDDChk } // V line 0 -> first half of r22
+ mov (4) mubMSGHDR_DI_OUT1_2(1,19)<4> ubDNDI_RESP(2,24)<8;4,2> { NoDDChk } // V line 1 -> second half of r22
+ mov (4) mubMSGHDR_DI_OUT1_2(2,1)<4> ubDNDI_RESP(2,41)<8;4,2> { NoDDClr, NoDDChk } // U line 2 -> first half of r23
+ mov (4) mubMSGHDR_DI_OUT1_2(2,17)<4> ubDNDI_RESP(2,57)<8;4,2> { NoDDClr, NoDDChk } // U line 3 -> second half of r23
+
+ mov (4) mubMSGHDR_DI_OUT1_2(2,3)<4> ubDNDI_RESP(2,40)<8;4,2> { NoDDChk } // V line 2 -> first half of r23
+ mov (4) mubMSGHDR_DI_OUT1_2(2,19)<4> ubDNDI_RESP(2,56)<8;4,2> { NoDDChk } // V line 3 -> second half of r23
+// Write both 2nd-field blocks: descriptor 0x60A801B has message length 3 (header + two payload registers), binding table index 0x1B
+send (8) null<1>:d r18.0 0x5 0x60A801B:ud
+send (8) null<1>:d r21.0 0x5 0x60A801B:ud
+
+// Pack 1st field Y; 1st 8x4 block (Y goes to every 2nd byte of the payload, starting at byte 0)
+ mov (8) mubMSGHDR_DI_OUT2_1(1)<2> ubDNDI_RESP(4,0)<8;8,1> { NoDDClr } // Y line 0, left 8 pixels -> first half of r25 (written straight into the message payload)
+ mov (8) mubMSGHDR_DI_OUT2_1(1,16)<2> ubDNDI_RESP(4,16)<8;8,1> { NoDDClr, NoDDChk } // Y line 1, left 8 pixels -> second half of r25
+ mov (8) mubMSGHDR_DI_OUT2_1(2)<2> ubDNDI_RESP(4,32)<8;8,1> { NoDDClr } // Y line 2, left 8 pixels -> first half of r26
+ mov (8) mubMSGHDR_DI_OUT2_1(2,16)<2> ubDNDI_RESP(4,48)<8;8,1> { NoDDClr, NoDDChk } // Y line 3, left 8 pixels -> second half of r26
+
+// Pack 1st field U,V; 1st 8x4 block (U from odd source bytes to payload byte 1 + 4k, V from even source bytes to payload byte 3 + 4k)
+ mov (4) mubMSGHDR_DI_OUT2_1(1,1)<4> ubDNDI_RESP(6,1)<8;4,2> { NoDDClr, NoDDChk } // U line 0 -> first half of r25
+ mov (4) mubMSGHDR_DI_OUT2_1(1,17)<4> ubDNDI_RESP(6,17)<8;4,2> { NoDDClr, NoDDChk } // U line 1 -> second half of r25
+
+ mov (4) mubMSGHDR_DI_OUT2_1(1,3)<4> ubDNDI_RESP(6,0)<8;4,2> { NoDDChk } // V line 0 -> first half of r25
+ mov (4) mubMSGHDR_DI_OUT2_1(1,19)<4> ubDNDI_RESP(6,16)<8;4,2> { NoDDChk } // V line 1 -> second half of r25
+ mov (4) mubMSGHDR_DI_OUT2_1(2,1)<4> ubDNDI_RESP(6,33)<8;4,2> { NoDDClr, NoDDChk } // U line 2 -> first half of r26
+ mov (4) mubMSGHDR_DI_OUT2_1(2,17)<4> ubDNDI_RESP(6,49)<8;4,2> { NoDDClr, NoDDChk } // U line 3 -> second half of r26
+
+ mov (4) mubMSGHDR_DI_OUT2_1(2,3)<4> ubDNDI_RESP(6,32)<8;4,2> { NoDDChk } // V line 2 -> first half of r26
+ mov (4) mubMSGHDR_DI_OUT2_1(2,19)<4> ubDNDI_RESP(6,48)<8;4,2> { NoDDChk } // V line 3 -> second half of r26
+
+// Pack 1st field Y; 2nd 8x4 block: header r27 = header r24 with the X origin advanced by 0x10 bytes
+mov (8) r27.0<1>:ud r24.0<8;8,1>:ud
+add (1) r27.0<1>:ud r27.0<0;1,0>:w 0x10:w
+
+ mov (8) mubMSGHDR_DI_OUT2_2(1)<2> ubDNDI_RESP(4,8)<8;8,1> { NoDDClr } // Y line 0, right 8 pixels -> first half of r28
+ mov (8) mubMSGHDR_DI_OUT2_2(1,16)<2> ubDNDI_RESP(4,24)<8;8,1> { NoDDClr, NoDDChk } // Y line 1, right 8 pixels -> second half of r28
+ mov (8) mubMSGHDR_DI_OUT2_2(2)<2> ubDNDI_RESP(4,40)<8;8,1> { NoDDClr } // Y line 2, right 8 pixels -> first half of r29
+ mov (8) mubMSGHDR_DI_OUT2_2(2,16)<2> ubDNDI_RESP(4,56)<8;8,1> { NoDDClr, NoDDChk } // Y line 3, right 8 pixels -> second half of r29
+
+// Pack 1st field U, V; 2nd 8x4 block (same layout as the first block, source offsets advanced by 8 bytes)
+ mov (4) mubMSGHDR_DI_OUT2_2(1,1)<4> ubDNDI_RESP(6,9)<8;4,2> { NoDDClr, NoDDChk } // U line 0 -> first half of r28
+ mov (4) mubMSGHDR_DI_OUT2_2(1,17)<4> ubDNDI_RESP(6,25)<8;4,2> { NoDDClr, NoDDChk } // U line 1 -> second half of r28
+
+ mov (4) mubMSGHDR_DI_OUT2_2(1,3)<4> ubDNDI_RESP(6,8)<8;4,2> { NoDDChk } // V line 0 -> first half of r28
+ mov (4) mubMSGHDR_DI_OUT2_2(1,19)<4> ubDNDI_RESP(6,24)<8;4,2> { NoDDChk } // V line 1 -> second half of r28
+ mov (4) mubMSGHDR_DI_OUT2_2(2,1)<4> ubDNDI_RESP(6,41)<8;4,2> { NoDDClr, NoDDChk } // U line 2 -> first half of r29
+ mov (4) mubMSGHDR_DI_OUT2_2(2,17)<4> ubDNDI_RESP(6,57)<8;4,2> { NoDDClr, NoDDChk } // U line 3 -> second half of r29
+
+ mov (4) mubMSGHDR_DI_OUT2_2(2,3)<4> ubDNDI_RESP(6,40)<8;4,2> { NoDDChk } // V line 2 -> first half of r29
+ mov (4) mubMSGHDR_DI_OUT2_2(2,19)<4> ubDNDI_RESP(6,56)<8;4,2> { NoDDChk } // V line 3 -> second half of r29
+// Write both 1st-field blocks: descriptor 0x60A801E has message length 3 (header + two payload registers), binding table index 0x1E
+send (8) null<1>:d r24.0 0x5 0x60A801E:ud
+send (8) null<1>:d r27.0 0x5 0x60A801E:ud
+
+
+
+//End of Thread message
+// Final message header (r127) is a plain copy of r0. NOTE(review): extended descriptor 0x27 presumably selects the thread spawner with the end-of-thread bit set - confirm against the Gen7 send encoding.
+mov (8) r127<1>:ud r0.0<8;8,1>:ud
+ send (1) null<1>:d r127 0x27 0x02000010
+
+
+.end_code
+.end_kernel
diff --git a/src/shaders/post_processing/gen7/PA_DNDI_PA.g4a b/src/shaders/post_processing/gen7/PA_DNDI_PA.g4a
new file mode 100644
index 0000000..b42149c
--- /dev/null
+++ b/src/shaders/post_processing/gen7/PA_DNDI_PA.g4a
@@ -0,0 +1,475 @@
+/*
+ * Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// 97 // Total instruction count
+// 1 // Total kernel count
+
+.kernel PA_DNDI_PA
+.code
+
+
+
+// FileName: DNDI_PL_Core.asm
+// Author: Tatiya, Rupesh
+
+
+
+// FileName: DNDI_Core.asm
+// Author: Vivek Kumar
+// Description: Tasks for DN+DI case (16x4 block)
+
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16)
+.default_register_type :ub
+
+.reg_count_total 128
+.reg_count_payload 7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later)
+// 0000,0000,0000
+// 0001(Spawn a root thread),0001 (Root thread spawn thread)
+// = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later)
+// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+// 0000,0000 (Binding table index, added later)
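+// = 0x0C098700 (value implied by the bit fields listed above; this line previously read 0x02000011, apparently copied from the thread-spawner descriptor)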
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+ // Message Descriptors
+ // = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+ // 1 (header present 1) 0 1010 (media block write) 000000
+ // 00000000 (binding table index - set later)
+ // = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+ // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT
+
+
+.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+
+.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+
+.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+
+
+// End of common.inc
+
+
+// FileName: DNDI.inc
+// Author: Vivek Kumar
+// Description: Include file for DN, DI and DNDI
+// Inputs: DI_ENABLE, DN_ENABLE, DN_PLANAR, DN_PACKED
+
+
+
+
+// End of common.inc
+
+
+//Interface:
+//Static Parameters:
+//r1
+
+
+//====================== Binding table (Explicit To DNDI)=========================================
+
+
+.declare mudMSGHDR_DNDI Base=r18 ElementSize=4 Type=ud
+.declare mdMSGHDR_DNDI Base=r18 ElementSize=4 Type=d
+.declare mwMSGHDR_DNDI Base=r18 ElementSize=2 Type=w
+
+
+.declare mudMSGHDR_STMM Base=r20 ElementSize=4 Type=ud
+
+
+.declare mudMSGHDR_HIST Base=r22 ElementSize=4 Type=ud
+
+
+.declare mudMSGHDR_ENC_STATS Base=r24 ElementSize=4 Type=ud
+.declare muwMSGHDR_ENC_STATS Base=r24 ElementSize=2 Type=uw
+.declare mubMSGHDR_ENC_STATS Base=r24 ElementSize=1 Type=ub
+
+
+.declare mudMSGHDR_DN_OUT Base=r31.0 ElementSize=4 Type=ud
+.declare mdMSGHDR_DN_OUT Base=r31.0 ElementSize=4 Type=d
+.declare mubMSGHDR_DN_OUT Base=r31.0 ElementSize=1 Type=ub
+
+
+.declare mudMSGHDR_UVCOPY Base=r36 ElementSize=4 Type=ud
+.declare mdMSGHDR_UVCOPY Base=r36 ElementSize=4 Type=d
+.declare mudMSGHDR_UCOPY Base=r36 ElementSize=4 Type=ud
+.declare mudMSGHDR_VCOPY Base=r38 ElementSize=4 Type=ud
+
+
+.declare mudMSGHDR_DI_OUT1 Base=r18.0 ElementSize=4 Type=ud
+.declare mubMSGHDR_DI_OUT1 Base=r18.0 ElementSize=1 Type=ub
+
+
+.declare mudMSGHDR_DI_OUT2 Base=r23.0 ElementSize=4 Type=ud
+.declare mubMSGHDR_DI_OUT2 Base=r23.0 ElementSize=1 Type=ub
+
+//r45
+//Use r45 as message header, so no need to "mov" the data.
+
+.declare mudDN_Y_OUT Base=r45.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+// Message response (Denoised & DI-ed pixels & statistics); Use buffer 5
+.declare udDNDI_RESP Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare uwDNDI_RESP Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare ubDNDI_RESP Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+// Message response (UV Copy); Use buffer 5
+.declare udDNDI_UV_RESP Base=r58.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare ubDNDI_UV_RESP Base=r58.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+//Temp GRFs: For 42X to 422 Conversion
+.declare uwDNDI_UVCOPY_TEMP Base=r10.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw //8 GRFs
+.declare ubDNDI_UVCOPY_TEMP Base=r10.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub //8 GRFs
+//---------------------------------------------------------------------------
+// Message descriptors
+//---------------------------------------------------------------------------
+// Extended message descriptor
+ // Message descriptor for sampler read
+ // = 000 0010 (message len 2) 00000 (resp len - set later, 12 or 5 or 11)
+ // 1 (header present 1) 0 11 (SIMD32/64 mode)
+ // 1000 (message type) 0000 (DI state index)
+ // 00000000 (binding table index - set later)
+ // = 0x040b8000
+
+
+// Attention: The Message Length is The Number of GRFs with Data Only, without the Header
+
+
+//---------------------------------------------------------------------------
+// VDI Return Data format
+//---------------------------------------------------------------------------
+// Defines for DI enabled
+
+
+// Defines for DI disabled
+
+
+
+// FileName: DNDI_Command.asm
+// Author: Vivek Kumar
+// Description: Sends a message to the VDI to process one DN (16x8) or DNDI (16x4) block
+
+// Prepare the DNDI send command: two-register request in r18-r19 (r18 = copy of r0, r19 carries the block origin)
+mov (8) mudMSGHDR_DNDI(0)<1> r0.0<8;8,1>:ud // message header (r18) starts as a copy of r0
+mov (1) mwMSGHDR_DNDI(1,4)<1> r7.0<0;1,0>:w { NoDDClr } // horizontal origin (r7.0:w) -> word 4 of r19 // Do we need to add offset here? -vK
+mov (1) mwMSGHDR_DNDI(1,12)<1> r7.1<0;1,0>:w { NoDDChk } // vertical origin (r7.1:w) -> word 12 of r19 // Can these 2 be combined? - vK
+// Descriptor 0x4CE8003: message length 2, response length 12 (same field layout as the sampler-read descriptor note in DNDI.inc), low byte 0x03 = binding table index
+send (8) udDNDI_RESP(0)<1> r18 0x2 0x4CE8003:ud
+// The response is written starting at udDNDI_RESP(0), which is declared at r46
+// On Gen6, with VDI walker, use the XY pair returned in the response rather than the one programmed above.
+// VDI_RETURNED_XY is ordered X,Y when the walker is enabled, and equals the programmed origin when the walker is disabled.
+mov (2) r7.0<1>:w uwDNDI_RESP(9,14)<2;2,1> // horizontal/vertical origin are returned in words 14 and 15 of response register 9
+
+
+
+// FileName: DI_STMM_Save.asm
+// Author: Vivek Kumar
+// Description: Saves DI STMM Data to statistics surface in case of DI enabled (for 16x4 block)
+
+// Write STMM to memory: header in r20, one payload register in r21
+mov (8) mudMSGHDR_STMM(0)<1> r0.0<8;8,1>:ud // message header starts as a copy of r0
+mov (8) mudMSGHDR_STMM(1)<1> udDNDI_RESP(8,0) // Copy response register 8 (STMM) into the payload register r21
+// Override header dwords 0-2 with the destination block position and size
+shr (1) mudMSGHDR_STMM(0,0)<1> r7.0<0;1,0>:w 1:w { NoDDClr } // X origin / 2
+mov (1) mudMSGHDR_STMM(0,1)<1> r7.1<0;1,0>:w { NoDDClr, NoDDChk } // Y origin
+mov (1) mudMSGHDR_STMM(0,2)<1> 0x30007:ud { NoDDChk } // block width and height (8x4): low half = width-1, high half = height-1
+// Descriptor 0x40A8021: message length 2 (header + payload), same 0x0A8000 control bits as the media block write descriptor documented in common.inc, binding table index 0x21
+send (8) null<1>:d r20 0x5 0x40A8021:ud
+
+
+
+// FileName: DN_Hist_Save.asm
+// Author: Vivek Kumar
+// Description: Saves DN history data to statistics surface
+
+// Write denoise history to memory
+mov (8) r27<1>:ud r0.0<8;8,1>:ud // message header is first assembled in r27 from a copy of r0
+
+ mov (1) mudMSGHDR_HIST(1)<1> udDNDI_RESP(9,0)<0;1,0> // Move denoise history (one dword, 4x1 bytes) into the payload
+
+
+shr (2) r27.0<1>:ud r7.0<2;2,1>:w 2:w // X,Y origin / 4
+add (1) r27.0<1>:ud r27.0<0;1,0>:ud r1.12<0;1,0>:uw { NoDDClr } // Add pitch (r1.12) to X origin
+mov (1) r27.2<1>:ud 0x3:ud { NoDDChk } // block width and height (4x1)
+
+mov (8) mudMSGHDR_HIST(0)<1> r27.0<8;8,1>:ud // copy the finished header from r27 into the message header
+send (8) null<1>:d r22 0x5 0x40A8021:ud // message is sent from r22; destination is null (nothing is read back)
+
+
+
+// FileName: DNDI_Enc_Stats_Save.asm
+// Author: Vivek Kumar
+// Description: Saves Encoder Statistics data to statistics surface in case of DI enabled (for 16x4 block)
+
+// Write encoder statistics to memory
+//Currently enable this only on Gen6 validation
+mov (8) mudMSGHDR_ENC_STATS(1)<1> 0x0:ud // Init payload to zero
+mov (8) mudMSGHDR_ENC_STATS(0)<1> r0.0<8;8,1>:ud // message header: start from a copy of r0
+
+shr (1) mudMSGHDR_ENC_STATS(0,0)<1> r7.0<0;1,0>:w 1:w { NoDDClr } // X origin / 2 (old note: "enable the flag after testing on si"; the flag is present here)
+mul (1) acc0.1<1>:ud r7.1<0;1,0>:w 3:w // Y origin * 3
+shr (1) mudMSGHDR_ENC_STATS(0,1)<1> acc0.1<0;1,0>:ud 2:w { NoDDClr, NoDDChk } // Y origin * 3/4 (old note: "enable the flag after testing on si"; the flags are present here)
+mov (1) mudMSGHDR_ENC_STATS(0,2)<1> 0x20007:ud { NoDDChk } // block width and height (8x3) (old note: "enable the flag after testing on si"; the flag is present here)
+add (2) mudMSGHDR_ENC_STATS(0,0)<1> mudMSGHDR_ENC_STATS(0,0)<2;2,1> r1.12<2;2,1>:uw // Add pitch (r1.12, r1.13) to X,Y origin
+
+
+ //Data block for Encoder Statistics
+ //----------------------------------------------------
+ //| 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | Bytes
+ //----------------------------------------------------
+ //| BNE | MCNT | FCNT | TCNT | X | X | X | X |
+ //----------------------------------------------------
+ //| DcTpT | SVCM | DcBpT | DcTpB |
+ //----------------------------------------------------
+ //| SHCM | STAD | DcTcB | DcBpB |
+ //----------------------------------------------------
+ mov (1) mudMSGHDR_ENC_STATS(1,0)<1> udDNDI_RESP(9,1)<0;1,0> { NoDDClr } // payload dword 0 <- response dword (9,1)
+ mov (2) mudMSGHDR_ENC_STATS(1,3)<2> udDNDI_RESP(9,3)<2;2,1> { NoDDClr, NoDDChk } // payload dwords 3,5 <- response dwords (9,3),(9,4)
+ mov (2) mudMSGHDR_ENC_STATS(1,2)<2> udDNDI_RESP(9,5)<2;2,1> { NoDDChk } // payload dwords 2,4 <- response dwords (9,5),(9,6)
+
+
+send (8) null<1>:d r24 0x5 0x40A8021:ud // message is sent from r24; destination is null (nothing is read back)
+
+
+
+// FileName: DN_Save_PA_16x4.asm
+// Author: Vivek Kumar
+// Description: Save one 16x4 block of DN output in Packed format for reference
+
+
+ // check top/bottom field first
+cmp.e.f0.0 (1) null<1>:w r1.28<0;1,0>:ub 1:w // f0.0 is set when byte r1.28 equals 1
+
+add (4) a0.4<1>:uw r4.0<4;4,1>:ub 1024:w // Initial Y,U,V offset in YUV422 block; it starts at m14 (1024 = 32*32 bytes, i.e. GRF r32 plus the per-component byte offsets from r4.0..r4.3)
+
+mov (8) mudMSGHDR_DN_OUT(0)<1> r0<8;8,1>:ud // message header: start from a copy of r0
+shl (1) mdMSGHDR_DN_OUT(0,0)<1> r7.0<0;1,0>:w 1:w { NoDDClr } // X origin * 2 (422 output)
+mov (1) mdMSGHDR_DN_OUT(0,1)<1> r7.1<0;1,0>:w { NoDDClr, NoDDChk } // Y origin
+mov (1) mudMSGHDR_DN_OUT(0,2)<1> 0x3001F:ud { NoDDChk } // block width and height (32x4), encoded as (height-1)<<16 | (width-1)
+
+(f0.0) jmpi (1) TOP_FIELD_FIRST // taken when r1.28 == 1; otherwise fall through to BOTTOM_FIELD_FIRST
+
+BOTTOM_FIELD_FIRST:
+ mov (16) r[a0.4, 0]<2>:ub ubDNDI_RESP(10,0) { NoDDClr } // 2nd field luma from current frame (line 0,2)
+ mov (16) r[a0.4, 32]<2>:ub ubDNDI_RESP(4,16) { NoDDClr } // 1st field luma from current frame (line 1,3)
+ mov (16) r[a0.4, 64]<2>:ub ubDNDI_RESP(10,16) { NoDDClr } // 2nd field luma from current frame (line 0,2)
+ mov (16) r[a0.4, 96]<2>:ub ubDNDI_RESP(5,16) { NoDDClr } // 1st field luma from current frame (line 1,3)
+ mov (8) r[a0.5, 0]<4>:ub ubDNDI_RESP(11,1)<16;8,2> { NoDDClr, NoDDChk } // 2nd field U from current frame (line 0,2)
+ mov (8) r[a0.5, 32]<4>:ub ubDNDI_RESP(6,17)<16;8,2> { NoDDClr, NoDDChk } // 1st field U from current frame (line 1,3)
+ mov (8) r[a0.5, 64]<4>:ub ubDNDI_RESP(11,17)<16;8,2> { NoDDClr, NoDDChk } // 2nd field U from current frame (line 0,2)
+ mov (8) r[a0.5, 96]<4>:ub ubDNDI_RESP(7,17)<16;8,2> { NoDDClr, NoDDChk } // 1st field U from current frame (line 1,3)
+ mov (8) r[a0.6, 0]<4>:ub ubDNDI_RESP(11,0)<16;8,2> { NoDDChk } // 2nd field V from current frame (line 0,2)
+ mov (8) r[a0.6, 32]<4>:ub ubDNDI_RESP(6,16)<16;8,2> { NoDDChk } // 1st field V from current frame (line 1,3)
+ mov (8) r[a0.6, 64]<4>:ub ubDNDI_RESP(11,16)<16;8,2> { NoDDChk } // 2nd field V from current frame (line 0,2)
+ mov (8) r[a0.6, 96]<4>:ub ubDNDI_RESP(7,16)<16;8,2> { NoDDChk } // 1st field V from current frame (line 1,3)
+jmpi (1) SAVE_DN_CURR // skip the top-field-first packing
+
+TOP_FIELD_FIRST:
+ mov (16) r[a0.4, 0]<2>:ub ubDNDI_RESP(4,0) { NoDDClr } // 1st field luma from current frame (line 0,2)
+ mov (16) r[a0.4, 32]<2>:ub ubDNDI_RESP(10,0) { NoDDClr } // 2nd field luma from current frame (line 1,3)
+ mov (16) r[a0.4, 64]<2>:ub ubDNDI_RESP(5,0) { NoDDClr } // 1st field luma from current frame (line 0,2)
+ mov (16) r[a0.4, 96]<2>:ub ubDNDI_RESP(10,16) { NoDDClr } // 2nd field luma from current frame (line 1,3)
+ mov (8) r[a0.5, 0]<4>:ub ubDNDI_RESP(6,1)<16;8,2> { NoDDClr, NoDDChk } // 1st field U from current frame (line 0,2)
+ mov (8) r[a0.5, 32]<4>:ub ubDNDI_RESP(11,1)<16;8,2> { NoDDClr, NoDDChk } // 2nd field U from current frame (line 1,3)
+ mov (8) r[a0.5, 64]<4>:ub ubDNDI_RESP(7,1)<16;8,2> { NoDDClr, NoDDChk } // 1st field U from current frame (line 0,2)
+ mov (8) r[a0.5, 96]<4>:ub ubDNDI_RESP(11,17)<16;8,2> { NoDDClr, NoDDChk } // 2nd field U from current frame (line 1,3)
+ mov (8) r[a0.6, 0]<4>:ub ubDNDI_RESP(6,0)<16;8,2> { NoDDChk } // 1st field V from current frame (line 0,2)
+ mov (8) r[a0.6, 32]<4>:ub ubDNDI_RESP(11,0)<16;8,2> { NoDDChk } // 2nd field V from current frame (line 1,3)
+ mov (8) r[a0.6, 64]<4>:ub ubDNDI_RESP(7,0)<16;8,2> { NoDDChk } // 1st field V from current frame (line 0,2)
+ mov (8) r[a0.6, 96]<4>:ub ubDNDI_RESP(11,16)<16;8,2> { NoDDChk } // 2nd field V from current frame (line 1,3)
+
+SAVE_DN_CURR:
+//send out data through data port (header in r31, packed rows follow it)
+send (8) null<1>:d r31.0 0x5 0xA0A8018:ud
+
+
+
+// FileName: DI_Save_PA_16x4.asm
+// Author: Vivek Kumar
+// Description: Save two 16x4 blocks of DI output in Packed format
+
+
+add (4) a0.4<1>:uw r2.28<4;4,1>:ub 608:w // Initial Y,U,V offset in YUV422 block; it starts at m20 (608 = 19*32 bytes, i.e. GRF r19 plus the per-component byte offsets from r2.28..r2.31)
+
+mov (8) r27.0<1>:ud r0.0<8;8,1>:ud // header template is assembled in r27 from a copy of r0
+shl (1) r27.0<1>:d r7.0<0;1,0>:w 1:w { NoDDClr } // H. block origin needs to be doubled
+mov (1) r27.1<1>:d r7.1<0;1,0>:w { NoDDClr, NoDDChk } // Block origin
+mov (1) r27.2<1>:ud 0x3001F:ud { NoDDChk } // Block width and height (32x4), encoded as (height-1)<<16 | (width-1)
+
+//prepare the message headers: both sends below use the same origin and block size
+mov (8) r18.0<1>:ud r27<8;8,1>:ud
+mov (8) r23.0<1>:ud r27<8;8,1>:ud
+
+// Pack 2nd field Y (rows at offsets 0..96 from the a0 pointers: the payload that follows header r18)
+ mov (16) r[a0.4, 0]<2> ubDNDI_RESP(0,0) { NoDDClr }
+ mov (16) r[a0.4, 32]<2> ubDNDI_RESP(0,16) { NoDDClr }
+ mov (16) r[a0.4, 64]<2> ubDNDI_RESP(0,32) { NoDDClr }
+ mov (16) r[a0.4, 96]<2> ubDNDI_RESP(0,48) { NoDDClr }
+// Pack 2nd field U
+ mov (8) r[a0.5, 0]<4> ubDNDI_RESP(2,1)<16;8,2> { NoDDClr, NoDDChk } //U pixels
+ mov (8) r[a0.5, 32]<4> ubDNDI_RESP(2,17)<16;8,2> { NoDDClr, NoDDChk } //U pixels
+ mov (8) r[a0.5, 64]<4> ubDNDI_RESP(2,33)<16;8,2> { NoDDClr, NoDDChk } //U pixels
+ mov (8) r[a0.5, 96]<4> ubDNDI_RESP(2,49)<16;8,2> { NoDDClr, NoDDChk } //U pixels
+// Pack 2nd field V
+ mov (8) r[a0.6, 0]<4> ubDNDI_RESP(2,0)<16;8,2> { NoDDChk } //V pixels
+ mov (8) r[a0.6, 32]<4> ubDNDI_RESP(2,16)<16;8,2> { NoDDChk } //V pixels
+ mov (8) r[a0.6, 64]<4> ubDNDI_RESP(2,32)<16;8,2> { NoDDChk } //V pixels
+ mov (8) r[a0.6, 96]<4> ubDNDI_RESP(2,48)<16;8,2> { NoDDChk } //V pixels
+
+// Pack 1st field Y (rows at offsets 160..256, i.e. 5 GRFs later: the payload that follows header r23)
+ mov (16) r[a0.4, 160]<2> ubDNDI_RESP(4,0) { NoDDClr }
+ mov (16) r[a0.4, 192]<2> ubDNDI_RESP(4,16) { NoDDClr }
+ mov (16) r[a0.4, 224]<2> ubDNDI_RESP(4,32) { NoDDClr }
+ mov (16) r[a0.4, 256]<2> ubDNDI_RESP(4,48) { NoDDClr }
+// Pack 1st field U
+ mov (8) r[a0.5, 160]<4> ubDNDI_RESP(6,1)<16;8,2> { NoDDClr, NoDDChk } //U pixels
+ mov (8) r[a0.5, 192]<4> ubDNDI_RESP(6,17)<16;8,2> { NoDDClr, NoDDChk } //U pixels
+ mov (8) r[a0.5, 224]<4> ubDNDI_RESP(6,33)<16;8,2> { NoDDClr, NoDDChk } //U pixels
+ mov (8) r[a0.5, 256]<4> ubDNDI_RESP(6,49)<16;8,2> { NoDDClr, NoDDChk } //U pixels
+// Pack 1st field V
+ mov (8) r[a0.6, 160]<4> ubDNDI_RESP(6,0)<16;8,2> { NoDDChk } //V pixels
+ mov (8) r[a0.6, 192]<4> ubDNDI_RESP(6,16)<16;8,2> { NoDDChk } //V pixels
+ mov (8) r[a0.6, 224]<4> ubDNDI_RESP(6,32)<16;8,2> { NoDDChk } //V pixels
+ mov (8) r[a0.6, 256]<4> ubDNDI_RESP(6,48)<16;8,2> { NoDDChk } //V pixels
+
+//save the previous frame (message starting at r18)
+send (8) null<1>:d r18.0 0x5 0xA0A801B:ud
+
+//save the current frame (message starting at r23)
+send (8) null<1>:d r23.0 0x5 0xA0A801E:ud
+
+
+
+//End of Thread message: the last instructions of this kernel
+
+mov (8) r127<1>:ud r0.0<8;8,1>:ud // message payload is a copy of r0
+ send (1) null<1>:d r127 0x27 0x02000010 // NOTE(review): 0x27 presumably encodes EOT plus the thread-spawner target - confirm against the PRM
+
+
+.end_code
+.end_kernel
diff --git a/src/shaders/post_processing/gen7/PA_DNUV_PA.g4a b/src/shaders/post_processing/gen7/PA_DNUV_PA.g4a
new file mode 100644
index 0000000..cb1fd9c
--- /dev/null
+++ b/src/shaders/post_processing/gen7/PA_DNUV_PA.g4a
@@ -0,0 +1,2704 @@
+/*
+ * Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// 1319 // Total instruction count
+// 1 // Total kernel count
+
+
+.kernel YUY2_DNUV_YUY2 // the single kernel in this file (see "Total kernel count" above)
+.code // instructions and declarations for the kernel follow
+
+
+
+//Module : DN_UV_Setup
+//Author : Tatiya, Rupesh
+//Description : Initial Set-up for DN_UV
+
+
+
+
+// Module name : ChromaDenoise.inc
+// Author : Tatiya, Rupesh
+
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16) // execution size assumed when an instruction does not give one
+.default_register_type :ub // operands written without a type suffix are treated as unsigned bytes
+
+.reg_count_total 128 // r0..r127
+.reg_count_payload 7 // NOTE(review): presumably the number of GRFs preloaded as thread payload - confirm against the assembler docs
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later)
+// 0000,0000,0000
+// 0001(Spawn a root thread),0001 (Root thread spawn thread)
+// = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later)
+// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+// 0000,0000 (Binding table index, added later)
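+// = 0x0C098700 (value obtained by concatenating the bit fields listed above; 0x02000011 is the thread-spawn descriptor)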
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+ // Message Descriptors
+ // = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+ // 1 (header present 1) 0 1010 (media block write) 000000
+ // 00000000 (binding table index - set later)
+ // = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+ // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT
+
+
+.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub // byte view of the message payload area at r30
+.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw // word view of the same registers
+.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud // dword view of the same registers
+.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f // float view of the same registers
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+
+.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f // float view of the rotation temp space at r9
+
+.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud // dword view of the same registers
+
+.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw // word view of the same registers
+
+.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub // byte view of the same registers
+
+.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub // byte view that steps 4 bytes per element (one byte out of every dword)
+
+
+// End of common.inc
+
+
+//Interface:
+//Static Parameters:
+//r1
+
+
+//======================================================
+//Interface for serpent mode Chroma Denoise, added by Le
+//======================================================
+//r1
+
+
+//noise history thresholds (low and high)
+
+
+//temporal difference thresholds (high and low)
+
+
+//noise history thresholds (low and high)
+//#define ubNoiseHistMaxHigh r1.22
+//#define ubNoiseHistMaxLow r1.23
+//#define ubNoiseHistDeltaHigh r1.24
+//#define ubNoiseHistDeltaLow r1.25
+
+//Gaussian thresholds
+
+
+//temporal difference thresholds (default)
+
+
+//r2
+//history thresholds (default)
+
+
+//denoise factor (0-63)
+
+
+//====================== Binding table (Explicit To DNUV)=========================================
+//Used by DN_UV kernels
+
+
+ //Pointer to Current Frame UV
+
+
+//r1-r6
+ //CURBE GRFs used as TEMP : Used for max computation and storing max temporarily. : r1-r6
+
+
+ .declare ubCURBE_TEMP Base=r1.0 ElementSize=1 Type=ub // byte view of the CURBE registers reused as temp space (from r1)
+ .declare uwCURBE_TEMP Base=r1.0 ElementSize=2 Type=uw // unsigned word view of the same registers
+ .declare wCURBE_TEMP Base=r1.0 ElementSize=2 Type=w // signed word view of the same registers
+ .declare fCURBE_TEMP Base=r1.0 ElementSize=4 Type=f // float view of the same registers
+ .declare udCURBE_TEMP Base=r1.0 ElementSize=4 Type=ud // dword view of the same registers
+ .declare uwMAX_ABS_DIFF Base=r5.0 ElementSize=2 Type=uw // word view starting at r5, inside the r1-r6 temp range
+
+ //r1
+
+
+ //r3
+
+
+ //r4
+
+//r7
+ //All of the following has to defined in Same GRF for optimal performance.
+
+
+//r8-24
+ //Previous Frame UV
+
+ .declare udPREV_UV Base=r8.0 ElementSize=4 Type=ud // dword view of the previous-frame UV buffer (from r8)
+ .declare ubPREV_UV Base=r8.0 ElementSize=1 Type=ub // byte view of the same buffer
+
+
+//r25-48
+ //TEMP Space for any Usage.
+
+
+//=========================================================================
+//Definitions and declarations for serpent mode Chroma Denoise, added by Le
+//=========================================================================
+
+
+ .declare udGNE_UV Base=r24.0 ElementSize=4 Type=ud // dword view of r24
+ .declare fGNE_UV Base=r24.0 ElementSize=4 Type=f // float view of r24
+ .declare ubGNE_UV Base=r24.0 ElementSize=1 Type=ub // byte view of r24
+
+ .declare udMSGHDR_BNE_SERP Base=r25.0 ElementSize=4 Type=ud // dword view of r25
+ .declare udMSGSRC_BNE_SERP Base=r26.0 ElementSize=4 Type=ud // dword view of r26 (same register as the *_Thresholds views below)
+
+
+ .declare ubDN_UV_Thresholds Base=r26.0 ElementSize=1 Type=ub // byte view of r26
+ .declare ubDN_UV_Thresholds_Temp Base=r27.0 ElementSize=1 Type=ub // byte view of r27
+ .declare udDN_UV_Thresholds Base=r26.0 ElementSize=4 Type=ud // dword view of r26
+ .declare udDN_UV_Thresholds_Temp Base=r27.0 ElementSize=4 Type=ud // dword view of r27
+ .declare fDN_UV_Thresholds Base=r26.0 ElementSize=4 Type=f // float view of r26
+ .declare fDN_UV_Thresholds_Temp Base=r27.0 ElementSize=4 Type=f // float view of r27
+
+
+//====================================================================================
+
+
+ //TEMP23: To hold V data for PL3 surfaces (the views from here to uwTEMP0/ubTEMP0 alias the r25-r48 temp space)
+ .declare udCURR_V_TEMP Base=r25.0 ElementSize=4 Type=ud
+ .declare ubCURR_V_TEMP Base=r25.0 ElementSize=1 Type=ub
+
+ //GRFs to calculate Median: r25-r42
+ .declare ubMEDIAN_TEMP Base=r25.0 ElementSize=1 Type=ub
+
+ //18 GRFs to hold difference : r25-r42 (signed and unsigned word views)
+ .declare wDIFF Base=r25.0 ElementSize=2 Type=w
+ .declare uwDIFF Base=r25.0 ElementSize=2 Type=uw
+
+ //Temporal Diff (same base register as wDIFF)
+ .declare wDIFF_TEMPORAL Base=r25.0 ElementSize=2 Type=w
+ .declare ubDIFF_TEMPORAL Base=r25.0 ElementSize=1 Type=ub
+
+ //4 GRFs to hold Sobel Value : r43-46
+ .declare wSOBEL_X Base=r43.0 ElementSize=2 Type=w
+ .declare uwSOBEL Base=r43.0 ElementSize=2 Type=uw
+
+
+ //2 GRFs to hold SOAD temporarily: r47-48
+ .declare uwSOAD Base=r47.0 ElementSize=2 Type=uw
+
+ //Temp GRFs to hold extra YUYV pixels: r43-r48 (same base register as the Sobel views)
+ .declare ubTEMP5 Base=r43.0 ElementSize=1 Type=ub
+
+ //Temp GRFs in Median Calculation: r47-r48 (same base register as uwSOAD)
+ .declare ubTEMP1 Base=r47.0 ElementSize=1 Type=ub
+
+ .declare uwTEMP0 Base=r48.0 ElementSize=2 Type=uw
+ .declare ubTEMP0 Base=r48.0 ElementSize=1 Type=ub
+
+ //Temp Space to store Median : r49-50
+
+ .declare ubMEDIAN Base=r49.0 ElementSize=1 Type=ub
+
+//r49
+
+
+//r50
+ //Message Source
+
+
+//r51
+ //DN_UV History Surface
+
+ .declare udHIST_UV Base=r51.0 ElementSize=4 Type=ud // dword view of the history data at r51
+ .declare ubHIST_UV Base=r51.0 ElementSize=1 Type=ub // byte view of the same register
+
+//r52 - r91
+ //r52
+ //Current Frame UV
+
+
+ .declare udCURR_UV Base=r52.0 ElementSize=4 Type=ud // dword view of the current-frame UV buffer (from r52)
+ .declare ubCURR_UV Base=r52.0 ElementSize=1 Type=ub // byte view of the same buffer
+
+ //r54
+ //CURBE COPY
+
+
+ //r55
+
+
+ .declare uwSOAD_MIN_8x4 Base=r56.0 ElementSize=2 Type=uw // word view starting at r56 (inside the r52-r91 range)
+
+ //r61
+
+
+ //r62
+
+
+ //History Surface Temp Origin
+
+
+ //r63
+ //Current Frame Y Temp Origin
+
+
+ //BNE Surface Origin
+
+
+ //r70
+
+ .declare uwDIFF_TEMPORAL_SUM4x4 Base=r70.0 ElementSize=2 Type=uw //4 GRFs
+
+ //r74-91 : For Saving Dest UV (PL2/PL3)
+
+
+ .declare ubMSGPAYLOAD_UV0 Base=r75.0 ElementSize=1 Type=ub // first output payload, byte view from r75
+
+
+ .declare ubMSGPAYLOAD_U Base=r75.0 ElementSize=1 Type=ub // same registers as ubMSGPAYLOAD_UV0
+
+
+ .declare ubMSGPAYLOAD_UV1 Base=r84.0 ElementSize=1 Type=ub // second output payload, byte view from r84
+
+
+ .declare ubMSGPAYLOAD_V Base=r84.0 ElementSize=1 Type=ub // same registers as ubMSGPAYLOAD_UV1
+
+ //r90
+
+ .declare uwDIFF_TEMPORAL_SUM4x4_FINAL Base=r90.0 ElementSize=2 Type=uw //2 GRFs
+
+//r92-127
+ //Current Frame Y
+
+
+ //r92 (r92/r101/r110/r119 are each one GRF below the CURR_Y0..CURR_Y3 buffers declared further down)
+ .declare uwDIFF_TEMPORAL_SUM4x4_TEMP_0 Base=r92 ElementSize=2 Type=uw
+ //r101
+ .declare uwDIFF_TEMPORAL_SUM4x4_TEMP_1 Base=r101 ElementSize=2 Type=uw
+ //r110
+ .declare uwDIFF_TEMPORAL_SUM4x4_TEMP_2 Base=r110 ElementSize=2 Type=uw
+ //r119
+ .declare uwDIFF_TEMPORAL_SUM4x4_TEMP_3 Base=r119 ElementSize=2 Type=uw
+
+ .declare udCURR_Y0 Base=r93.0 ElementSize=4 Type=ud // dword view of the first current-frame buffer (from r93)
+ .declare ubCURR_Y0 Base=r93.0 ElementSize=1 Type=ub // byte view of the same buffer
+ .declare udCURR_Y1 Base=r102.0 ElementSize=4 Type=ud // second buffer (from r102)
+ .declare ubCURR_Y1 Base=r102.0 ElementSize=1 Type=ub
+ .declare udCURR_Y2 Base=r111.0 ElementSize=4 Type=ud // third buffer (from r111)
+ .declare ubCURR_Y2 Base=r111.0 ElementSize=1 Type=ub
+ .declare udCURR_Y3 Base=r120.0 ElementSize=4 Type=ud // fourth buffer (from r120)
+ .declare ubCURR_Y3 Base=r120.0 ElementSize=1 Type=ub
+
+ //r92: To hold current-frame U data for PL3 surfaces
+ .declare udCURR_U_TEMP Base=r92.0 ElementSize=4 Type=ud
+ .declare ubCURR_U_TEMP Base=r92.0 ElementSize=1 Type=ub
+
+ //r112: To hold previous-frame U data for PL3 surfaces
+ .declare udPREV_U_TEMP Base=r112.0 ElementSize=4 Type=ud
+ .declare ubPREV_U_TEMP Base=r112.0 ElementSize=1 Type=ub
+
+ //r120: To hold previous-frame V data for PL3 surfaces
+ .declare udPREV_V_TEMP Base=r120.0 ElementSize=4 Type=ud
+ .declare ubPREV_V_TEMP Base=r120.0 ElementSize=1 Type=ub
+
+
+ // Initialize every message source register used by the loads below with a copy of r0.
+ mov (8) r50.0<1>:ud r0.0<8;8,1>:ud // r50: shared header for the B/C block reads and the previous-frame/history reads
+ mov (8) r92.0<1>:ud r0.0<8;8,1>:ud // r92: header for the A1 read
+ mov (8) r101.0<1>:ud r0.0<8;8,1>:ud // r101: header for the A2 read
+ mov (8) r110.0<1>:ud r0.0<8;8,1>:ud // r110: header for the A3 read
+ mov (8) r119.0<1>:ud r0.0<8;8,1>:ud // r119: header for the A4 read
+
+
+
+//Module Name : DN_UV_YUY2_Load_Curr_Frame_YUV
+//Author : Tatiya, Rupesh
+//Description : Loads Current Frame YUV data for YUY2 input.
+
+
+
+//Module name : DN_UV_Load_Curr_Frame_UV
+//Author : Tatiya, Rupesh
+//Description : Loads Current Frame (UV only).
+// We need 4 extra rows (2 per field) and 2 extra pixel (1 each side) for both U and V each.
+// The processing size is 16x16 U and V each. So we need : U size - 18x20, V size - 18x20, UV size - 36x20, YUYV size - 72x20.
+
+
+
+
+//72x20 interleaved YUYV block is partitioned as follows:
+// <------ 36 --------> <--------36 ------->
+// ------------------------------------------
+// | | 32x2 B1 | 32x2 B2 | |
+// | 4 |--------------------------------| 4 |
+// | x | | | x |
+// |20 | 32x8 A1 | 32x8 A3 | 20|
+// | |---------------|----------------| |
+// | C1| 32x8 A2 | 32x8 A4 | C2|
+// | | | | |
+// | |--------------------------------| |
+// | | 32x2 B3 | 32x2 B4 | |
+// ------------------------------------------
+//
+// Coordinates (A1, A2, A3, A4, C1, C2, B1, B2, B3, B4): (x, y), (x, y+8), (x+32, y), (x+32, y+8), (x-4, y-2), (x+64, y-2),(x, y-2), (x+32, y-2), (x, y+16), (x+32, y+16)
+
+ //UV surface origin: (2xORIX, ORIY)
+ add (2) r7.4<1>:w r7.0<2;2,1>:w r4.4<2;2,1>:w { AccWrEn } // Source Block origin
+ shl (1) r7.4<1>:w acc0.4<0;1,0>:w 1:w // double the X origin (the 2xORIX above)
+
+ //A1
+ mov (2) r92.0<1>:d r7.4<2;2,1>:w { AccWrEn } // Source Block origin; acc0.0/acc0.1 keep (x, y) for all the loads below
+ mov (1) r92.2<1>:ud 0x7001F:ud // block width and height (32x8)
+ send (8) udCURR_Y0(0)<1> r92 0x4 0x2890003:ud
+
+ //A2
+ mov (1) r101.0<1>:d acc0.0<0;1,0>:d
+ add (1) r101.1<1>:d acc0.1<0;1,0>:d 8:d
+ mov (1) r101.2<1>:ud 0x7001F:ud // block width and height (32x8)
+ send (8) udCURR_Y1(0)<1> r101 0x4 0x2890003:ud
+
+ //B1
+ mov (1) r50.0<1>:d acc0.0<0;1,0>:d
+ add (1) r50.1<1>:d acc0.1<0;1,0>:d -2:d
+ mov (1) r50.2<1>:ud 0x1001F:ud // block width and height (32x2)
+ send (8) udCURR_UV(0)<1> r50 0x4 0x2290003:ud
+
+ //B3
+ mov (1) r50.0<1>:d acc0.0<0;1,0>:d
+ add (1) r50.1<1>:d acc0.1<0;1,0>:d 16:d
+ send (8) udCURR_UV(18)<1> r50 0x4 0x2290003:ud // r50.2 still holds the 32x2 size set for B1
+
+ //C1
+ add (1) r50.0<1>:d acc0.0<0;1,0>:d -4:d
+ add (1) r50.1<1>:d acc0.1<0;1,0>:d -2:d
+ mov (1) r50.2<1>:ud 0x130003:ud // block width and height (4x20)
+ send (8) ubTEMP5(0)<1> r50 0x4 0x2390003:ud
+
+ //A3
+ add (1) r110.0<1>:d acc0.0<0;1,0>:d 32:d
+ mov (1) r110.1<1>:d acc0.1<0;1,0>:d
+ mov (1) r110.2<1>:ud 0x7001F:ud // block width and height (32x8)
+ send (8) udCURR_Y2(0)<1> r110 0x4 0x2890003:ud
+
+ //A4
+ add (1) r119.0<1>:d acc0.0<0;1,0>:d 32:d
+ add (1) r119.1<1>:d acc0.1<0;1,0>:d 8:d
+ mov (1) r119.2<1>:ud 0x7001F:ud // block width and height (32x8)
+ send (8) udCURR_Y3(0)<1> r119 0x4 0x2890003:ud
+
+ //B2
+ add (1) r50.0<1>:d acc0.0<0;1,0>:d 32:d
+ add (1) r50.1<1>:d acc0.1<0;1,0>:d -2:d
+ mov (1) r50.2<1>:ud 0x1001F:ud // block width and height (32x2)
+ send (8) udCURR_UV(20)<1> r50 0x4 0x2290003:ud
+
+ //B4
+ add (1) r50.0<1>:d acc0.0<0;1,0>:d 32:d
+ add (1) r50.1<1>:d acc0.1<0;1,0>:d 16:d
+ send (8) udCURR_UV(38)<1> r50 0x4 0x2290003:ud // r50.2 still holds the 32x2 size set for B2
+
+ //C2
+ add (1) r50.0<1>:d acc0.0<0;1,0>:d 64:d
+ add (1) r50.1<1>:d acc0.1<0;1,0>:d -2:d
+ mov (1) r50.2<1>:ud 0x130003:ud // block width and height (4x20)
+ send (8) ubTEMP5(3)<1> r50 0x4 0x2390003:ud
+
+ //History Origin, Current Y origin and BNE surface origin - all are in inline GRF. Use , . -rT.
+
+ //Calculate Origin For History Surface: (ORIX/4, ORIY/4)
+ shr (2) r7.2<1>:w r7.0<2;2,1>:w 2:w
+
+ //Calculate Origin For BNE Surface: (ORIX/8, ORIY/8)
+ shr (2) r7.6<1>:w r7.0<2;2,1>:w 3:w
+
+
+
+//Module Name : DN_UV_YUY2_Load_Prev_Frame_YUV.asm
+//Author : Tatiya, Rupesh
+//Description : Loads Previous Frame YUV data for YUY2 input.
+
+
+
+//Module Name : DN_UV_Load_Prev_Frame_UV
+//Author : Tatiya, Rupesh
+//Description : Loads Prev Frame (UV only). U size - 16x16, V size - 16x16, UV size - 32x16, YUYV size - 64x16.
+
+// The 64x16 YUYV area is fetched as four 32x8 reads, all issued from header r50:
+// (x, y) -> udPREV_UV(0), (x, y+8) -> udPREV_UV(8), (x+32, y) -> udPREV_UV(16), (x+32, y+8) -> udPREV_UV(24)
+
+ mov (2) r50.0<1>:d r7.4<2;2,1>:w { AccWrEn } // Source block origin; acc0.0 = x, acc0.1 = y for the reads below
+ mov (1) r50.2<1>:ud 0x7001F:ud // block width and height of each read (32x8)
+ send (8) udPREV_UV(0)<1> r50 0x4 0x2890000:ud
+
+ add (1) r50.1<1>:ud acc0.1<0;1,0>:d 8:w // Y origin + 8 (X is unchanged)
+ send (8) udPREV_UV(8)<1> r50 0x4 0x2890000:ud
+
+ add (1) r50.0<1>:ud acc0.0<0;1,0>:d 32:w // X origin + 32
+ mov (1) r50.1<1>:ud acc0.1<0;1,0>:d // back to the original Y origin
+ send (8) udPREV_UV(16)<1> r50 0x4 0x2890000:ud
+
+ add (1) r50.1<1>:ud acc0.1<0;1,0>:d 8:w // Y origin + 8; must read acc0.1 (y) - acc0.0 is the X origin
+ send (8) udPREV_UV(24)<1> r50 0x4 0x2890000:ud
+
+
+ //TODO - See if History loading can be combined with Prev Frame Load. - rT
+
+
+//Module name : DN_UV_Load_Hist_UV
+//Author : Tatiya, Rupesh
+//Description : Load DN History for UV denoise. 4x4 for each U & V.
+
+
+
+
+ mov (2) r50.0<1>:d r7.2<2;2,1>:w // history surface origin (r7.2, r7.3)
+ mov (1) r50.2<1>:ud 0x30007:ud // block width and height (8x4)
+ send (8) udHIST_UV(0)<1> r50 0x4 0x2190022:ud // read is issued from header r50; data is written to udHIST_UV
+
+
+
+//Module Name: DN_UV_YUY2_Extract_Curr_Frame_UV
+//Author : Tatiya, Rupesh
+//Description: Extract UV data from current YUY2 frame.
+
+//72x20 interleaved YUYV block is partitioned as follows:
+// <------ 36 --------> <--------36 ------->
+// ------------------------------------------
+// | | 32x2 B1 | 32x2 B2 | |
+// | 4 |--------------------------------| 4 |
+// | x | | | x |
+// |20 | 32x8 A1 | 32x8 A3 | 20|
+// | |---------------|----------------| |
+// | C1| 32x8 A2 | 32x8 A4 | C2|
+// | | | | |
+// | |--------------------------------| |
+// | | 32x2 B3 | 32x2 B4 | |
+// ------------------------------------------
+
+ // Set SRC pointers according to Input packing i.e. YUYV, YVYU, UYVY, VYUY
+ add (1) a0.0<1>:uw r4.1<0;1,0>:ub 2976:w //A1 : 2976 = 93*32, GRF r93 (CURR_Y0) plus the byte offset in r4.1
+ add (1) a0.1<1>:uw r4.1<0;1,0>:ub 3264:w //A2 : 3264 = 102*32, GRF r102 (CURR_Y1)
+ add (1) a0.2<1>:uw r4.1<0;1,0>:ub 3552:w //A3 : 3552 = 111*32, GRF r111 (CURR_Y2)
+ add (1) a0.3<1>:uw r4.1<0;1,0>:ub 3840:w //A4 : 3840 = 120*32, GRF r120 (CURR_Y3)
+ add (1) a0.4<1>:uw r4.1<0;1,0>:ub 1664:w //B1 : 1664 = 52*32, GRF r52 (CURR_UV row 0)
+ add (1) a0.5<1>:uw r4.1<0;1,0>:ub 2240:w //B3B2 : 2240 = 70*32, GRF r70 (CURR_UV row 18); B2 sits 64 bytes further on
+ add (1) a0.6<1>:uw r4.1<0;1,0>:ub 2880:w //B4 : 2880 = 90*32, GRF r90 (CURR_UV row 38)
+ add (1) a0.7<1>:uw r4.1<0;1,0>:ub 1376:w //C1C2 : 1376 = 43*32, GRF r43 (TEMP5); C2 sits 96 bytes further on
+
+ //Left 20x20 UV : 16x16 UV (Original)+4 extra rows(2 per field on top/bottom)+4 extra pixels(2 on left/right); stored in ubCURR_UV rows 0-19
+
+ //A1 : every second source byte of each 32-byte row -> bytes 2-17 of rows 2-9
+ mov (16) ubCURR_UV(2,2)<1> r[a0.0, 0]<32;16,2>
+ mov (16) ubCURR_UV(3,2)<1> r[a0.0, 32]<32;16,2>
+ mov (16) ubCURR_UV(4,2)<1> r[a0.0, 64]<32;16,2>
+ mov (16) ubCURR_UV(5,2)<1> r[a0.0, 96]<32;16,2>
+ mov (16) ubCURR_UV(6,2)<1> r[a0.0, 128]<32;16,2>
+ mov (16) ubCURR_UV(7,2)<1> r[a0.0, 160]<32;16,2>
+ mov (16) ubCURR_UV(8,2)<1> r[a0.0, 192]<32;16,2>
+ mov (16) ubCURR_UV(9,2)<1> r[a0.0, 224]<32;16,2>
+
+ //A2 : bytes 2-17 of rows 10-17
+ mov (16) ubCURR_UV(10,2)<1> r[a0.1, 0]<32;16,2>
+ mov (16) ubCURR_UV(11,2)<1> r[a0.1, 32]<32;16,2>
+ mov (16) ubCURR_UV(12,2)<1> r[a0.1, 64]<32;16,2>
+ mov (16) ubCURR_UV(13,2)<1> r[a0.1, 96]<32;16,2>
+ mov (16) ubCURR_UV(14,2)<1> r[a0.1, 128]<32;16,2>
+ mov (16) ubCURR_UV(15,2)<1> r[a0.1, 160]<32;16,2>
+ mov (16) ubCURR_UV(16,2)<1> r[a0.1, 192]<32;16,2>
+ mov (16) ubCURR_UV(17,2)<1> r[a0.1, 224]<32;16,2>
+
+ //B1 : bytes 2-17 of rows 0-1 (source and destination are the same registers, r52-r53)
+ mov (16) ubCURR_UV(0,2)<1> r[a0.4, 0]<32;16,2>
+ mov (16) ubCURR_UV(1,2)<1> r[a0.4, 32]<32;16,2>
+
+ //B3 : bytes 2-17 of rows 18-19 (source and destination are the same registers, r70-r71)
+ mov (16) ubCURR_UV(18,2)<1> r[a0.5, 0]<32;16,2>
+ mov (16) ubCURR_UV(19,2)<1> r[a0.5, 32]<32;16,2>
+
+ //TODO - Find a way to reduce this 40 SIMD2 instructions - rT
+ //C1 : one 4-byte source row per instruction -> bytes 0-1 of rows 0-19
+ mov (2) ubCURR_UV(0,0)<1> r[a0.7, 0]<4;2,2>
+ mov (2) ubCURR_UV(1,0)<1> r[a0.7, 4]<4;2,2>
+ mov (2) ubCURR_UV(2,0)<1> r[a0.7, 8]<4;2,2>
+ mov (2) ubCURR_UV(3,0)<1> r[a0.7, 12]<4;2,2>
+ mov (2) ubCURR_UV(4,0)<1> r[a0.7, 16]<4;2,2>
+ mov (2) ubCURR_UV(5,0)<1> r[a0.7, 20]<4;2,2>
+ mov (2) ubCURR_UV(6,0)<1> r[a0.7, 24]<4;2,2>
+ mov (2) ubCURR_UV(7,0)<1> r[a0.7, 28]<4;2,2>
+ mov (2) ubCURR_UV(8,0)<1> r[a0.7, 32]<4;2,2>
+ mov (2) ubCURR_UV(9,0)<1> r[a0.7, 36]<4;2,2>
+ mov (2) ubCURR_UV(10,0)<1> r[a0.7, 40]<4;2,2>
+ mov (2) ubCURR_UV(11,0)<1> r[a0.7, 44]<4;2,2>
+ mov (2) ubCURR_UV(12,0)<1> r[a0.7, 48]<4;2,2>
+ mov (2) ubCURR_UV(13,0)<1> r[a0.7, 52]<4;2,2>
+ mov (2) ubCURR_UV(14,0)<1> r[a0.7, 56]<4;2,2>
+ mov (2) ubCURR_UV(15,0)<1> r[a0.7, 60]<4;2,2>
+ mov (2) ubCURR_UV(16,0)<1> r[a0.7, 64]<4;2,2>
+ mov (2) ubCURR_UV(17,0)<1> r[a0.7, 68]<4;2,2>
+ mov (2) ubCURR_UV(18,0)<1> r[a0.7, 72]<4;2,2>
+ mov (2) ubCURR_UV(19,0)<1> r[a0.7, 76]<4;2,2>
+
+ //2 right bytes from B2 - 2 rows (the first 4 source bytes of each B2 row -> bytes 18-19)
+ mov (2) ubCURR_UV(0,18)<1> r[a0.5, 64]<4;2,2>
+ mov (2) ubCURR_UV(1,18)<1> r[a0.5, 96]<4;2,2>
+
+ //2 right bytes from A3 - 8 rows
+ mov (2) ubCURR_UV(2,18)<1> r[a0.2, 0]<4;2,2>
+ mov (2) ubCURR_UV(3,18)<1> r[a0.2, 32]<4;2,2>
+ mov (2) ubCURR_UV(4,18)<1> r[a0.2, 64]<4;2,2>
+ mov (2) ubCURR_UV(5,18)<1> r[a0.2, 96]<4;2,2>
+ mov (2) ubCURR_UV(6,18)<1> r[a0.2, 128]<4;2,2>
+ mov (2) ubCURR_UV(7,18)<1> r[a0.2, 160]<4;2,2>
+ mov (2) ubCURR_UV(8,18)<1> r[a0.2, 192]<4;2,2>
+ mov (2) ubCURR_UV(9,18)<1> r[a0.2, 224]<4;2,2>
+
+ //2 right bytes from A4 - 8 rows
+ mov (2) ubCURR_UV(10,18)<1> r[a0.3, 0]<4;2,2>
+ mov (2) ubCURR_UV(11,18)<1> r[a0.3, 32]<4;2,2>
+ mov (2) ubCURR_UV(12,18)<1> r[a0.3, 64]<4;2,2>
+ mov (2) ubCURR_UV(13,18)<1> r[a0.3, 96]<4;2,2>
+ mov (2) ubCURR_UV(14,18)<1> r[a0.3, 128]<4;2,2>
+ mov (2) ubCURR_UV(15,18)<1> r[a0.3, 160]<4;2,2>
+ mov (2) ubCURR_UV(16,18)<1> r[a0.3, 192]<4;2,2>
+ mov (2) ubCURR_UV(17,18)<1> r[a0.3, 224]<4;2,2>
+
+ //2 right bytes from B4 - 2 rows
+ mov (2) ubCURR_UV(18,18)<1> r[a0.6, 0]<4;2,2>
+ mov (2) ubCURR_UV(19,18)<1> r[a0.6, 32]<4;2,2>
+
+ //Right 20x20 UV : 16x16 UV (Original)+4 extra rows(2 per field on top/bottom)+4 extra pixels(2 on left/right); stored in ubCURR_UV rows 20-39
+
+ //A3 : every second source byte of each 32-byte row -> bytes 2-17 of rows 22-29
+ mov (16) ubCURR_UV(22,2)<1> r[a0.2, 0]<32;16,2>
+ mov (16) ubCURR_UV(23,2)<1> r[a0.2, 32]<32;16,2>
+ mov (16) ubCURR_UV(24,2)<1> r[a0.2, 64]<32;16,2>
+ mov (16) ubCURR_UV(25,2)<1> r[a0.2, 96]<32;16,2>
+ mov (16) ubCURR_UV(26,2)<1> r[a0.2, 128]<32;16,2>
+ mov (16) ubCURR_UV(27,2)<1> r[a0.2, 160]<32;16,2>
+ mov (16) ubCURR_UV(28,2)<1> r[a0.2, 192]<32;16,2>
+ mov (16) ubCURR_UV(29,2)<1> r[a0.2, 224]<32;16,2>
+
+ //A4 : bytes 2-17 of rows 30-37
+ mov (16) ubCURR_UV(30,2)<1> r[a0.3, 0]<32;16,2>
+ mov (16) ubCURR_UV(31,2)<1> r[a0.3, 32]<32;16,2>
+ mov (16) ubCURR_UV(32,2)<1> r[a0.3, 64]<32;16,2>
+ mov (16) ubCURR_UV(33,2)<1> r[a0.3, 96]<32;16,2>
+ mov (16) ubCURR_UV(34,2)<1> r[a0.3, 128]<32;16,2>
+ mov (16) ubCURR_UV(35,2)<1> r[a0.3, 160]<32;16,2>
+ mov (16) ubCURR_UV(36,2)<1> r[a0.3, 192]<32;16,2>
+ mov (16) ubCURR_UV(37,2)<1> r[a0.3, 224]<32;16,2>
+
+ //B2 : bytes 2-17 of rows 20-21 (source and destination are the same registers, r72-r73)
+ mov (16) ubCURR_UV(20,2)<1> r[a0.5, 64]<32;16,2>
+ mov (16) ubCURR_UV(21,2)<1> r[a0.5, 96]<32;16,2>
+
+ //B4 : bytes 2-17 of rows 38-39 (source and destination are the same registers, r90-r91)
+ mov (16) ubCURR_UV(38,2)<1> r[a0.6, 0]<32;16,2>
+ mov (16) ubCURR_UV(39,2)<1> r[a0.6, 32]<32;16,2>
+
+ //TODO - Find a way to reduce this 40 SIMD2 instructions - rT
+ //C2 : one 4-byte source row per instruction (C2 starts 96 bytes after C1) -> bytes 18-19 of rows 20-39
+ mov (2) ubCURR_UV(20,18)<1> r[a0.7, 96]<4;2,2>
+ mov (2) ubCURR_UV(21,18)<1> r[a0.7, 100]<4;2,2>
+ mov (2) ubCURR_UV(22,18)<1> r[a0.7, 104]<4;2,2>
+ mov (2) ubCURR_UV(23,18)<1> r[a0.7, 108]<4;2,2>
+ mov (2) ubCURR_UV(24,18)<1> r[a0.7, 112]<4;2,2>
+ mov (2) ubCURR_UV(25,18)<1> r[a0.7, 116]<4;2,2>
+ mov (2) ubCURR_UV(26,18)<1> r[a0.7, 120]<4;2,2>
+ mov (2) ubCURR_UV(27,18)<1> r[a0.7, 124]<4;2,2>
+ mov (2) ubCURR_UV(28,18)<1> r[a0.7, 128]<4;2,2>
+ mov (2) ubCURR_UV(29,18)<1> r[a0.7, 132]<4;2,2>
+ mov (2) ubCURR_UV(30,18)<1> r[a0.7, 136]<4;2,2>
+ mov (2) ubCURR_UV(31,18)<1> r[a0.7, 140]<4;2,2>
+ mov (2) ubCURR_UV(32,18)<1> r[a0.7, 144]<4;2,2>
+ mov (2) ubCURR_UV(33,18)<1> r[a0.7, 148]<4;2,2>
+ mov (2) ubCURR_UV(34,18)<1> r[a0.7, 152]<4;2,2>
+ mov (2) ubCURR_UV(35,18)<1> r[a0.7, 156]<4;2,2>
+ mov (2) ubCURR_UV(36,18)<1> r[a0.7, 160]<4;2,2>
+ mov (2) ubCURR_UV(37,18)<1> r[a0.7, 164]<4;2,2>
+ mov (2) ubCURR_UV(38,18)<1> r[a0.7, 168]<4;2,2>
+ mov (2) ubCURR_UV(39,18)<1> r[a0.7, 172]<4;2,2>
+
+ //2 left bytes from B1 - 2 rows (the last 4 source bytes of each B1 row -> bytes 0-1)
+ mov (2) ubCURR_UV(20,0)<1> r[a0.4, 28]<4;2,2>
+ mov (2) ubCURR_UV(21,0)<1> r[a0.4, 60]<4;2,2>
+
+ //2 left bytes from A1 - 8 rows
+ mov (2) ubCURR_UV(22,0)<1> r[a0.0, 28]<4;2,2>
+ mov (2) ubCURR_UV(23,0)<1> r[a0.0, 60]<4;2,2>
+ mov (2) ubCURR_UV(24,0)<1> r[a0.0, 92]<4;2,2>
+ mov (2) ubCURR_UV(25,0)<1> r[a0.0, 124]<4;2,2>
+ mov (2) ubCURR_UV(26,0)<1> r[a0.0, 156]<4;2,2>
+ mov (2) ubCURR_UV(27,0)<1> r[a0.0, 188]<4;2,2>
+ mov (2) ubCURR_UV(28,0)<1> r[a0.0, 220]<4;2,2>
+ mov (2) ubCURR_UV(29,0)<1> r[a0.0, 252]<4;2,2>
+
+ //2 left bytes from A2 - 8 rows
+ mov (2) ubCURR_UV(30,0)<1> r[a0.1, 28]<4;2,2>
+ mov (2) ubCURR_UV(31,0)<1> r[a0.1, 60]<4;2,2>
+ mov (2) ubCURR_UV(32,0)<1> r[a0.1, 92]<4;2,2>
+ mov (2) ubCURR_UV(33,0)<1> r[a0.1, 124]<4;2,2>
+ mov (2) ubCURR_UV(34,0)<1> r[a0.1, 156]<4;2,2>
+ mov (2) ubCURR_UV(35,0)<1> r[a0.1, 188]<4;2,2>
+ mov (2) ubCURR_UV(36,0)<1> r[a0.1, 220]<4;2,2>
+ mov (2) ubCURR_UV(37,0)<1> r[a0.1, 252]<4;2,2>
+
+ //2 left bytes from B3 - 2 rows
+ mov (2) ubCURR_UV(38,0)<1> r[a0.5, 28]<4;2,2>
+ mov (2) ubCURR_UV(39,0)<1> r[a0.5, 60]<4;2,2>
+
+
+
+// Module Name : DN_UV_YUY2_Extract_Prev_Frame_UV
+// Author : Tatiya, Rupesh
+// Description : Extract UV from previous frame YUY2 (in place: the source pointers address the PREV_UV registers that are also the destination).
+
+ // Set SRC pointers according to Input packing i.e. YUYV, YVYU, UYVY, VYUY
+ add (1) a0.0<1>:uw r4.1<0;1,0>:ub 256:w // 256 = 8*32, GRF r8 (PREV_UV row 0) plus the byte offset in r4.1
+ add (1) a0.1<1>:uw r4.1<0;1,0>:ub 768:w // 768 = 24*32, GRF r24 (PREV_UV row 16)
+
+ mov (16) ubPREV_UV(0,0)<1> r[a0.0, 0]<32;16,2>:ub
+ mov (16) ubPREV_UV(0,16)<1> r[a0.0, 32]<32;16,2>:ub
+ mov (16) ubPREV_UV(1,0)<1> r[a0.0, 64]<32;16,2>:ub
+ mov (16) ubPREV_UV(1,16)<1> r[a0.0, 96]<32;16,2>:ub
+ mov (16) ubPREV_UV(2,0)<1> r[a0.0, 128]<32;16,2>:ub
+ mov (16) ubPREV_UV(2,16)<1> r[a0.0, 160]<32;16,2>:ub
+ mov (16) ubPREV_UV(3,0)<1> r[a0.0, 192]<32;16,2>:ub
+ mov (16) ubPREV_UV(3,16)<1> r[a0.0, 224]<32;16,2>:ub
+ mov (16) ubPREV_UV(4,0)<1> r[a0.0, 256]<32;16,2>:ub
+ mov (16) ubPREV_UV(4,16)<1> r[a0.0, 288]<32;16,2>:ub
+ mov (16) ubPREV_UV(5,0)<1> r[a0.0, 320]<32;16,2>:ub
+ mov (16) ubPREV_UV(5,16)<1> r[a0.0, 352]<32;16,2>:ub
+ mov (16) ubPREV_UV(6,0)<1> r[a0.0, 384]<32;16,2>:ub
+ mov (16) ubPREV_UV(6,16)<1> r[a0.0, 416]<32;16,2>:ub
+ mov (16) ubPREV_UV(7,0)<1> r[a0.0, 448]<32;16,2>:ub
+ mov (16) ubPREV_UV(7,16)<1> r[a0.0, 480]<32;16,2>:ub
+
+ mov (16) ubPREV_UV(8,0)<1> r[a0.1, 0]<32;16,2>:ub
+ mov (16) ubPREV_UV(8,16)<1> r[a0.1, 32]<32;16,2>:ub
+ mov (16) ubPREV_UV(9,0)<1> r[a0.1, 64]<32;16,2>:ub
+ mov (16) ubPREV_UV(9,16)<1> r[a0.1, 96]<32;16,2>:ub
+ mov (16) ubPREV_UV(10,0)<1> r[a0.1, 128]<32;16,2>:ub
+ mov (16) ubPREV_UV(10,16)<1> r[a0.1, 160]<32;16,2>:ub
+ mov (16) ubPREV_UV(11,0)<1> r[a0.1, 192]<32;16,2>:ub
+ mov (16) ubPREV_UV(11,16)<1> r[a0.1, 224]<32;16,2>:ub
+ mov (16) ubPREV_UV(12,0)<1> r[a0.1, 256]<32;16,2>:ub
+ mov (16) ubPREV_UV(12,16)<1> r[a0.1, 288]<32;16,2>:ub
+ mov (16) ubPREV_UV(13,0)<1> r[a0.1, 320]<32;16,2>:ub
+ mov (16) ubPREV_UV(13,16)<1> r[a0.1, 352]<32;16,2>:ub
+ mov (16) ubPREV_UV(14,0)<1> r[a0.1, 384]<32;16,2>:ub
+ mov (16) ubPREV_UV(14,16)<1> r[a0.1, 416]<32;16,2>:ub
+ mov (16) ubPREV_UV(15,0)<1> r[a0.1, 448]<32;16,2>:ub
+ mov (16) ubPREV_UV(15,16)<1> r[a0.1, 480]<32;16,2>:ub
+
+
+
+//Module Name : DN_UV_Noise_Detection_UV
+//Author : Tatiya, Rupesh
+//Description : Performs noise detection on 16x16 U and 16x16 V each.
+
+
+
+//Module Name : DN_UV_Move_CURBE_Inline_UV.asm
+//Author : Tatiya, Rupesh
+//Description : Copies the CURBE fields (from r1, r2, r4) and inline data (from r7) read below into r54, r55 and r61-r63.
+
+
+
+ //Move CURBE data to another space - so that it can be used as Temp Space --> r1 - r6
+ mov (4) r54.28<1>:ub r2.28<4;4,1>:ub //Dest. YUY2 offset (r2 bytes 28-31 -> r54 bytes 28-31)
+ mov (2) r54.5<1>:ud r4.0<4;2,2>:ud //Src YUY2 offset and Origin offset (dwords r4.0 and r4.2 -> r54.5 and r54.6)
+ mov (4) r55.28<1>:ub r1.0<4;4,1>:ub // r1 bytes 0-3 -> r55 bytes 28-31
+
+ mov (8) r61.20<1>:ub r1.4<8;8,1>:ub // r1 bytes 4-11 -> r61 bytes 20-27
+ mov (4) r61.28<1>:ub r1.12<4;4,1>:ub // r1 bytes 12-15 -> r61 bytes 28-31; r61.20-r61.31 are the byte pairs the noise-reduction compares read
+
+ //Move Inline Data to another space - so that it can be used as Temp Space --> r7
+ mov (4) r62.10<1>:w r7.0<4;4,1>:w // r7 words 0-3 -> r62 words 10-13
+ mov (4) r63.10<1>:w r7.4<4;4,1>:w // r7 words 4-7 -> r63 words 10-13
+
+
+
+
+
+//Module Name : DN_UV_Noise_Detection_Set_Top_Region_N
+//Author : Tatiya, Rupesh
+//Description : Sets sub-region N from Top region: loads a0.0 / a0.1, then jumps to DN_UV_NOISE_DETECTION_UV.
+
+
+ mov (1) a0.0:uw 1664:uw // a0.0 = byte address 1664 (r52.0, assuming 32-byte GRF registers)
+ mov (1) a0.1:uw 1816:uw // a0.1 = byte address 1816 (r56 byte 24, same assumption)
+
+
+
+
+add (1) r7.7<1>:d ip:ud 32:d { NoCompact } // r7.7 = ip + 32; NOTE(review): presumably the return address just past the jmpi - confirm
+ jmpi (1) DN_UV_NOISE_DETECTION_UV { NoCompact } // jump to the noise-detection code used by every sub-region stub
+
+
+
+
+
+//Module Name : DN_UV_Noise_Detection_Set_Top_Region_N
+//Author : Tatiya, Rupesh
+//Description : Sets sub-region N from Top region: loads a0.0 / a0.1, then jumps to DN_UV_NOISE_DETECTION_UV.
+
+
+ //TODO - remove one instruction here using arithmetic. -rT
+ mov (1) a0.0:uw 1792:uw // a0.0 = byte address 1792 (r56.0, assuming 32-byte GRF registers)
+ mov (1) a0.1:uw 1820:uw // a0.1 = byte address 1820 (r56 byte 28, same assumption)
+
+
+
+
+add (1) r7.7<1>:d ip:ud 32:d { NoCompact } // r7.7 = ip + 32; NOTE(review): presumably the return address just past the jmpi - confirm
+ jmpi (1) DN_UV_NOISE_DETECTION_UV { NoCompact } // jump to the noise-detection code used by every sub-region stub
+
+
+
+
+
+//Module Name : DN_UV_Noise_Detection_Set_Top_Region_N
+//Author : Tatiya, Rupesh
+//Description : Sets sub-region N from Top region: loads a0.0 / a0.1, then jumps to DN_UV_NOISE_DETECTION_UV.
+
+
+ //TODO - remove one instruction here using arithmetic. -rT
+ mov (1) a0.0:uw 1920:uw // a0.0 = byte address 1920 (r60.0, assuming 32-byte GRF registers)
+ mov (1) a0.1:uw 1848:uw // a0.1 = byte address 1848 (r57 byte 24, same assumption)
+
+
+
+
+add (1) r7.7<1>:d ip:ud 32:d { NoCompact } // r7.7 = ip + 32; NOTE(review): presumably the return address just past the jmpi - confirm
+ jmpi (1) DN_UV_NOISE_DETECTION_UV { NoCompact } // jump to the noise-detection code used by every sub-region stub
+
+
+
+
+
+//Module Name : DN_UV_Noise_Detection_Set_Top_Region_N
+//Author : Tatiya, Rupesh
+//Description : Sets sub-region N from Top region: loads a0.0 / a0.1, then jumps to DN_UV_NOISE_DETECTION_UV.
+
+
+ //TODO - remove one instruction here using arithmetic. -rT
+ mov (1) a0.0:uw 2048:uw // a0.0 = byte address 2048 (r64.0, assuming 32-byte GRF registers)
+ mov (1) a0.1:uw 1852:uw // a0.1 = byte address 1852 (r57 byte 28, same assumption)
+
+
+
+
+add (1) r7.7<1>:d ip:ud 32:d { NoCompact } // r7.7 = ip + 32; NOTE(review): presumably the return address just past the jmpi - confirm
+ jmpi (1) DN_UV_NOISE_DETECTION_UV { NoCompact } // jump to the noise-detection code used by every sub-region stub
+
+
+
+
+
+//Module Name : DN_UV_Noise_Detection_Set_Top_Region_N
+//Author : Tatiya, Rupesh
+//Description : Sets sub-region N from Top region: loads a0.0 / a0.1, then jumps to DN_UV_NOISE_DETECTION_UV.
+
+
+ mov (1) a0.0:uw 2304:uw // a0.0 = byte address 2304 (r72.0, assuming 32-byte GRF registers)
+ mov (1) a0.1:uw 1880:uw // a0.1 = byte address 1880 (r58 byte 24, same assumption)
+
+
+
+
+add (1) r7.7<1>:d ip:ud 32:d { NoCompact } // r7.7 = ip + 32; NOTE(review): presumably the return address just past the jmpi - confirm
+ jmpi (1) DN_UV_NOISE_DETECTION_UV { NoCompact } // jump to the noise-detection code used by every sub-region stub
+
+
+
+
+
+//Module Name : DN_UV_Noise_Detection_Set_Top_Region_N
+//Author : Tatiya, Rupesh
+//Description : Sets sub-region N from Top region: loads a0.0 / a0.1, then jumps to DN_UV_NOISE_DETECTION_UV.
+
+
+ //TODO - remove one instruction here using arithmetic. -rT
+ mov (1) a0.0:uw 2432:uw // a0.0 = byte address 2432 (r76.0, assuming 32-byte GRF registers)
+ mov (1) a0.1:uw 1884:uw // a0.1 = byte address 1884 (r58 byte 28, same assumption)
+
+
+
+
+add (1) r7.7<1>:d ip:ud 32:d { NoCompact } // r7.7 = ip + 32; NOTE(review): presumably the return address just past the jmpi - confirm
+ jmpi (1) DN_UV_NOISE_DETECTION_UV { NoCompact } // jump to the noise-detection code used by every sub-region stub
+
+
+
+
+
+//Module Name : DN_UV_Noise_Detection_Set_Top_Region_N
+//Author : Tatiya, Rupesh
+//Description : Sets sub-region N from Top region: loads a0.0 / a0.1, then jumps to DN_UV_NOISE_DETECTION_UV.
+
+
+ //TODO - remove one instruction here using arithmetic. -rT
+ mov (1) a0.0:uw 2560:uw // a0.0 = byte address 2560 (r80.0, assuming 32-byte GRF registers)
+ mov (1) a0.1:uw 1912:uw // a0.1 = byte address 1912 (r59 byte 24, same assumption)
+
+
+
+
+add (1) r7.7<1>:d ip:ud 32:d { NoCompact } // r7.7 = ip + 32; NOTE(review): presumably the return address just past the jmpi - confirm
+ jmpi (1) DN_UV_NOISE_DETECTION_UV { NoCompact } // jump to the noise-detection code used by every sub-region stub
+
+
+
+
+
+//Module Name : DN_UV_Noise_Detection_Set_Top_Region_N
+//Author : Tatiya, Rupesh
+//Description : Sets sub-region N from Top region: loads a0.0 / a0.1, then jumps to DN_UV_NOISE_DETECTION_UV.
+
+
+ //TODO - remove one instruction here using arithmetic. -rT
+ mov (1) a0.0:uw 2688:uw // a0.0 = byte address 2688 (r84.0, assuming 32-byte GRF registers)
+ mov (1) a0.1:uw 1916:uw // a0.1 = byte address 1916 (r59 byte 28, same assumption)
+
+
+
+
+add (1) r7.7<1>:d ip:ud 32:d { NoCompact } // r7.7 = ip + 32; NOTE(review): presumably the return address just past the jmpi - confirm
+ jmpi (1) DN_UV_NOISE_DETECTION_UV { NoCompact } // jump to the noise-detection code used by every sub-region stub
+
+
+
+//Module : DN_UV_Noise_Reduction_UV
+//Author : Tatiya, Rupesh
+//Description : Performs Noise Reduction on 16x16 U and 16x16 V.
+//Tasks : 1. Update weight history
+// 2. Find if the block is a motion block
+// 3. Compute Denoised Pixel.
+
+
+
+
+//History is 1+1 byte every 4x4 U and 4x4 V.
+ // f0.0 / f1.0: per channel, history < r61.20 pair / history < r61.22 pair; consumed by the "Update WT HIST" instructions below
+ cmp.l.f0.0 (16) null<1>:w ubHIST_UV(0,0)<16;16,1> r61.20<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w ubHIST_UV(0,0)<16;16,1> r61.22<0;2,1>:ub
+
+ mov (16) uwCURBE_TEMP(0)<1> 0:w // default 0; only overwritten below where history is not below the r61.20 pair
+ mov (16) uwCURBE_TEMP(1)<1> 0:w
+
+ //Compute diff between curr and prev. - First 16 lines
+ // 8 lines here (ubCURR_UV rows 2-9 from column 2, minus successive 16-byte chunks of ubPREV_UV)
+ add (16) wDIFF_TEMPORAL(0)<1> ubCURR_UV(2,2)<16;16,1> -ubPREV_UV(0,0)<16;16,1> //Diff UV interleaved
+ add (16) wDIFF_TEMPORAL(1)<1> ubCURR_UV(3,2)<16;16,1> -ubPREV_UV(0,16)<16;16,1> //Diff UV interleaved
+ add (16) wDIFF_TEMPORAL(2)<1> ubCURR_UV(4,2)<16;16,1> -ubPREV_UV(0,32)<16;16,1> //Diff UV interleaved
+ add (16) wDIFF_TEMPORAL(3)<1> ubCURR_UV(5,2)<16;16,1> -ubPREV_UV(0,48)<16;16,1> //Diff UV interleaved
+ add (16) wDIFF_TEMPORAL(4)<1> ubCURR_UV(6,2)<16;16,1> -ubPREV_UV(0,64)<16;16,1> //Diff UV interleaved
+ add (16) wDIFF_TEMPORAL(5)<1> ubCURR_UV(7,2)<16;16,1> -ubPREV_UV(0,80)<16;16,1> //Diff UV interleaved
+ add (16) wDIFF_TEMPORAL(6)<1> ubCURR_UV(8,2)<16;16,1> -ubPREV_UV(0,96)<16;16,1> //Diff UV interleaved
+ add (16) wDIFF_TEMPORAL(7)<1> ubCURR_UV(9,2)<16;16,1> -ubPREV_UV(0,112)<16;16,1> //Diff UV interleaved
+
+ //Update WT HIST
+ (-f0.0) shr (16) uwCURBE_TEMP(0)<1> ubHIST_UV(0,0)<16;16,1> 1:w // history >= r61.20 pair: TEMP(0) = history >> 1
+ (f1.0) add (16) uwCURBE_TEMP(2)<1> ubHIST_UV(0,0)<16;16,1> r61.24<0;2,1>:ub // history < r61.22 pair: TEMP(2) = history + r61.24 pair
+ (f0.0) mov (16) uwCURBE_TEMP(2)<1> r61.20<0;2,1>:ub // history < r61.20 pair: TEMP(2) = r61.20 pair (written after, so it wins over the add)
+ (-f0.0.anyv) mov (16) uwCURBE_TEMP(2)<1> ubHIST_UV(0,0)<16;16,1> // otherwise TEMP(2) = history; NOTE(review): anyv presumably ORs f0.0 and f1.0 per channel - confirm
+ // Same two compares for the second 16 history bytes, ubHIST_UV(0,16)
+ cmp.l.f0.0 (16) null<1>:w ubHIST_UV(0,16)<16;16,1> r61.20<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w ubHIST_UV(0,16)<16;16,1> r61.22<0;2,1>:ub
+
+ //Compute diff between curr and prev. - First 16 lines
+ // 8 more lines here (ubCURR_UV rows 10-17)
+ add (16) wDIFF_TEMPORAL(8)<1> ubCURR_UV(10,2)<16;16,1> -ubPREV_UV(0,128)<16;16,1> //Diff UV interleaved
+ add (16) wDIFF_TEMPORAL(9)<1> ubCURR_UV(11,2)<16;16,1> -ubPREV_UV(0,144)<16;16,1> //Diff UV interleaved
+ add (16) wDIFF_TEMPORAL(10)<1> ubCURR_UV(12,2)<16;16,1> -ubPREV_UV(0,160)<16;16,1> //Diff UV interleaved
+ add (16) wDIFF_TEMPORAL(11)<1> ubCURR_UV(13,2)<16;16,1> -ubPREV_UV(0,176)<16;16,1> //Diff UV interleaved
+ add (16) wDIFF_TEMPORAL(12)<1> ubCURR_UV(14,2)<16;16,1> -ubPREV_UV(0,192)<16;16,1> //Diff UV interleaved
+ add (16) wDIFF_TEMPORAL(13)<1> ubCURR_UV(15,2)<16;16,1> -ubPREV_UV(0,208)<16;16,1> //Diff UV interleaved
+ add (16) wDIFF_TEMPORAL(14)<1> ubCURR_UV(16,2)<16;16,1> -ubPREV_UV(0,224)<16;16,1> //Diff UV interleaved
+ add (16) wDIFF_TEMPORAL(15)<1> ubCURR_UV(17,2)<16;16,1> -ubPREV_UV(0,240)<16;16,1> //Diff UV interleaved
+ // Update WT HIST for ubHIST_UV(0,16): same four-step selection as above, into uwCURBE_TEMP(1) and uwCURBE_TEMP(3)
+ (-f0.0) shr (16) uwCURBE_TEMP(1)<1> ubHIST_UV(0,16)<16;16,1> 1:w
+ (f1.0) add (16) uwCURBE_TEMP(3)<1> ubHIST_UV(0,16)<16;16,1> r61.24<0;2,1>:ub
+ (f0.0) mov (16) uwCURBE_TEMP(3)<1> r61.20<0;2,1>:ub
+ (-f0.0.anyv) mov(16) uwCURBE_TEMP(3)<1> ubHIST_UV(0,16)<16;16,1>
+
+ //16x16 to 16x4 - First 16 lines (rows 0-3: sum of |diff| per column -> uwDIFF_TEMPORAL_SUM4x4(0))
+ add (16) acc0.0<1>:uw (abs)wDIFF_TEMPORAL(0)<16;16,1> (abs)wDIFF_TEMPORAL(1)<16;16,1>
+ add (16) acc0.0<1>:uw acc0.0<16;16,1>:uw (abs)wDIFF_TEMPORAL(2)<16;16,1>
+ add (16) uwDIFF_TEMPORAL_SUM4x4(0)<1> acc0.0<16;16,1>:uw (abs)wDIFF_TEMPORAL(3)<16;16,1>
+ //16x16 to 16x4 - First 16 lines (rows 4-7 -> uwDIFF_TEMPORAL_SUM4x4(1))
+ add (16) acc0.0<1>:uw (abs)wDIFF_TEMPORAL(4)<16;16,1> (abs)wDIFF_TEMPORAL(5)<16;16,1>
+ add (16) acc0.0<1>:uw acc0.0<16;16,1>:uw (abs)wDIFF_TEMPORAL(6)<16;16,1>
+ add (16) uwDIFF_TEMPORAL_SUM4x4(1)<1> acc0.0<16;16,1>:uw (abs)wDIFF_TEMPORAL(7)<16;16,1>
+ //16x16 to 16x4 - First 16 lines (rows 8-11 -> uwDIFF_TEMPORAL_SUM4x4(2))
+ add (16) acc0.0<1>:uw (abs)wDIFF_TEMPORAL(8)<16;16,1> (abs)wDIFF_TEMPORAL(9)<16;16,1>
+ add (16) acc0.0<1>:uw acc0.0<16;16,1>:uw (abs)wDIFF_TEMPORAL(10)<16;16,1>
+ add (16) uwDIFF_TEMPORAL_SUM4x4(2)<1> acc0.0<16;16,1>:uw (abs)wDIFF_TEMPORAL(11)<16;16,1>
+ //16x16 to 16x4 - First 16 lines (rows 12-15 -> uwDIFF_TEMPORAL_SUM4x4(3))
+ add (16) acc0.0<1>:uw (abs)wDIFF_TEMPORAL(12)<16;16,1> (abs)wDIFF_TEMPORAL(13)<16;16,1>
+ add (16) acc0.0<1>:uw acc0.0<16;16,1>:uw (abs)wDIFF_TEMPORAL(14)<16;16,1>
+ add (16) uwDIFF_TEMPORAL_SUM4x4(3)<1> acc0.0<16;16,1>:uw (abs)wDIFF_TEMPORAL(15)<16;16,1>
+
+//Compute diff between curr and prev. - Second 16 lines (ubCURR_UV rows 22-37 from column 2, minus ubPREV_UV(8,...) in 16-byte chunks)
+//13 lines, stored in wDIFF_TEMPORAL(16)..(28).
+ add (16) wDIFF_TEMPORAL(16)<1> ubCURR_UV(22,2)<16;16,1> -ubPREV_UV(8,0)<16;16,1> //Diff UV interleaved
+ add (16) wDIFF_TEMPORAL(17)<1> ubCURR_UV(23,2)<16;16,1> -ubPREV_UV(8,16)<16;16,1> //Diff UV interleaved
+ add (16) wDIFF_TEMPORAL(18)<1> ubCURR_UV(24,2)<16;16,1> -ubPREV_UV(8,32)<16;16,1> //Diff UV interleaved
+ add (16) wDIFF_TEMPORAL(19)<1> ubCURR_UV(25,2)<16;16,1> -ubPREV_UV(8,48)<16;16,1> //Diff UV interleaved
+ add (16) wDIFF_TEMPORAL(20)<1> ubCURR_UV(26,2)<16;16,1> -ubPREV_UV(8,64)<16;16,1> //Diff UV interleaved
+ add (16) wDIFF_TEMPORAL(21)<1> ubCURR_UV(27,2)<16;16,1> -ubPREV_UV(8,80)<16;16,1> //Diff UV interleaved
+ add (16) wDIFF_TEMPORAL(22)<1> ubCURR_UV(28,2)<16;16,1> -ubPREV_UV(8,96)<16;16,1> //Diff UV interleaved
+ add (16) wDIFF_TEMPORAL(23)<1> ubCURR_UV(29,2)<16;16,1> -ubPREV_UV(8,112)<16;16,1> //Diff UV interleaved
+ add (16) wDIFF_TEMPORAL(24)<1> ubCURR_UV(30,2)<16;16,1> -ubPREV_UV(8,128)<16;16,1> //Diff UV interleaved
+ add (16) wDIFF_TEMPORAL(25)<1> ubCURR_UV(31,2)<16;16,1> -ubPREV_UV(8,144)<16;16,1> //Diff UV interleaved
+ add (16) wDIFF_TEMPORAL(26)<1> ubCURR_UV(32,2)<16;16,1> -ubPREV_UV(8,160)<16;16,1> //Diff UV interleaved
+ add (16) wDIFF_TEMPORAL(27)<1> ubCURR_UV(33,2)<16;16,1> -ubPREV_UV(8,176)<16;16,1> //Diff UV interleaved
+ add (16) wDIFF_TEMPORAL(28)<1> ubCURR_UV(34,2)<16;16,1> -ubPREV_UV(8,192)<16;16,1> //Diff UV interleaved
+
+//3 more lines - these are stored in wCURBE_TEMP(4)..(6) rather than in wDIFF_TEMPORAL
+ add (16) wCURBE_TEMP(4)<1> ubCURR_UV(35,2)<16;16,1> -ubPREV_UV(8,208)<16;16,1> //Diff UV interleaved
+ add (16) wCURBE_TEMP(5)<1> ubCURR_UV(36,2)<16;16,1> -ubPREV_UV(8,224)<16;16,1> //Diff UV interleaved
+ add (16) wCURBE_TEMP(6)<1> ubCURR_UV(37,2)<16;16,1> -ubPREV_UV(8,240)<16;16,1> //Diff UV interleaved
+
+ //16x4 to 8x4 - First 16 lines (region <4;2,1>: word pairs at +0 and +2 of every 4 words are added; each add reads 32 source words and writes 16)
+ add (16) uwDIFF_TEMPORAL_SUM4x4(0)<1> uwDIFF_TEMPORAL_SUM4x4(0,0)<4;2,1> uwDIFF_TEMPORAL_SUM4x4(0,2)<4;2,1>
+ add (16) uwDIFF_TEMPORAL_SUM4x4(1)<1> uwDIFF_TEMPORAL_SUM4x4(2,0)<4;2,1> uwDIFF_TEMPORAL_SUM4x4(2,2)<4;2,1>
+
+ //8x4 to 4x4 - First 16 lines (same pairwise add once more; result kept in uwDIFF_TEMPORAL_SUM4x4_FINAL(0))
+ add (16) uwDIFF_TEMPORAL_SUM4x4_FINAL(0)<1> uwDIFF_TEMPORAL_SUM4x4(0,0)<4;2,1> uwDIFF_TEMPORAL_SUM4x4(0,2)<4;2,1> { AccWrEn }
+
+ //16x16 to 16x4 - Second 16 lines (diffs 16-19 -> uwDIFF_TEMPORAL_SUM4x4(0), overwriting the first-half sums)
+ add (16) acc0.0<1>:uw (abs)wDIFF_TEMPORAL(16)<16;16,1> (abs)wDIFF_TEMPORAL(17)<16;16,1>
+ add (16) acc0.0<1>:uw acc0.0<16;16,1>:uw (abs)wDIFF_TEMPORAL(18)<16;16,1>
+ add (16) uwDIFF_TEMPORAL_SUM4x4(0)<1> acc0.0<16;16,1>:uw (abs)wDIFF_TEMPORAL(19)<16;16,1>
+ //16x16 to 16x4 - Second 16 lines (diffs 20-23 -> uwDIFF_TEMPORAL_SUM4x4(1))
+ add (16) acc0.0<1>:uw (abs)wDIFF_TEMPORAL(20)<16;16,1> (abs)wDIFF_TEMPORAL(21)<16;16,1>
+ add (16) acc0.0<1>:uw acc0.0<16;16,1>:uw (abs)wDIFF_TEMPORAL(22)<16;16,1>
+ add (16) uwDIFF_TEMPORAL_SUM4x4(1)<1> acc0.0<16;16,1>:uw (abs)wDIFF_TEMPORAL(23)<16;16,1>
+ //16x16 to 16x4 - Second 16 lines (diffs 24-27 -> uwDIFF_TEMPORAL_SUM4x4(2))
+ add (16) acc0.0<1>:uw (abs)wDIFF_TEMPORAL(24)<16;16,1> (abs)wDIFF_TEMPORAL(25)<16;16,1>
+ add (16) acc0.0<1>:uw acc0.0<16;16,1>:uw (abs)wDIFF_TEMPORAL(26)<16;16,1>
+ add (16) uwDIFF_TEMPORAL_SUM4x4(2)<1> acc0.0<16;16,1>:uw (abs)wDIFF_TEMPORAL(27)<16;16,1>
+
+ //16x16 to 16x4 - Second 16 lines (diff 28 plus the three diffs held in wCURBE_TEMP(4)..(6) -> uwDIFF_TEMPORAL_SUM4x4(3))
+ add (16) acc0.0<1>:uw (abs)wDIFF_TEMPORAL(28)<16;16,1> (abs)wCURBE_TEMP(4)<16;16,1>
+ add (16) acc0.0<1>:uw acc0.0<16;16,1>:uw (abs)wCURBE_TEMP(5)<16;16,1>
+ add (16) uwDIFF_TEMPORAL_SUM4x4(3)<1> acc0.0<16;16,1>:uw (abs)wCURBE_TEMP(6)<16;16,1>
+
+ //Find if block is motion block - First 16 lines
+ cmp.g.f0.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_FINAL(0)<16;16,1> r61.26<0;2,1>:ub // f0.0 set where the 4x4 sum > r61.26 pair
+
+ //Move TEMPORAL_SUM4x4 for SIMD16 use later. (region <0;2,1> repeats one 2-word pair across all 8 channels of each mov)
+ mov (8) uwDIFF_TEMPORAL_SUM4x4_TEMP_0(0,0)<1> uwDIFF_TEMPORAL_SUM4x4_FINAL(0,0)<0;2,1>
+ mov (8) uwDIFF_TEMPORAL_SUM4x4_TEMP_0(0,8)<1> uwDIFF_TEMPORAL_SUM4x4_FINAL(0,2)<0;2,1>
+ mov (8) uwDIFF_TEMPORAL_SUM4x4_TEMP_1(0,0)<1> uwDIFF_TEMPORAL_SUM4x4_FINAL(0,4)<0;2,1>
+ mov (8) uwDIFF_TEMPORAL_SUM4x4_TEMP_1(0,8)<1> uwDIFF_TEMPORAL_SUM4x4_FINAL(0,6)<0;2,1>
+ mov (8) uwDIFF_TEMPORAL_SUM4x4_TEMP_2(0,0)<1> uwDIFF_TEMPORAL_SUM4x4_FINAL(0,8)<0;2,1>
+ mov (8) uwDIFF_TEMPORAL_SUM4x4_TEMP_2(0,8)<1> uwDIFF_TEMPORAL_SUM4x4_FINAL(0,10)<0;2,1>
+ mov (8) uwDIFF_TEMPORAL_SUM4x4_TEMP_3(0,0)<1> uwDIFF_TEMPORAL_SUM4x4_FINAL(0,12)<0;2,1>
+ mov (8) uwDIFF_TEMPORAL_SUM4x4_TEMP_3(0,8)<1> uwDIFF_TEMPORAL_SUM4x4_FINAL(0,14)<0;2,1>
+
+ //Pick Appropriate Weight History Based on motion. - First 16 lines
+ (-f0.0) mov (16) uwCURBE_TEMP(0)<1> uwCURBE_TEMP(2)<16;16,1> // f0.0 clear: take uwCURBE_TEMP(2); f0.0 set: keep the value already in uwCURBE_TEMP(0)
+
+ //Actual DN - First 16 lines (one 12-instruction group per row; each group overwrites its wDIFF_TEMPORAL(row) with the output pixels)
+ cmp.l.f0.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(0)<16;16,1> r61.28<0;2,1>:ub // f0.0: |diff| < r61.28 pair
+ cmp.l.f1.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(0)<16;16,1> r61.30<0;2,1>:ub // f1.0: |diff| < r61.30 pair
+ mul (8) acc0.0<1>:w ubCURR_UV(2,2)<8;8,1> -uwCURBE_TEMP(0,0)<0;2,1> // acc = -curr * weight for the first 8 channels
+ mul (8) acc0.8<1>:w ubCURR_UV(2,10)<8;8,1> -uwCURBE_TEMP(0,2)<0;2,1> // same for the next 8 channels with the next weight pair
+ mac (16) acc0<1>:w ubCURR_UV(2,2)<16;16,1> 256:w // acc += curr * 256
+ mac (8) acc0.0<1>:w ubPREV_UV(0,0)<8;8,1> uwCURBE_TEMP(0,0)<0;2,1> // acc += prev * weight
+ mac (8) acc0.8<1>:w ubPREV_UV(0,8)<8;8,1> uwCURBE_TEMP(0,2)<0;2,1>
+ add (16) acc0<1>:w acc0<16;16,1>:w 128:w // +128 rounds the >> 8 below
+ (f0.0) shr (16) wDIFF_TEMPORAL(0)<1> acc0<16;16,1>:w 8:w // f0.0 set: output = acc >> 8
+ (-f0.0) mov (16) wDIFF_TEMPORAL(0)<1> ubCURR_UV(2,2)<16;16,1> // f0.0 clear: output = curr unchanged
+ cmp.le.f0.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_TEMP_0(0)<16;16,1> r61.26<0;2,1>:ub // f0.0 now: 4x4 block sum <= r61.26 pair
+ (-f0.0.allv) avg (16) wDIFF_TEMPORAL(0)<1> wDIFF_TEMPORAL(0)<16;16,1> ubCURR_UV(2,2)<16;16,1> // unless f0.0 and f1.0 are both set, average output with curr; NOTE(review): allv presumably ANDs the two flags per channel - confirm
+ // Row 1 (ubCURR_UV row 3): same sequence as row 0
+ cmp.l.f0.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(1)<16;16,1> r61.28<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(1)<16;16,1> r61.30<0;2,1>:ub
+ mul (8) acc0.0<1>:w ubCURR_UV(3,2)<8;8,1> -uwCURBE_TEMP(0,0)<0;2,1>
+ mul (8) acc0.8<1>:w ubCURR_UV(3,10)<8;8,1> -uwCURBE_TEMP(0,2)<0;2,1>
+ mac (16) acc0<1>:w ubCURR_UV(3,2)<16;16,1> 256:w
+ mac (8) acc0.0<1>:w ubPREV_UV(0,16)<8;8,1> uwCURBE_TEMP(0,0)<0;2,1>
+ mac (8) acc0.8<1>:w ubPREV_UV(0,24)<8;8,1> uwCURBE_TEMP(0,2)<0;2,1>
+ add (16) acc0<1>:w acc0<16;16,1>:w 128:w
+ (f0.0) shr (16) wDIFF_TEMPORAL(1)<1> acc0<16;16,1>:w 8:w
+ (-f0.0) mov (16) wDIFF_TEMPORAL(1)<1> ubCURR_UV(3,2)<16;16,1>
+ cmp.le.f0.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_TEMP_0(0)<16;16,1> r61.26<0;2,1>:ub
+ (-f0.0.allv) avg (16) wDIFF_TEMPORAL(1)<1> wDIFF_TEMPORAL(1)<16;16,1> ubCURR_UV(3,2)<16;16,1>
+ // Row 2 (ubCURR_UV row 4): same sequence as row 0
+ cmp.l.f0.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(2)<16;16,1> r61.28<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(2)<16;16,1> r61.30<0;2,1>:ub
+ mul (8) acc0.0<1>:w ubCURR_UV(4,2)<8;8,1> -uwCURBE_TEMP(0,0)<0;2,1>
+ mul (8) acc0.8<1>:w ubCURR_UV(4,10)<8;8,1> -uwCURBE_TEMP(0,2)<0;2,1>
+ mac (16) acc0<1>:w ubCURR_UV(4,2)<16;16,1> 256:w
+ mac (8) acc0.0<1>:w ubPREV_UV(0,32)<8;8,1> uwCURBE_TEMP(0,0)<0;2,1>
+ mac (8) acc0.8<1>:w ubPREV_UV(0,40)<8;8,1> uwCURBE_TEMP(0,2)<0;2,1>
+ add (16) acc0<1>:w acc0<16;16,1>:w 128:w
+ (f0.0) shr (16) wDIFF_TEMPORAL(2)<1> acc0<16;16,1>:w 8:w
+ (-f0.0) mov (16) wDIFF_TEMPORAL(2)<1> ubCURR_UV(4,2)<16;16,1>
+ cmp.le.f0.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_TEMP_0(0)<16;16,1> r61.26<0;2,1>:ub
+ (-f0.0.allv) avg (16) wDIFF_TEMPORAL(2)<1> wDIFF_TEMPORAL(2)<16;16,1> ubCURR_UV(4,2)<16;16,1>
+ // Row 3 (ubCURR_UV row 5): same sequence as row 0
+ cmp.l.f0.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(3)<16;16,1> r61.28<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(3)<16;16,1> r61.30<0;2,1>:ub
+ mul (8) acc0.0<1>:w ubCURR_UV(5,2)<8;8,1> -uwCURBE_TEMP(0,0)<0;2,1>
+ mul (8) acc0.8<1>:w ubCURR_UV(5,10)<8;8,1> -uwCURBE_TEMP(0,2)<0;2,1>
+ mac (16) acc0<1>:w ubCURR_UV(5,2)<16;16,1> 256:w
+ mac (8) acc0.0<1>:w ubPREV_UV(0,48)<8;8,1> uwCURBE_TEMP(0,0)<0;2,1>
+ mac (8) acc0.8<1>:w ubPREV_UV(0,56)<8;8,1> uwCURBE_TEMP(0,2)<0;2,1>
+ add (16) acc0<1>:w acc0<16;16,1>:w 128:w
+ (f0.0) shr (16) wDIFF_TEMPORAL(3)<1> acc0<16;16,1>:w 8:w
+ (-f0.0) mov (16) wDIFF_TEMPORAL(3)<1> ubCURR_UV(5,2)<16;16,1>
+ cmp.le.f0.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_TEMP_0(0)<16;16,1> r61.26<0;2,1>:ub
+ (-f0.0.allv) avg (16) wDIFF_TEMPORAL(3)<1> wDIFF_TEMPORAL(3)<16;16,1> ubCURR_UV(5,2)<16;16,1>
+ // Row 4 (ubCURR_UV row 6): weights move to uwCURBE_TEMP(0,4)/(0,6), block sums to ..._SUM4x4_TEMP_1
+ cmp.l.f0.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(4)<16;16,1> r61.28<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(4)<16;16,1> r61.30<0;2,1>:ub
+ mul (8) acc0.0<1>:w ubCURR_UV(6,2)<8;8,1> -uwCURBE_TEMP(0,4)<0;2,1>
+ mul (8) acc0.8<1>:w ubCURR_UV(6,10)<8;8,1> -uwCURBE_TEMP(0,6)<0;2,1>
+ mac (16) acc0<1>:w ubCURR_UV(6,2)<16;16,1> 256:w
+ mac (8) acc0.0<1>:w ubPREV_UV(0,64)<8;8,1> uwCURBE_TEMP(0,4)<0;2,1>
+ mac (8) acc0.8<1>:w ubPREV_UV(0,72)<8;8,1> uwCURBE_TEMP(0,6)<0;2,1>
+ add (16) acc0<1>:w acc0<16;16,1>:w 128:w
+ (f0.0) shr (16) wDIFF_TEMPORAL(4)<1> acc0<16;16,1>:w 8:w
+ (-f0.0) mov (16) wDIFF_TEMPORAL(4)<1> ubCURR_UV(6,2)<16;16,1>
+ cmp.le.f0.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_TEMP_1(0)<16;16,1> r61.26<0;2,1>:ub
+ (-f0.0.allv) avg (16) wDIFF_TEMPORAL(4)<1> wDIFF_TEMPORAL(4)<16;16,1> ubCURR_UV(6,2)<16;16,1>
+ // Row 5 (ubCURR_UV row 7): same sequence as row 4
+ cmp.l.f0.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(5)<16;16,1> r61.28<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(5)<16;16,1> r61.30<0;2,1>:ub
+ mul (8) acc0.0<1>:w ubCURR_UV(7,2)<8;8,1> -uwCURBE_TEMP(0,4)<0;2,1>
+ mul (8) acc0.8<1>:w ubCURR_UV(7,10)<8;8,1> -uwCURBE_TEMP(0,6)<0;2,1>
+ mac (16) acc0<1>:w ubCURR_UV(7,2)<16;16,1> 256:w
+ mac (8) acc0.0<1>:w ubPREV_UV(0,80)<8;8,1> uwCURBE_TEMP(0,4)<0;2,1>
+ mac (8) acc0.8<1>:w ubPREV_UV(0,88)<8;8,1> uwCURBE_TEMP(0,6)<0;2,1>
+ add (16) acc0<1>:w acc0<16;16,1>:w 128:w
+ (f0.0) shr (16) wDIFF_TEMPORAL(5)<1> acc0<16;16,1>:w 8:w
+ (-f0.0) mov (16) wDIFF_TEMPORAL(5)<1> ubCURR_UV(7,2)<16;16,1>
+ cmp.le.f0.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_TEMP_1(0)<16;16,1> r61.26<0;2,1>:ub
+ (-f0.0.allv) avg (16) wDIFF_TEMPORAL(5)<1> wDIFF_TEMPORAL(5)<16;16,1> ubCURR_UV(7,2)<16;16,1>
+ // Row 6 (ubCURR_UV row 8): same sequence as row 4
+ cmp.l.f0.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(6)<16;16,1> r61.28<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(6)<16;16,1> r61.30<0;2,1>:ub
+ mul (8) acc0.0<1>:w ubCURR_UV(8,2)<8;8,1> -uwCURBE_TEMP(0,4)<0;2,1>
+ mul (8) acc0.8<1>:w ubCURR_UV(8,10)<8;8,1> -uwCURBE_TEMP(0,6)<0;2,1>
+ mac (16) acc0<1>:w ubCURR_UV(8,2)<16;16,1> 256:w
+ mac (8) acc0.0<1>:w ubPREV_UV(0,96)<8;8,1> uwCURBE_TEMP(0,4)<0;2,1>
+ mac (8) acc0.8<1>:w ubPREV_UV(0,104)<8;8,1> uwCURBE_TEMP(0,6)<0;2,1>
+ add (16) acc0<1>:w acc0<16;16,1>:w 128:w
+ (f0.0) shr (16) wDIFF_TEMPORAL(6)<1> acc0<16;16,1>:w 8:w
+ (-f0.0) mov (16) wDIFF_TEMPORAL(6)<1> ubCURR_UV(8,2)<16;16,1>
+ cmp.le.f0.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_TEMP_1(0)<16;16,1> r61.26<0;2,1>:ub
+ (-f0.0.allv) avg (16) wDIFF_TEMPORAL(6)<1> wDIFF_TEMPORAL(6)<16;16,1> ubCURR_UV(8,2)<16;16,1>
+ // Row 7 (ubCURR_UV row 9): same sequence as row 4
+ cmp.l.f0.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(7)<16;16,1> r61.28<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(7)<16;16,1> r61.30<0;2,1>:ub
+ mul (8) acc0.0<1>:w ubCURR_UV(9,2)<8;8,1> -uwCURBE_TEMP(0,4)<0;2,1>
+ mul (8) acc0.8<1>:w ubCURR_UV(9,10)<8;8,1> -uwCURBE_TEMP(0,6)<0;2,1>
+ mac (16) acc0<1>:w ubCURR_UV(9,2)<16;16,1> 256:w
+ mac (8) acc0.0<1>:w ubPREV_UV(0,112)<8;8,1> uwCURBE_TEMP(0,4)<0;2,1>
+ mac (8) acc0.8<1>:w ubPREV_UV(0,120)<8;8,1> uwCURBE_TEMP(0,6)<0;2,1>
+ add (16) acc0<1>:w acc0<16;16,1>:w 128:w
+ (f0.0) shr (16) wDIFF_TEMPORAL(7)<1> acc0<16;16,1>:w 8:w
+ (-f0.0) mov (16) wDIFF_TEMPORAL(7)<1> ubCURR_UV(9,2)<16;16,1>
+ cmp.le.f0.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_TEMP_1(0)<16;16,1> r61.26<0;2,1>:ub
+ (-f0.0.allv) avg (16) wDIFF_TEMPORAL(7)<1> wDIFF_TEMPORAL(7)<16;16,1> ubCURR_UV(9,2)<16;16,1>
+ // Row 8 (ubCURR_UV row 10): weights move to uwCURBE_TEMP(0,8)/(0,10), block sums to ..._SUM4x4_TEMP_2
+ cmp.l.f0.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(8)<16;16,1> r61.28<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(8)<16;16,1> r61.30<0;2,1>:ub
+ mul (8) acc0.0<1>:w ubCURR_UV(10,2)<8;8,1> -uwCURBE_TEMP(0,8)<0;2,1>
+ mul (8) acc0.8<1>:w ubCURR_UV(10,10)<8;8,1> -uwCURBE_TEMP(0,10)<0;2,1>
+ mac (16) acc0<1>:w ubCURR_UV(10,2)<16;16,1> 256:w
+ mac (8) acc0.0<1>:w ubPREV_UV(0,128)<8;8,1> uwCURBE_TEMP(0,8)<0;2,1>
+ mac (8) acc0.8<1>:w ubPREV_UV(0,136)<8;8,1> uwCURBE_TEMP(0,10)<0;2,1>
+ add (16) acc0<1>:w acc0<16;16,1>:w 128:w
+ (f0.0) shr (16) wDIFF_TEMPORAL(8)<1> acc0<16;16,1>:w 8:w
+ (-f0.0) mov (16) wDIFF_TEMPORAL(8)<1> ubCURR_UV(10,2)<16;16,1>
+ cmp.le.f0.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_TEMP_2(0)<16;16,1> r61.26<0;2,1>:ub
+ (-f0.0.allv) avg (16) wDIFF_TEMPORAL(8)<1> wDIFF_TEMPORAL(8)<16;16,1> ubCURR_UV(10,2)<16;16,1>
+ // Row 9 (ubCURR_UV row 11): same sequence as row 8
+ cmp.l.f0.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(9)<16;16,1> r61.28<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(9)<16;16,1> r61.30<0;2,1>:ub
+ mul (8) acc0.0<1>:w ubCURR_UV(11,2)<8;8,1> -uwCURBE_TEMP(0,8)<0;2,1>
+ mul (8) acc0.8<1>:w ubCURR_UV(11,10)<8;8,1> -uwCURBE_TEMP(0,10)<0;2,1>
+ mac (16) acc0<1>:w ubCURR_UV(11,2)<16;16,1> 256:w
+ mac (8) acc0.0<1>:w ubPREV_UV(0,144)<8;8,1> uwCURBE_TEMP(0,8)<0;2,1>
+ mac (8) acc0.8<1>:w ubPREV_UV(0,152)<8;8,1> uwCURBE_TEMP(0,10)<0;2,1>
+ add (16) acc0<1>:w acc0<16;16,1>:w 128:w
+ (f0.0) shr (16) wDIFF_TEMPORAL(9)<1> acc0<16;16,1>:w 8:w
+ (-f0.0) mov (16) wDIFF_TEMPORAL(9)<1> ubCURR_UV(11,2)<16;16,1>
+ cmp.le.f0.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_TEMP_2(0)<16;16,1> r61.26<0;2,1>:ub
+ (-f0.0.allv) avg (16) wDIFF_TEMPORAL(9)<1> wDIFF_TEMPORAL(9)<16;16,1> ubCURR_UV(11,2)<16;16,1>
+ // Row 10 (ubCURR_UV row 12): same sequence as row 8
+ cmp.l.f0.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(10)<16;16,1> r61.28<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(10)<16;16,1> r61.30<0;2,1>:ub
+ mul (8) acc0.0<1>:w ubCURR_UV(12,2)<8;8,1> -uwCURBE_TEMP(0,8)<0;2,1>
+ mul (8) acc0.8<1>:w ubCURR_UV(12,10)<8;8,1> -uwCURBE_TEMP(0,10)<0;2,1>
+ mac (16) acc0<1>:w ubCURR_UV(12,2)<16;16,1> 256:w
+ mac (8) acc0.0<1>:w ubPREV_UV(0,160)<8;8,1> uwCURBE_TEMP(0,8)<0;2,1>
+ mac (8) acc0.8<1>:w ubPREV_UV(0,168)<8;8,1> uwCURBE_TEMP(0,10)<0;2,1>
+ add (16) acc0<1>:w acc0<16;16,1>:w 128:w
+ (f0.0) shr (16) wDIFF_TEMPORAL(10)<1> acc0<16;16,1>:w 8:w
+ (-f0.0) mov (16) wDIFF_TEMPORAL(10)<1> ubCURR_UV(12,2)<16;16,1>
+ cmp.le.f0.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_TEMP_2(0)<16;16,1> r61.26<0;2,1>:ub
+ (-f0.0.allv) avg (16) wDIFF_TEMPORAL(10)<1> wDIFF_TEMPORAL(10)<16;16,1> ubCURR_UV(12,2)<16;16,1>
+ // Row 11 (ubCURR_UV row 13): same sequence as row 8
+ cmp.l.f0.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(11)<16;16,1> r61.28<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(11)<16;16,1> r61.30<0;2,1>:ub
+ mul (8) acc0.0<1>:w ubCURR_UV(13,2)<8;8,1> -uwCURBE_TEMP(0,8)<0;2,1>
+ mul (8) acc0.8<1>:w ubCURR_UV(13,10)<8;8,1> -uwCURBE_TEMP(0,10)<0;2,1>
+ mac (16) acc0<1>:w ubCURR_UV(13,2)<16;16,1> 256:w
+ mac (8) acc0.0<1>:w ubPREV_UV(0,176)<8;8,1> uwCURBE_TEMP(0,8)<0;2,1>
+ mac (8) acc0.8<1>:w ubPREV_UV(0,184)<8;8,1> uwCURBE_TEMP(0,10)<0;2,1>
+ add (16) acc0<1>:w acc0<16;16,1>:w 128:w
+ (f0.0) shr (16) wDIFF_TEMPORAL(11)<1> acc0<16;16,1>:w 8:w
+ (-f0.0) mov (16) wDIFF_TEMPORAL(11)<1> ubCURR_UV(13,2)<16;16,1>
+ cmp.le.f0.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_TEMP_2(0)<16;16,1> r61.26<0;2,1>:ub
+ (-f0.0.allv) avg (16) wDIFF_TEMPORAL(11)<1> wDIFF_TEMPORAL(11)<16;16,1> ubCURR_UV(13,2)<16;16,1>
+ // Row 12 (ubCURR_UV row 14): weights move to uwCURBE_TEMP(0,12)/(0,14), block sums to ..._SUM4x4_TEMP_3
+ cmp.l.f0.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(12)<16;16,1> r61.28<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(12)<16;16,1> r61.30<0;2,1>:ub
+ mul (8) acc0.0<1>:w ubCURR_UV(14,2)<8;8,1> -uwCURBE_TEMP(0,12)<0;2,1>
+ mul (8) acc0.8<1>:w ubCURR_UV(14,10)<8;8,1> -uwCURBE_TEMP(0,14)<0;2,1>
+ mac (16) acc0<1>:w ubCURR_UV(14,2)<16;16,1> 256:w
+ mac (8) acc0.0<1>:w ubPREV_UV(0,192)<8;8,1> uwCURBE_TEMP(0,12)<0;2,1>
+ mac (8) acc0.8<1>:w ubPREV_UV(0,200)<8;8,1> uwCURBE_TEMP(0,14)<0;2,1>
+ add (16) acc0<1>:w acc0<16;16,1>:w 128:w
+ (f0.0) shr (16) wDIFF_TEMPORAL(12)<1> acc0<16;16,1>:w 8:w
+ (-f0.0) mov (16) wDIFF_TEMPORAL(12)<1> ubCURR_UV(14,2)<16;16,1>
+ cmp.le.f0.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_TEMP_3(0)<16;16,1> r61.26<0;2,1>:ub
+ (-f0.0.allv) avg (16) wDIFF_TEMPORAL(12)<1> wDIFF_TEMPORAL(12)<16;16,1> ubCURR_UV(14,2)<16;16,1>
+ // Row 13 (ubCURR_UV row 15): same sequence as row 12
+ cmp.l.f0.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(13)<16;16,1> r61.28<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(13)<16;16,1> r61.30<0;2,1>:ub
+ mul (8) acc0.0<1>:w ubCURR_UV(15,2)<8;8,1> -uwCURBE_TEMP(0,12)<0;2,1>
+ mul (8) acc0.8<1>:w ubCURR_UV(15,10)<8;8,1> -uwCURBE_TEMP(0,14)<0;2,1>
+ mac (16) acc0<1>:w ubCURR_UV(15,2)<16;16,1> 256:w
+ mac (8) acc0.0<1>:w ubPREV_UV(0,208)<8;8,1> uwCURBE_TEMP(0,12)<0;2,1>
+ mac (8) acc0.8<1>:w ubPREV_UV(0,216)<8;8,1> uwCURBE_TEMP(0,14)<0;2,1>
+ add (16) acc0<1>:w acc0<16;16,1>:w 128:w
+ (f0.0) shr (16) wDIFF_TEMPORAL(13)<1> acc0<16;16,1>:w 8:w
+ (-f0.0) mov (16) wDIFF_TEMPORAL(13)<1> ubCURR_UV(15,2)<16;16,1>
+ cmp.le.f0.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_TEMP_3(0)<16;16,1> r61.26<0;2,1>:ub
+ (-f0.0.allv) avg (16) wDIFF_TEMPORAL(13)<1> wDIFF_TEMPORAL(13)<16;16,1> ubCURR_UV(15,2)<16;16,1>
+ // Row 14 (ubCURR_UV row 16): same sequence as row 12
+ cmp.l.f0.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(14)<16;16,1> r61.28<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(14)<16;16,1> r61.30<0;2,1>:ub
+ mul (8) acc0.0<1>:w ubCURR_UV(16,2)<8;8,1> -uwCURBE_TEMP(0,12)<0;2,1>
+ mul (8) acc0.8<1>:w ubCURR_UV(16,10)<8;8,1> -uwCURBE_TEMP(0,14)<0;2,1>
+ mac (16) acc0<1>:w ubCURR_UV(16,2)<16;16,1> 256:w
+ mac (8) acc0.0<1>:w ubPREV_UV(0,224)<8;8,1> uwCURBE_TEMP(0,12)<0;2,1>
+ mac (8) acc0.8<1>:w ubPREV_UV(0,232)<8;8,1> uwCURBE_TEMP(0,14)<0;2,1>
+ add (16) acc0<1>:w acc0<16;16,1>:w 128:w
+ (f0.0) shr (16) wDIFF_TEMPORAL(14)<1> acc0<16;16,1>:w 8:w
+ (-f0.0) mov (16) wDIFF_TEMPORAL(14)<1> ubCURR_UV(16,2)<16;16,1>
+ cmp.le.f0.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_TEMP_3(0)<16;16,1> r61.26<0;2,1>:ub
+ (-f0.0.allv) avg (16) wDIFF_TEMPORAL(14)<1> wDIFF_TEMPORAL(14)<16;16,1> ubCURR_UV(16,2)<16;16,1>
+ // Row 15 (ubCURR_UV row 17): same sequence as row 12
+ cmp.l.f0.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(15)<16;16,1> r61.28<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(15)<16;16,1> r61.30<0;2,1>:ub
+ mul (8) acc0.0<1>:w ubCURR_UV(17,2)<8;8,1> -uwCURBE_TEMP(0,12)<0;2,1>
+ mul (8) acc0.8<1>:w ubCURR_UV(17,10)<8;8,1> -uwCURBE_TEMP(0,14)<0;2,1>
+ mac (16) acc0<1>:w ubCURR_UV(17,2)<16;16,1> 256:w
+ mac (8) acc0.0<1>:w ubPREV_UV(0,240)<8;8,1> uwCURBE_TEMP(0,12)<0;2,1>
+ mac (8) acc0.8<1>:w ubPREV_UV(0,248)<8;8,1> uwCURBE_TEMP(0,14)<0;2,1>
+ add (16) acc0<1>:w acc0<16;16,1>:w 128:w
+ (f0.0) shr (16) wDIFF_TEMPORAL(15)<1> acc0<16;16,1>:w 8:w
+ (-f0.0) mov (16) wDIFF_TEMPORAL(15)<1> ubCURR_UV(17,2)<16;16,1>
+ cmp.le.f0.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_TEMP_3(0)<16;16,1> r61.26<0;2,1>:ub
+ (-f0.0.allv) avg (16) wDIFF_TEMPORAL(15)<1> wDIFF_TEMPORAL(15)<16;16,1> ubCURR_UV(17,2)<16;16,1>
+
+
+ //16x4 to 8x4 - Second 16 lines (region <4;2,1>: word pairs at +0 and +2 of every 4 words are added)
+ add (16) uwDIFF_TEMPORAL_SUM4x4(0)<1> uwDIFF_TEMPORAL_SUM4x4(0,0)<4;2,1> uwDIFF_TEMPORAL_SUM4x4(0,2)<4;2,1>
+ add (16) uwDIFF_TEMPORAL_SUM4x4(1)<1> uwDIFF_TEMPORAL_SUM4x4(2,0)<4;2,1> uwDIFF_TEMPORAL_SUM4x4(2,2)<4;2,1>
+
+ //8x4 to 4x4 - Second 16 lines (result kept in uwDIFF_TEMPORAL_SUM4x4_FINAL(1))
+ add (16) uwDIFF_TEMPORAL_SUM4x4_FINAL(1)<1> uwDIFF_TEMPORAL_SUM4x4(0,0)<4;2,1> uwDIFF_TEMPORAL_SUM4x4(0,2)<4;2,1> { AccWrEn }
+
+ //Find if block is motion block - Second 16 lines
+ cmp.g.f1.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_FINAL(1)<16;16,1> r61.26<0;2,1>:ub // f1.0 set where the 4x4 sum > r61.26 pair
+
+ //Move TEMPORAL_SUM4x4 for SIMD16 use later. (overwrites the ..._TEMP_0..3 copies with the second-half sums; <0;2,1> repeats one 2-word pair)
+ mov (8) uwDIFF_TEMPORAL_SUM4x4_TEMP_0(0,0)<1> uwDIFF_TEMPORAL_SUM4x4_FINAL(1,0)<0;2,1>
+ mov (8) uwDIFF_TEMPORAL_SUM4x4_TEMP_0(0,8)<1> uwDIFF_TEMPORAL_SUM4x4_FINAL(1,2)<0;2,1>
+ mov (8) uwDIFF_TEMPORAL_SUM4x4_TEMP_1(0,0)<1> uwDIFF_TEMPORAL_SUM4x4_FINAL(1,4)<0;2,1>
+ mov (8) uwDIFF_TEMPORAL_SUM4x4_TEMP_1(0,8)<1> uwDIFF_TEMPORAL_SUM4x4_FINAL(1,6)<0;2,1>
+ mov (8) uwDIFF_TEMPORAL_SUM4x4_TEMP_2(0,0)<1> uwDIFF_TEMPORAL_SUM4x4_FINAL(1,8)<0;2,1>
+ mov (8) uwDIFF_TEMPORAL_SUM4x4_TEMP_2(0,8)<1> uwDIFF_TEMPORAL_SUM4x4_FINAL(1,10)<0;2,1>
+ mov (8) uwDIFF_TEMPORAL_SUM4x4_TEMP_3(0,0)<1> uwDIFF_TEMPORAL_SUM4x4_FINAL(1,12)<0;2,1>
+ mov (8) uwDIFF_TEMPORAL_SUM4x4_TEMP_3(0,8)<1> uwDIFF_TEMPORAL_SUM4x4_FINAL(1,14)<0;2,1>
+
+ //Pick Appropriate Weight History Based on motion. - Second 16 lines
+ (-f1.0) mov (16) uwCURBE_TEMP(1)<1> uwCURBE_TEMP(3)<16;16,1> // f1.0 clear: take uwCURBE_TEMP(3); f1.0 set: keep the value already in uwCURBE_TEMP(1)
+
+ //Actual DN - Second 16 lines
+ cmp.l.f0.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(16)<16;16,1> r61.28<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(16)<16;16,1> r61.30<0;2,1>:ub
+ mul (8) acc0.0<1>:w ubCURR_UV(22,2)<8;8,1> -uwCURBE_TEMP(1,0)<0;2,1>
+ mul (8) acc0.8<1>:w ubCURR_UV(22,10)<8;8,1> -uwCURBE_TEMP(1,2)<0;2,1>
+ mac (16) acc0<1>:w ubCURR_UV(22,2)<16;16,1> 256:w
+ mac (8) acc0.0<1>:w ubPREV_UV(8,0)<8;8,1> uwCURBE_TEMP(1,0)<0;2,1>
+ mac (8) acc0.8<1>:w ubPREV_UV(8,8)<8;8,1> uwCURBE_TEMP(1,2)<0;2,1>
+ add (16) acc0<1>:w acc0<16;16,1>:w 128:w
+ (f0.0) shr (16) wDIFF_TEMPORAL(16)<1> acc0<16;16,1>:w 8:w
+ (-f0.0) mov (16) wDIFF_TEMPORAL(16)<1> ubCURR_UV(22,2)<16;16,1>
+ cmp.le.f0.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_TEMP_0(0)<16;16,1> r61.26<0;2,1>:ub
+ (-f0.0.allv) avg (16) wDIFF_TEMPORAL(16)<1> wDIFF_TEMPORAL(16)<16;16,1> ubCURR_UV(22,2)<16;16,1>
+
+ cmp.l.f0.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(17)<16;16,1> r61.28<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(17)<16;16,1> r61.30<0;2,1>:ub
+ mul (8) acc0.0<1>:w ubCURR_UV(23,2)<8;8,1> -uwCURBE_TEMP(1,0)<0;2,1>
+ mul (8) acc0.8<1>:w ubCURR_UV(23,10)<8;8,1> -uwCURBE_TEMP(1,2)<0;2,1>
+ mac (16) acc0<1>:w ubCURR_UV(23,2)<16;16,1> 256:w
+ mac (8) acc0.0<1>:w ubPREV_UV(8,16)<8;8,1> uwCURBE_TEMP(1,0)<0;2,1>
+ mac (8) acc0.8<1>:w ubPREV_UV(8,24)<8;8,1> uwCURBE_TEMP(1,2)<0;2,1>
+ add (16) acc0<1>:w acc0<16;16,1>:w 128:w
+ (f0.0) shr (16) wDIFF_TEMPORAL(17)<1> acc0<16;16,1>:w 8:w
+ (-f0.0) mov (16) wDIFF_TEMPORAL(17)<1> ubCURR_UV(23,2)<16;16,1>
+ cmp.le.f0.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_TEMP_0(0)<16;16,1> r61.26<0;2,1>:ub
+ (-f0.0.allv) avg (16) wDIFF_TEMPORAL(17)<1> wDIFF_TEMPORAL(17)<16;16,1> ubCURR_UV(23,2)<16;16,1>
+
+ cmp.l.f0.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(18)<16;16,1> r61.28<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(18)<16;16,1> r61.30<0;2,1>:ub
+ mul (8) acc0.0<1>:w ubCURR_UV(24,2)<8;8,1> -uwCURBE_TEMP(1,0)<0;2,1>
+ mul (8) acc0.8<1>:w ubCURR_UV(24,10)<8;8,1> -uwCURBE_TEMP(1,2)<0;2,1>
+ mac (16) acc0<1>:w ubCURR_UV(24,2)<16;16,1> 256:w
+ mac (8) acc0.0<1>:w ubPREV_UV(8,32)<8;8,1> uwCURBE_TEMP(1,0)<0;2,1>
+ mac (8) acc0.8<1>:w ubPREV_UV(8,40)<8;8,1> uwCURBE_TEMP(1,2)<0;2,1>
+ add (16) acc0<1>:w acc0<16;16,1>:w 128:w
+ (f0.0) shr (16) wDIFF_TEMPORAL(18)<1> acc0<16;16,1>:w 8:w
+ (-f0.0) mov (16) wDIFF_TEMPORAL(18)<1> ubCURR_UV(24,2)<16;16,1>
+ cmp.le.f0.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_TEMP_0(0)<16;16,1> r61.26<0;2,1>:ub
+ (-f0.0.allv) avg (16) wDIFF_TEMPORAL(18)<1> wDIFF_TEMPORAL(18)<16;16,1> ubCURR_UV(24,2)<16;16,1>
+
+ cmp.l.f0.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(19)<16;16,1> r61.28<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(19)<16;16,1> r61.30<0;2,1>:ub
+ mul (8) acc0.0<1>:w ubCURR_UV(25,2)<8;8,1> -uwCURBE_TEMP(1,0)<0;2,1>
+ mul (8) acc0.8<1>:w ubCURR_UV(25,10)<8;8,1> -uwCURBE_TEMP(1,2)<0;2,1>
+ mac (16) acc0<1>:w ubCURR_UV(25,2)<16;16,1> 256:w
+ mac (8) acc0.0<1>:w ubPREV_UV(8,48)<8;8,1> uwCURBE_TEMP(1,0)<0;2,1>
+ mac (8) acc0.8<1>:w ubPREV_UV(8,56)<8;8,1> uwCURBE_TEMP(1,2)<0;2,1>
+ add (16) acc0<1>:w acc0<16;16,1>:w 128:w
+ (f0.0) shr (16) wDIFF_TEMPORAL(19)<1> acc0<16;16,1>:w 8:w
+ (-f0.0) mov (16) wDIFF_TEMPORAL(19)<1> ubCURR_UV(25,2)<16;16,1>
+ cmp.le.f0.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_TEMP_0(0)<16;16,1> r61.26<0;2,1>:ub
+ (-f0.0.allv) avg (16) wDIFF_TEMPORAL(19)<1> wDIFF_TEMPORAL(19)<16;16,1> ubCURR_UV(25,2)<16;16,1>
+ cmp.l.f0.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(20)<16;16,1> r61.28<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(20)<16;16,1> r61.30<0;2,1>:ub
+ mul (8) acc0.0<1>:w ubCURR_UV(26,2)<8;8,1> -uwCURBE_TEMP(1,4)<0;2,1>
+ mul (8) acc0.8<1>:w ubCURR_UV(26,10)<8;8,1> -uwCURBE_TEMP(1,6)<0;2,1>
+ mac (16) acc0<1>:w ubCURR_UV(26,2)<16;16,1> 256:w
+ mac (8) acc0.0<1>:w ubPREV_UV(8,64)<8;8,1> uwCURBE_TEMP(1,4)<0;2,1>
+ mac (8) acc0.8<1>:w ubPREV_UV(8,72)<8;8,1> uwCURBE_TEMP(1,6)<0;2,1>
+ add (16) acc0<1>:w acc0<16;16,1>:w 128:w
+ (f0.0) shr (16) wDIFF_TEMPORAL(20)<1> acc0<16;16,1>:w 8:w
+ (-f0.0) mov (16) wDIFF_TEMPORAL(20)<1> ubCURR_UV(26,2)<16;16,1>
+ cmp.le.f0.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_TEMP_1(0)<16;16,1> r61.26<0;2,1>:ub
+ (-f0.0.allv) avg (16) wDIFF_TEMPORAL(20)<1> wDIFF_TEMPORAL(20)<16;16,1> ubCURR_UV(26,2)<16;16,1>
+
+ cmp.l.f0.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(21)<16;16,1> r61.28<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(21)<16;16,1> r61.30<0;2,1>:ub
+ mul (8) acc0.0<1>:w ubCURR_UV(27,2)<8;8,1> -uwCURBE_TEMP(1,4)<0;2,1>
+ mul (8) acc0.8<1>:w ubCURR_UV(27,10)<8;8,1> -uwCURBE_TEMP(1,6)<0;2,1>
+ mac (16) acc0<1>:w ubCURR_UV(27,2)<16;16,1> 256:w
+ mac (8) acc0.0<1>:w ubPREV_UV(8,80)<8;8,1> uwCURBE_TEMP(1,4)<0;2,1>
+ mac (8) acc0.8<1>:w ubPREV_UV(8,88)<8;8,1> uwCURBE_TEMP(1,6)<0;2,1>
+ add (16) acc0<1>:w acc0<16;16,1>:w 128:w
+ (f0.0) shr (16) wDIFF_TEMPORAL(21)<1> acc0<16;16,1>:w 8:w
+ (-f0.0) mov (16) wDIFF_TEMPORAL(21)<1> ubCURR_UV(27,2)<16;16,1>
+ cmp.le.f0.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_TEMP_1(0)<16;16,1> r61.26<0;2,1>:ub
+ (-f0.0.allv) avg (16) wDIFF_TEMPORAL(21)<1> wDIFF_TEMPORAL(21)<16;16,1> ubCURR_UV(27,2)<16;16,1>
+
+ cmp.l.f0.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(22)<16;16,1> r61.28<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(22)<16;16,1> r61.30<0;2,1>:ub
+ mul (8) acc0.0<1>:w ubCURR_UV(28,2)<8;8,1> -uwCURBE_TEMP(1,4)<0;2,1>
+ mul (8) acc0.8<1>:w ubCURR_UV(28,10)<8;8,1> -uwCURBE_TEMP(1,6)<0;2,1>
+ mac (16) acc0<1>:w ubCURR_UV(28,2)<16;16,1> 256:w
+ mac (8) acc0.0<1>:w ubPREV_UV(8,96)<8;8,1> uwCURBE_TEMP(1,4)<0;2,1>
+ mac (8) acc0.8<1>:w ubPREV_UV(8,104)<8;8,1> uwCURBE_TEMP(1,6)<0;2,1>
+ add (16) acc0<1>:w acc0<16;16,1>:w 128:w
+ (f0.0) shr (16) wDIFF_TEMPORAL(22)<1> acc0<16;16,1>:w 8:w
+ (-f0.0) mov (16) wDIFF_TEMPORAL(22)<1> ubCURR_UV(28,2)<16;16,1>
+ cmp.le.f0.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_TEMP_1(0)<16;16,1> r61.26<0;2,1>:ub
+ (-f0.0.allv) avg (16) wDIFF_TEMPORAL(22)<1> wDIFF_TEMPORAL(22)<16;16,1> ubCURR_UV(28,2)<16;16,1>
+
+ cmp.l.f0.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(23)<16;16,1> r61.28<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(23)<16;16,1> r61.30<0;2,1>:ub
+ mul (8) acc0.0<1>:w ubCURR_UV(29,2)<8;8,1> -uwCURBE_TEMP(1,4)<0;2,1>
+ mul (8) acc0.8<1>:w ubCURR_UV(29,10)<8;8,1> -uwCURBE_TEMP(1,6)<0;2,1>
+ mac (16) acc0<1>:w ubCURR_UV(29,2)<16;16,1> 256:w
+ mac (8) acc0.0<1>:w ubPREV_UV(8,112)<8;8,1> uwCURBE_TEMP(1,4)<0;2,1>
+ mac (8) acc0.8<1>:w ubPREV_UV(8,120)<8;8,1> uwCURBE_TEMP(1,6)<0;2,1>
+ add (16) acc0<1>:w acc0<16;16,1>:w 128:w
+ (f0.0) shr (16) wDIFF_TEMPORAL(23)<1> acc0<16;16,1>:w 8:w
+ (-f0.0) mov (16) wDIFF_TEMPORAL(23)<1> ubCURR_UV(29,2)<16;16,1>
+ cmp.le.f0.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_TEMP_1(0)<16;16,1> r61.26<0;2,1>:ub
+ (-f0.0.allv) avg (16) wDIFF_TEMPORAL(23)<1> wDIFF_TEMPORAL(23)<16;16,1> ubCURR_UV(29,2)<16;16,1>
+ cmp.l.f0.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(24)<16;16,1> r61.28<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(24)<16;16,1> r61.30<0;2,1>:ub
+ mul (8) acc0.0<1>:w ubCURR_UV(30,2)<8;8,1> -uwCURBE_TEMP(1,8)<0;2,1>
+ mul (8) acc0.8<1>:w ubCURR_UV(30,10)<8;8,1> -uwCURBE_TEMP(1,10)<0;2,1>
+ mac (16) acc0<1>:w ubCURR_UV(30,2)<16;16,1> 256:w
+ mac (8) acc0.0<1>:w ubPREV_UV(8,128)<8;8,1> uwCURBE_TEMP(1,8)<0;2,1>
+ mac (8) acc0.8<1>:w ubPREV_UV(8,136)<8;8,1> uwCURBE_TEMP(1,10)<0;2,1>
+ add (16) acc0<1>:w acc0<16;16,1>:w 128:w
+ (f0.0) shr (16) wDIFF_TEMPORAL(24)<1> acc0<16;16,1>:w 8:w
+ (-f0.0) mov (16) wDIFF_TEMPORAL(24)<1> ubCURR_UV(30,2)<16;16,1>
+ cmp.le.f0.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_TEMP_2(0)<16;16,1> r61.26<0;2,1>:ub
+ (-f0.0.allv) avg (16) wDIFF_TEMPORAL(24)<1> wDIFF_TEMPORAL(24)<16;16,1> ubCURR_UV(30,2)<16;16,1>
+
+ cmp.l.f0.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(25)<16;16,1> r61.28<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(25)<16;16,1> r61.30<0;2,1>:ub
+ mul (8) acc0.0<1>:w ubCURR_UV(31,2)<8;8,1> -uwCURBE_TEMP(1,8)<0;2,1>
+ mul (8) acc0.8<1>:w ubCURR_UV(31,10)<8;8,1> -uwCURBE_TEMP(1,10)<0;2,1>
+ mac (16) acc0<1>:w ubCURR_UV(31,2)<16;16,1> 256:w
+ mac (8) acc0.0<1>:w ubPREV_UV(8,144)<8;8,1> uwCURBE_TEMP(1,8)<0;2,1>
+ mac (8) acc0.8<1>:w ubPREV_UV(8,152)<8;8,1> uwCURBE_TEMP(1,10)<0;2,1>
+ add (16) acc0<1>:w acc0<16;16,1>:w 128:w
+ (f0.0) shr (16) wDIFF_TEMPORAL(25)<1> acc0<16;16,1>:w 8:w
+ (-f0.0) mov (16) wDIFF_TEMPORAL(25)<1> ubCURR_UV(31,2)<16;16,1>
+ cmp.le.f0.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_TEMP_2(0)<16;16,1> r61.26<0;2,1>:ub
+ (-f0.0.allv) avg (16) wDIFF_TEMPORAL(25)<1> wDIFF_TEMPORAL(25)<16;16,1> ubCURR_UV(31,2)<16;16,1>
+
+ cmp.l.f0.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(26)<16;16,1> r61.28<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(26)<16;16,1> r61.30<0;2,1>:ub
+ mul (8) acc0.0<1>:w ubCURR_UV(32,2)<8;8,1> -uwCURBE_TEMP(1,8)<0;2,1>
+ mul (8) acc0.8<1>:w ubCURR_UV(32,10)<8;8,1> -uwCURBE_TEMP(1,10)<0;2,1>
+ mac (16) acc0<1>:w ubCURR_UV(32,2)<16;16,1> 256:w
+ mac (8) acc0.0<1>:w ubPREV_UV(8,160)<8;8,1> uwCURBE_TEMP(1,8)<0;2,1>
+ mac (8) acc0.8<1>:w ubPREV_UV(8,168)<8;8,1> uwCURBE_TEMP(1,10)<0;2,1>
+ add (16) acc0<1>:w acc0<16;16,1>:w 128:w
+ (f0.0) shr (16) wDIFF_TEMPORAL(26)<1> acc0<16;16,1>:w 8:w
+ (-f0.0) mov (16) wDIFF_TEMPORAL(26)<1> ubCURR_UV(32,2)<16;16,1>
+ cmp.le.f0.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_TEMP_2(0)<16;16,1> r61.26<0;2,1>:ub
+ (-f0.0.allv) avg (16) wDIFF_TEMPORAL(26)<1> wDIFF_TEMPORAL(26)<16;16,1> ubCURR_UV(32,2)<16;16,1>
+
+ cmp.l.f0.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(27)<16;16,1> r61.28<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(27)<16;16,1> r61.30<0;2,1>:ub
+ mul (8) acc0.0<1>:w ubCURR_UV(33,2)<8;8,1> -uwCURBE_TEMP(1,8)<0;2,1>
+ mul (8) acc0.8<1>:w ubCURR_UV(33,10)<8;8,1> -uwCURBE_TEMP(1,10)<0;2,1>
+ mac (16) acc0<1>:w ubCURR_UV(33,2)<16;16,1> 256:w
+ mac (8) acc0.0<1>:w ubPREV_UV(8,176)<8;8,1> uwCURBE_TEMP(1,8)<0;2,1>
+ mac (8) acc0.8<1>:w ubPREV_UV(8,184)<8;8,1> uwCURBE_TEMP(1,10)<0;2,1>
+ add (16) acc0<1>:w acc0<16;16,1>:w 128:w
+ (f0.0) shr (16) wDIFF_TEMPORAL(27)<1> acc0<16;16,1>:w 8:w
+ (-f0.0) mov (16) wDIFF_TEMPORAL(27)<1> ubCURR_UV(33,2)<16;16,1>
+ cmp.le.f0.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_TEMP_2(0)<16;16,1> r61.26<0;2,1>:ub
+ (-f0.0.allv) avg (16) wDIFF_TEMPORAL(27)<1> wDIFF_TEMPORAL(27)<16;16,1> ubCURR_UV(33,2)<16;16,1>
+
+ cmp.l.f0.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(28)<16;16,1> r61.28<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(28)<16;16,1> r61.30<0;2,1>:ub
+ mul (8) acc0.0<1>:w ubCURR_UV(34,2)<8;8,1> -uwCURBE_TEMP(1,12)<0;2,1>
+ mul (8) acc0.8<1>:w ubCURR_UV(34,10)<8;8,1> -uwCURBE_TEMP(1,14)<0;2,1>
+ mac (16) acc0<1>:w ubCURR_UV(34,2)<16;16,1> 256:w
+ mac (8) acc0.0<1>:w ubPREV_UV(8,192)<8;8,1> uwCURBE_TEMP(1,12)<0;2,1>
+ mac (8) acc0.8<1>:w ubPREV_UV(8,200)<8;8,1> uwCURBE_TEMP(1,14)<0;2,1>
+ add (16) acc0<1>:w acc0<16;16,1>:w 128:w
+ (f0.0) shr (16) wDIFF_TEMPORAL(28)<1> acc0<16;16,1>:w 8:w
+ (-f0.0) mov (16) wDIFF_TEMPORAL(28)<1> ubCURR_UV(34,2)<16;16,1>
+ cmp.le.f0.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_TEMP_3(0)<16;16,1> r61.26<0;2,1>:ub
+ (-f0.0.allv) avg (16) wDIFF_TEMPORAL(28)<1> wDIFF_TEMPORAL(28)<16;16,1> ubCURR_UV(34,2)<16;16,1>
+
+ cmp.l.f0.0 (16) null<1>:w (abs)wCURBE_TEMP(4)<16;16,1> r61.28<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w (abs)wCURBE_TEMP(4)<16;16,1> r61.30<0;2,1>:ub
+ mul (8) acc0.0<1>:w ubCURR_UV(35,2)<8;8,1> -uwCURBE_TEMP(1,12)<0;2,1>
+ mul (8) acc0.8<1>:w ubCURR_UV(35,10)<8;8,1> -uwCURBE_TEMP(1,14)<0;2,1>
+ mac (16) acc0<1>:w ubCURR_UV(35,2)<16;16,1> 256:w
+ mac (8) acc0.0<1>:w ubPREV_UV(8,208)<8;8,1> uwCURBE_TEMP(1,12)<0;2,1>
+ mac (8) acc0.8<1>:w ubPREV_UV(8,216)<8;8,1> uwCURBE_TEMP(1,14)<0;2,1>
+ add (16) acc0<1>:w acc0<16;16,1>:w 128:w
+ (f0.0) shr (16) wCURBE_TEMP(4)<1> acc0<16;16,1>:w 8:w
+ (-f0.0) mov (16) wCURBE_TEMP(4)<1> ubCURR_UV(35,2)<16;16,1>
+ cmp.le.f0.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_TEMP_3(0)<16;16,1> r61.26<0;2,1>:ub
+ (-f0.0.allv) avg (16) wCURBE_TEMP(4)<1> wCURBE_TEMP(4)<16;16,1> ubCURR_UV(35,2)<16;16,1>
+
+ cmp.l.f0.0 (16) null<1>:w (abs)wCURBE_TEMP(5)<16;16,1> r61.28<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w (abs)wCURBE_TEMP(5)<16;16,1> r61.30<0;2,1>:ub
+ mul (8) acc0.0<1>:w ubCURR_UV(36,2)<8;8,1> -uwCURBE_TEMP(1,12)<0;2,1>
+ mul (8) acc0.8<1>:w ubCURR_UV(36,10)<8;8,1> -uwCURBE_TEMP(1,14)<0;2,1>
+ mac (16) acc0<1>:w ubCURR_UV(36,2)<16;16,1> 256:w
+ mac (8) acc0.0<1>:w ubPREV_UV(8,224)<8;8,1> uwCURBE_TEMP(1,12)<0;2,1>
+ mac (8) acc0.8<1>:w ubPREV_UV(8,232)<8;8,1> uwCURBE_TEMP(1,14)<0;2,1>
+ add (16) acc0<1>:w acc0<16;16,1>:w 128:w
+ (f0.0) shr (16) wCURBE_TEMP(5)<1> acc0<16;16,1>:w 8:w
+ (-f0.0) mov (16) wCURBE_TEMP(5)<1> ubCURR_UV(36,2)<16;16,1>
+ cmp.le.f0.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_TEMP_3(0)<16;16,1> r61.26<0;2,1>:ub
+ (-f0.0.allv) avg (16) wCURBE_TEMP(5)<1> wCURBE_TEMP(5)<16;16,1> ubCURR_UV(36,2)<16;16,1>
+
+ cmp.l.f0.0 (16) null<1>:w (abs)wCURBE_TEMP(6)<16;16,1> r61.28<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w (abs)wCURBE_TEMP(6)<16;16,1> r61.30<0;2,1>:ub
+ mul (8) acc0.0<1>:w ubCURR_UV(37,2)<8;8,1> -uwCURBE_TEMP(1,12)<0;2,1>
+ mul (8) acc0.8<1>:w ubCURR_UV(37,10)<8;8,1> -uwCURBE_TEMP(1,14)<0;2,1>
+ mac (16) acc0<1>:w ubCURR_UV(37,2)<16;16,1> 256:w
+ mac (8) acc0.0<1>:w ubPREV_UV(8,240)<8;8,1> uwCURBE_TEMP(1,12)<0;2,1>
+ mac (8) acc0.8<1>:w ubPREV_UV(8,248)<8;8,1> uwCURBE_TEMP(1,14)<0;2,1>
+ add (16) acc0<1>:w acc0<16;16,1>:w 128:w
+ (f0.0) shr (16) wCURBE_TEMP(6)<1> acc0<16;16,1>:w 8:w
+ (-f0.0) mov (16) wCURBE_TEMP(6)<1> ubCURR_UV(37,2)<16;16,1>
+ cmp.le.f0.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_TEMP_3(0)<16;16,1> r61.26<0;2,1>:ub
+ (-f0.0.allv) avg (16) wCURBE_TEMP(6)<1> wCURBE_TEMP(6)<16;16,1> ubCURR_UV(37,2)<16;16,1>
+
+ //Pack Weight History WORD -> BYTE
+ mov (16) ubCURBE_TEMP(3,0)<1> ubCURBE_TEMP(0)<32;16,2>
+ mov (16) ubCURBE_TEMP(3,16)<1> ubCURBE_TEMP(1)<32;16,2>
+
+
+
+//Module Name : DN_UV_Compute_BNE_UV
+//Author : Tatiya, Rupesh
+//Description : Computes minimum SOAD for each 16x4 block.
+// Per-channel minimum of two uwSOAD_MIN_8x4 operands: cmp.l sets f0.0 where the first is smaller; the predicated sel keeps the first there and the second elsewhere. Region <16;4,1> over 8 channels takes 4 consecutive words from each of two rows.
+ cmp.l.f0.0 (8) null:w uwSOAD_MIN_8x4(0,12)<16;4,1> uwSOAD_MIN_8x4(2,12)<16;4,1>
+ (f0.0)sel (8) uwCURBE_TEMP(1,0)<1> uwSOAD_MIN_8x4(0,12)<16;4,1> uwSOAD_MIN_8x4(2,12)<16;4,1>
+// Narrow to bytes: the source reads every second byte (stride 2), the destination is packed (stride 1). NOTE(review): presumably ubCURBE_TEMP(1) is the byte view of the uwCURBE_TEMP(1,0) result just written - confirm against the register declarations.
+ mov (8) ubCURBE_TEMP(1)<1> ubCURBE_TEMP(1)<16;8,2>
+
+
+
+//Module Name : DN_UV_YUY2_Pack_Denoised_UV
+//Name : Tatiya, Rupesh
+//Description : Pack UV denoised data based on YUY2 input.
+
+
+
+//Module Name : DN_UV_Pack_Denoised_UV
+//Author : Tatiya, Rupesh
+//Description : Pack UV denoised data based on PL2/PL3/PA.
+// Copies 32 rows of 16 bytes from the filter result registers into four 8-register groups addressed indirectly through a0.0 - a0.3.
+// Base byte addresses: 2976 / 3264 / 3552 / 3840 = 32 * 93 / 102 / 111 / 120, i.e. r93, r102, r111, r120 (32-byte registers), each offset by the byte held in r54.21.
+ add (1) a0.0<1>:uw r54.21<0;1,0>:ub 2976:w
+ add (1) a0.1<1>:uw r54.21<0;1,0>:ub 3264:w
+ add (1) a0.2<1>:uw r54.21<0;1,0>:ub 3552:w
+ add (1) a0.3<1>:uw r54.21<0;1,0>:ub 3840:w
+// Each mov reads every second source byte (<32;16,2>) and writes with destination stride 2, so only alternate bytes of the target register are touched. NOTE(review): r54.21 presumably selects which byte of each pair is written - confirm.
+//First 8 lines - group at a0.0, one register (+32 bytes) per line.
+ mov (16) r[a0.0, 0]<2>:ub ubDIFF_TEMPORAL(0)<32;16,2>
+ mov (16) r[a0.0, 32]<2>:ub ubDIFF_TEMPORAL(1)<32;16,2>
+ mov (16) r[a0.0, 64]<2>:ub ubDIFF_TEMPORAL(2)<32;16,2>
+ mov (16) r[a0.0, 96]<2>:ub ubDIFF_TEMPORAL(3)<32;16,2>
+ mov (16) r[a0.0, 128]<2>:ub ubDIFF_TEMPORAL(4)<32;16,2>
+ mov (16) r[a0.0, 160]<2>:ub ubDIFF_TEMPORAL(5)<32;16,2>
+ mov (16) r[a0.0, 192]<2>:ub ubDIFF_TEMPORAL(6)<32;16,2>
+ mov (16) r[a0.0, 224]<2>:ub ubDIFF_TEMPORAL(7)<32;16,2>
+
+//Second 8 lines - group at a0.1.
+ mov (16) r[a0.1, 0]<2>:ub ubDIFF_TEMPORAL(8)<32;16,2>
+ mov (16) r[a0.1, 32]<2>:ub ubDIFF_TEMPORAL(9)<32;16,2>
+ mov (16) r[a0.1, 64]<2>:ub ubDIFF_TEMPORAL(10)<32;16,2>
+ mov (16) r[a0.1, 96]<2>:ub ubDIFF_TEMPORAL(11)<32;16,2>
+ mov (16) r[a0.1, 128]<2>:ub ubDIFF_TEMPORAL(12)<32;16,2>
+ mov (16) r[a0.1, 160]<2>:ub ubDIFF_TEMPORAL(13)<32;16,2>
+ mov (16) r[a0.1, 192]<2>:ub ubDIFF_TEMPORAL(14)<32;16,2>
+ mov (16) r[a0.1, 224]<2>:ub ubDIFF_TEMPORAL(15)<32;16,2>
+
+//Third 8 lines - group at a0.2.
+ mov (16) r[a0.2, 0]<2>:ub ubDIFF_TEMPORAL(16)<32;16,2>
+ mov (16) r[a0.2, 32]<2>:ub ubDIFF_TEMPORAL(17)<32;16,2>
+ mov (16) r[a0.2, 64]<2>:ub ubDIFF_TEMPORAL(18)<32;16,2>
+ mov (16) r[a0.2, 96]<2>:ub ubDIFF_TEMPORAL(19)<32;16,2>
+ mov (16) r[a0.2, 128]<2>:ub ubDIFF_TEMPORAL(20)<32;16,2>
+ mov (16) r[a0.2, 160]<2>:ub ubDIFF_TEMPORAL(21)<32;16,2>
+ mov (16) r[a0.2, 192]<2>:ub ubDIFF_TEMPORAL(22)<32;16,2>
+ mov (16) r[a0.2, 224]<2>:ub ubDIFF_TEMPORAL(23)<32;16,2>
+
+//Fourth 8 lines - group at a0.3.
+//5 lines first - these come from ubDIFF_TEMPORAL(24) - (28).
+ mov (16) r[a0.3, 0]<2>:ub ubDIFF_TEMPORAL(24)<32;16,2>
+ mov (16) r[a0.3, 32]<2>:ub ubDIFF_TEMPORAL(25)<32;16,2>
+ mov (16) r[a0.3, 64]<2>:ub ubDIFF_TEMPORAL(26)<32;16,2>
+ mov (16) r[a0.3, 96]<2>:ub ubDIFF_TEMPORAL(27)<32;16,2>
+ mov (16) r[a0.3, 128]<2>:ub ubDIFF_TEMPORAL(28)<32;16,2>
+
+//3 more lines - the last three rows were filtered into CURBE_TEMP(4) - (6) instead of DIFF_TEMPORAL.
+ mov (16) r[a0.3, 160]<2>:ub ubCURBE_TEMP(4)<32;16,2>
+ mov (16) r[a0.3, 192]<2>:ub ubCURBE_TEMP(5)<32;16,2>
+ mov (16) r[a0.3, 224]<2>:ub ubCURBE_TEMP(6)<32;16,2>
+
+
+ //TODO - See if History saving can be combined with Curr Frame Save. - rT
+
+
+//Module Name : DN_UV_Save_Hist_UV
+//Author : Tatiya, Rupesh
+//Description : Saves DN history for UV data.
+// Build the message header in r3: start from a copy of r0, overwrite dwords 0-1 with the two words at r62.12 (widened to dwords), and set dword 2 to 0x30007.
+ mov (8) r3.0<1>:ud r0.0<8;8,1>:ud
+ mov (2) r3.0<1>:d r62.12<2;2,1>:w
+ mov (1) r3.2<1>:d 0x30007:ud
+// Null destination, so nothing is read back. NOTE(review): dwords 0-1 look like the X/Y block origin, 0x30007 like a block size of 8 bytes x 4 rows, and the descriptor like a 2-register message (header r3 + data r4) - confirm against the data port message definition.
+ send (8) null<1>:d r3 0x5 0x40A8021:ud
+
+
+
+//Module Name : DN_UV_Save_BNE_UV
+//Author : Tatiya, Rupesh
+//Description : Saves BNE values for 16x16 U and 16x16 V.
+// Build the message header in r1: start from a copy of r0, overwrite dwords 0-1 with the two words at r63.12 (widened to dwords), and set dword 2 to 0x10003.
+ mov (8) r1.0<1>:ud r0.0<8;8,1>:ud
+ mov (2) r1.0<1>:d r63.12<2;2,1>:w
+ mov (1) r1.2<1>:d 0x10003:ud
+// Null destination, so nothing is read back. NOTE(review): dwords 0-1 look like the X/Y block origin, 0x10003 like a block size of 4 bytes x 2 rows, and the descriptor like a 2-register message (header r1 + data r2) - confirm against the data port message definition.
+ send (8) null<1>:d r1 0x5 0x40A8023:ud
+
+
+
+//Module Name : DN_UV_YUY2_Save_Curr_Frame_YUV
+//Author : Tatiya, Rupesh
+
+
+
+//Module Name : DN_UV_Load_Curr_Frame_Y
+//Author : Tatiya, Rupesh
+//Description : Saves Y or YUY2 of Current frame.
+// NOTE(review): the module name says "Load", but every send below has a null destination, which matches the "Saves" description - confirm the intended name.
+// Builds four message headers (r92, r101, r110, r119) from one template held in acc0 and issues one send per header.
+// Template = copy of r0 with dword 0 = r62.10, dword 1 = r62.11, dword 2 = 0x7001F.
+// The shl doubles r62.10 in place first, so r62.10 stays doubled after this block.
+ mov (8) acc0.0<1>:ud r0.0<8;8,1>:ud
+ shl (1) r62.10<1>:w r62.10<0;1,0>:w 1:w
+ mov (1) acc0.0<1>:d r62.10<0;1,0>:w
+ mov (1) acc0.1<1>:d r62.11<0;1,0>:w
+
+ mov (1) acc0.2<1>:d 0x7001F:ud
+
+ mov (8) r92.0<1>:ud acc0.0<8;8,1>:ud
+
+ mov (8) r101.0<1>:ud acc0.0<8;8,1>:ud
+ mov (8) r110.0<1>:ud acc0.0<8;8,1>:ud
+ mov (8) r119.0<1>:ud acc0.0<8;8,1>:ud
+// Per-header adjustments relative to the template: r101 gets dword 1 + 8, r110 gets dword 0 + 32, r119 gets both.
+ add (1) r101.1<1>:d acc0.1<0;1,0>:d 8:d
+
+ add (1) r110.0<1>:d acc0.0<0;1,0>:d 32:d
+
+ add (1) r119.0<1>:d acc0.0<0;1,0>:d 32:d
+ add (1) r119.1<1>:d acc0.1<0;1,0>:d 8:d
+// NOTE(review): presumably dwords 0/1 are the X/Y origin and 0x7001F a 32-byte x 8-row block size, giving a 2x2 tile of blocks; each send would then carry its header plus the 8 registers after it (r93.., r102.., r111.., r120..) - confirm against the data port message definition.
+ send (8) null<1>:d r92 0x5 0x120A8018:ud
+ send (8) null<1>:d r101 0x5 0x120A8018:ud
+ send (8) null<1>:d r110 0x5 0x120A8018:ud
+ send (8) null<1>:d r119 0x5 0x120A8018:ud
+
+
+
+//End of Thread message
+// r127 is overwritten with a copy of r0 and sent with a null destination. NOTE(review): target 0x27 is presumably the thread spawner with the end-of-thread bit set, and 0x02000010 a one-register message - confirm against the EU send documentation.
+mov (8) r127<1>:ud r0.0<8;8,1>:ud
+ send (1) null<1>:d r127 0x27 0x02000010
+
+
+ //All sub-routines here
+
+
+// Module Name : Noise_Detection
+// Author : Tatiya, Rupesh
+// Description : Performs noise detection on 32 pixels of U (8x4) and 32 pixels of V (8x4).
+
+DN_UV_NOISE_DETECTION_UV:
+
+// Find Field Block Median
+//
+// Purpose : Find the median value of the nine pixels in the same field
+// which are centered at current pixel.
+//
+// Works on 9 pixels centered at the current pixel
+// NOTE: pixels are within same field.
+// v4 - current pixel
+//
+// v2 v1 v0
+// * * * <--- Different field - not used
+// v5 v4 v3
+// * * * <--- Different field - not used
+// v8 v7 v6
+
+// Algorithm to find median modifies the data.
+// Copy the data needed to calculate median so the original source data stays intact.
+//
+
+//TODO - Change Interleaved implementation to separated one if - , does not work on predication. - rT
+
+//Delete Later - rT
+//mov (1) pCUR_UV:uw 52*32:uw
+
+// v0
+mov (16) ubMEDIAN_TEMP(0,0)<1> r[a0.0,0]<16;16,1>
+// v0
+mov (16) ubMEDIAN_TEMP(0,16)<1> r[a0.0,32]<16;16,1>
+// v1
+mov (16) ubMEDIAN_TEMP(1,0)<1> r[a0.0,2]<16;16,1>
+// v1
+mov (16) ubMEDIAN_TEMP(1,16)<1> r[a0.0,34]<16;16,1>
+// v2
+mov (16) ubMEDIAN_TEMP(2,0)<1> r[a0.0,4]<16;16,1>
+// v2
+mov (16) ubMEDIAN_TEMP(2,16)<1> r[a0.0,36]<16;16,1>
+// v3
+mov (16) ubMEDIAN_TEMP(3,0)<1> r[a0.0,64]<16;16,1>
+// v3
+mov (16) ubMEDIAN_TEMP(3,16)<1> r[a0.0,96]<16;16,1>
+// v4
+mov (16) ubMEDIAN_TEMP(4,0)<1> r[a0.0,66]<16;16,1>
+// v4
+mov (16) ubMEDIAN_TEMP(4,16)<1> r[a0.0,98]<16;16,1>
+// v5
+mov (16) ubMEDIAN_TEMP(5,0)<1> r[a0.0,68]<16;16,1>
+// v5
+mov (16) ubMEDIAN_TEMP(5,16)<1> r[a0.0,100]<16;16,1>
+// v6
+mov (16) ubMEDIAN_TEMP(6,0)<1> r[a0.0,128]<16;16,1>
+// v6
+mov (16) ubMEDIAN_TEMP(6,16)<1> r[a0.0,160]<16;16,1>
+// v7
+mov (16) ubMEDIAN_TEMP(7,0)<1> r[a0.0,130]<16;16,1>
+// v7
+mov (16) ubMEDIAN_TEMP(7,16)<1> r[a0.0,162]<16;16,1>
+// v8
+mov (16) ubMEDIAN_TEMP(8,0)<1> r[a0.0,132]<16;16,1>
+// v8
+mov (16) ubMEDIAN_TEMP(8,16)<1> r[a0.0,164]<16;16,1>
+
+//TODO - Optimize one instruction here.
+add (1) a0.0:uw a0.0<0;1,0>:uw 64:uw
+// v0
+mov (16) ubMEDIAN_TEMP(9,0)<1> r[a0.0,0]<16;16,1>
+// v0
+mov (16) ubMEDIAN_TEMP(9,16)<1> r[a0.0,32]<16;16,1>
+// v1
+mov (16) ubMEDIAN_TEMP(10,0)<1> r[a0.0,2]<16;16,1>
+// v1
+mov (16) ubMEDIAN_TEMP(10,16)<1> r[a0.0,34]<16;16,1>
+// v2
+mov (16) ubMEDIAN_TEMP(11,0)<1> r[a0.0,4]<16;16,1>
+// v2
+mov (16) ubMEDIAN_TEMP(11,16)<1> r[a0.0,36]<16;16,1>
+// v3
+mov (16) ubMEDIAN_TEMP(12,0)<1> r[a0.0,64]<16;16,1>
+// v3
+mov (16) ubMEDIAN_TEMP(12,16)<1> r[a0.0,96]<16;16,1>
+// v4
+mov (16) ubMEDIAN_TEMP(13,0)<1> r[a0.0,66]<16;16,1>
+// v4
+mov (16) ubMEDIAN_TEMP(13,16)<1> r[a0.0,98]<16;16,1>
+// v5
+mov (16) ubMEDIAN_TEMP(14,0)<1> r[a0.0,68]<16;16,1>
+// v5
+mov (16) ubMEDIAN_TEMP(14,16)<1> r[a0.0,100]<16;16,1>
+// v6
+mov (16) ubMEDIAN_TEMP(15,0)<1> r[a0.0,128]<16;16,1>
+// v6
+mov (16) ubMEDIAN_TEMP(15,16)<1> r[a0.0,160]<16;16,1>
+// v7
+mov (16) ubMEDIAN_TEMP(16,0)<1> r[a0.0,130]<16;16,1>
+// v7
+mov (16) ubMEDIAN_TEMP(16,16)<1> r[a0.0,162]<16;16,1>
+// v8
+mov (16) ubMEDIAN_TEMP(17,0)<1> r[a0.0,132]<16;16,1>
+// v8
+mov (16) ubMEDIAN_TEMP(17,16)<1> r[a0.0,164]<16;16,1>
+
+//TODO - Optimize one instruction here.
+add (1) a0.0:uw a0.0<0;1,0>:uw 64:uw
+
+// MedianSwap
+//
+// MedianSwap(inOutLeft, inOutRight)
+// {
+// if (inOutLeft > inOutRight)
+// {
+// temp = inOutLeft
+// inOutLeft = inOutRight
+// inOutRight = temp
+// }
+// }
+
+// MedianSwap(v1, v2) - U
+// MedianSwap(v4, v5) - U
+// MedianSwap(v1, v2) - V
+// MedianSwap(v4, v5) - V
+
+cmp.g.f0.0 (16) null:w ubMEDIAN_TEMP(1,0)<32;16,2> ubMEDIAN_TEMP(2,0)<32;16,2>
+cmp.g.f0.1 (16) null:w ubMEDIAN_TEMP(4,0)<32;16,2> ubMEDIAN_TEMP(5,0)<32;16,2>
+cmp.g.f1.0 (16) null:w ubMEDIAN_TEMP(1,1)<32;16,2> ubMEDIAN_TEMP(2,1)<32;16,2>
+cmp.g.f1.1 (16) null:w ubMEDIAN_TEMP(4,1)<32;16,2> ubMEDIAN_TEMP(5,1)<32;16,2>
+
+ mov (16) ubTEMP1(0,0)<1> ubMEDIAN_TEMP(1,0)<32;16,2>
+ mov (16) ubTEMP1(0,16)<1> ubMEDIAN_TEMP(4,0)<32;16,2>
+ mov (16) ubTEMP1(1,0)<1> ubMEDIAN_TEMP(1,1)<32;16,2>
+ mov (16) ubTEMP1(1,16)<1> ubMEDIAN_TEMP(4,1)<32;16,2>
+
+(f0.0) mov (16) ubMEDIAN_TEMP(1,0)<2> ubMEDIAN_TEMP(2,0)<32;16,2>
+(f0.1) mov (16) ubMEDIAN_TEMP(4,0)<2> ubMEDIAN_TEMP(5,0)<32;16,2>
+(f1.0) mov (16) ubMEDIAN_TEMP(1,1)<2> ubMEDIAN_TEMP(2,1)<32;16,2>
+(f1.1) mov (16) ubMEDIAN_TEMP(4,1)<2> ubMEDIAN_TEMP(5,1)<32;16,2>
+
+(f0.0) mov (16) ubMEDIAN_TEMP(2,0)<2> ubTEMP1(0,0)<16;16,1>
+(f0.1) mov (16) ubMEDIAN_TEMP(5,0)<2> ubTEMP1(0,16)<16;16,1>
+(f1.0) mov (16) ubMEDIAN_TEMP(2,1)<2> ubTEMP1(1,0)<16;16,1>
+(f1.1) mov (16) ubMEDIAN_TEMP(5,1)<2> ubTEMP1(1,16)<16;16,1>
+
+// MedianSwap(v7, v8) - U
+// MedianSwap(v0, v1) - U
+// MedianSwap(v7, v8) - V
+// MedianSwap(v0, v1) - V
+ cmp.g.f0.0 (16) null:w ubMEDIAN_TEMP(7,0)<32;16,2> ubMEDIAN_TEMP(8,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w ubMEDIAN_TEMP(0,0)<32;16,2> ubMEDIAN_TEMP(1,0)<32;16,2>
+ cmp.g.f1.0 (16) null:w ubMEDIAN_TEMP(7,1)<32;16,2> ubMEDIAN_TEMP(8,1)<32;16,2>
+ cmp.g.f1.1 (16) null:w ubMEDIAN_TEMP(0,1)<32;16,2> ubMEDIAN_TEMP(1,1)<32;16,2>
+
+ mov (16) ubTEMP1(0,0)<1> ubMEDIAN_TEMP(7,0)<32;16,2>
+ mov (16) ubTEMP1(0,16)<1> ubMEDIAN_TEMP(0,0)<32;16,2>
+ mov (16) ubTEMP1(1,0)<1> ubMEDIAN_TEMP(7,1)<32;16,2>
+ mov (16) ubTEMP1(1,16)<1> ubMEDIAN_TEMP(0,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(7,0)<2> ubMEDIAN_TEMP(8,0)<32;16,2>
+ (f0.1) mov (16) ubMEDIAN_TEMP(0,0)<2> ubMEDIAN_TEMP(1,0)<32;16,2>
+ (f1.0) mov (16) ubMEDIAN_TEMP(7,1)<2> ubMEDIAN_TEMP(8,1)<32;16,2>
+ (f1.1) mov (16) ubMEDIAN_TEMP(0,1)<2> ubMEDIAN_TEMP(1,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(8,0)<2> ubTEMP1(0,0)<16;16,1>
+ (f0.1) mov (16) ubMEDIAN_TEMP(1,0)<2> ubTEMP1(0,16)<16;16,1>
+ (f1.0) mov (16) ubMEDIAN_TEMP(8,1)<2> ubTEMP1(1,0)<16;16,1>
+ (f1.1) mov (16) ubMEDIAN_TEMP(1,1)<2> ubTEMP1(1,16)<16;16,1>
+
+ // MedianSwap(v3, v4) - U
+ // MedianSwap(v6, v7) - U
+ // MedianSwap(v3, v4) - V
+ // MedianSwap(v6, v7) - V
+ cmp.g.f0.0 (16) null:w ubMEDIAN_TEMP(3,0)<32;16,2> ubMEDIAN_TEMP(4,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w ubMEDIAN_TEMP(6,0)<32;16,2> ubMEDIAN_TEMP(7,0)<32;16,2>
+ cmp.g.f1.0 (16) null:w ubMEDIAN_TEMP(3,1)<32;16,2> ubMEDIAN_TEMP(4,1)<32;16,2>
+ cmp.g.f1.1 (16) null:w ubMEDIAN_TEMP(6,1)<32;16,2> ubMEDIAN_TEMP(7,1)<32;16,2>
+
+ mov (16) ubTEMP1(0,0)<1> ubMEDIAN_TEMP(3,0)<32;16,2>
+ mov (16) ubTEMP1(0,16)<1> ubMEDIAN_TEMP(6,0)<32;16,2>
+ mov (16) ubTEMP1(1,0)<1> ubMEDIAN_TEMP(3,1)<32;16,2>
+ mov (16) ubTEMP1(1,16)<1> ubMEDIAN_TEMP(6,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(3,0)<2> ubMEDIAN_TEMP(4,0)<32;16,2>
+ (f0.1) mov (16) ubMEDIAN_TEMP(6,0)<2> ubMEDIAN_TEMP(7,0)<32;16,2>
+ (f1.0) mov (16) ubMEDIAN_TEMP(3,1)<2> ubMEDIAN_TEMP(4,1)<32;16,2>
+ (f1.1) mov (16) ubMEDIAN_TEMP(6,1)<2> ubMEDIAN_TEMP(7,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(4,0)<2> ubTEMP1(0,0)<16;16,1>
+ (f0.1) mov (16) ubMEDIAN_TEMP(7,0)<2> ubTEMP1(0,16)<16;16,1>
+ (f1.0) mov (16) ubMEDIAN_TEMP(4,1)<2> ubTEMP1(1,0)<16;16,1>
+ (f1.1) mov (16) ubMEDIAN_TEMP(7,1)<2> ubTEMP1(1,16)<16;16,1>
+
+ // MedianSwap(v1, v2) - U
+ // MedianSwap(v4, v5) - U
+ // MedianSwap(v1, v2) - V
+ // MedianSwap(v4, v5) - V
+ cmp.g.f0.0 (16) null:w ubMEDIAN_TEMP(1,0)<32;16,2> ubMEDIAN_TEMP(2,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w ubMEDIAN_TEMP(4,0)<32;16,2> ubMEDIAN_TEMP(5,0)<32;16,2>
+ cmp.g.f1.0 (16) null:w ubMEDIAN_TEMP(1,1)<32;16,2> ubMEDIAN_TEMP(2,1)<32;16,2>
+ cmp.g.f1.1 (16) null:w ubMEDIAN_TEMP(4,1)<32;16,2> ubMEDIAN_TEMP(5,1)<32;16,2>
+
+ mov (16) ubTEMP1(0,0)<1> ubMEDIAN_TEMP(1,0)<32;16,2>
+ mov (16) ubTEMP1(0,16)<1> ubMEDIAN_TEMP(4,0)<32;16,2>
+ mov (16) ubTEMP1(1,0)<1> ubMEDIAN_TEMP(1,1)<32;16,2>
+ mov (16) ubTEMP1(1,16)<1> ubMEDIAN_TEMP(4,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(1,0)<2> ubMEDIAN_TEMP(2,0)<32;16,2>
+ (f0.1) mov (16) ubMEDIAN_TEMP(4,0)<2> ubMEDIAN_TEMP(5,0)<32;16,2>
+ (f1.0) mov (16) ubMEDIAN_TEMP(1,1)<2> ubMEDIAN_TEMP(2,1)<32;16,2>
+ (f1.1) mov (16) ubMEDIAN_TEMP(4,1)<2> ubMEDIAN_TEMP(5,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(2,0)<2> ubTEMP1(0,0)<16;16,1>
+ (f0.1) mov (16) ubMEDIAN_TEMP(5,0)<2> ubTEMP1(0,16)<16;16,1>
+ (f1.0) mov (16) ubMEDIAN_TEMP(2,1)<2> ubTEMP1(1,0)<16;16,1>
+ (f1.1) mov (16) ubMEDIAN_TEMP(5,1)<2> ubTEMP1(1,16)<16;16,1>
+
+ // MedianSwap(v7, v8) - U
+ // MedianSwap(v0, v3) - U
+ // MedianSwap(v7, v8) - V
+ // MedianSwap(v0, v3) - V
+ cmp.g.f0.0 (16) null:w ubMEDIAN_TEMP(7,0)<32;16,2> ubMEDIAN_TEMP(8,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w ubMEDIAN_TEMP(0,0)<32;16,2> ubMEDIAN_TEMP(3,0)<32;16,2>
+ cmp.g.f1.0 (16) null:w ubMEDIAN_TEMP(7,1)<32;16,2> ubMEDIAN_TEMP(8,1)<32;16,2>
+ cmp.g.f1.1 (16) null:w ubMEDIAN_TEMP(0,1)<32;16,2> ubMEDIAN_TEMP(3,1)<32;16,2>
+
+ mov (16) ubTEMP1(0,0)<1> ubMEDIAN_TEMP(7,0)<32;16,2>
+ mov (16) ubTEMP1(0,16)<1> ubMEDIAN_TEMP(0,0)<32;16,2>
+ mov (16) ubTEMP1(1,0)<1> ubMEDIAN_TEMP(7,1)<32;16,2>
+ mov (16) ubTEMP1(1,16)<1> ubMEDIAN_TEMP(0,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(7,0)<2> ubMEDIAN_TEMP(8,0)<32;16,2>
+ (f0.1) mov (16) ubMEDIAN_TEMP(0,0)<2> ubMEDIAN_TEMP(3,0)<32;16,2>
+ (f1.0) mov (16) ubMEDIAN_TEMP(7,1)<2> ubMEDIAN_TEMP(8,1)<32;16,2>
+ (f1.1) mov (16) ubMEDIAN_TEMP(0,1)<2> ubMEDIAN_TEMP(3,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(8,0)<2> ubTEMP1(0,0)<16;16,1>
+ (f0.1) mov (16) ubMEDIAN_TEMP(3,0)<2> ubTEMP1(0,16)<16;16,1>
+ (f1.0) mov (16) ubMEDIAN_TEMP(8,1)<2> ubTEMP1(1,0)<16;16,1>
+ (f1.1) mov (16) ubMEDIAN_TEMP(3,1)<2> ubTEMP1(1,16)<16;16,1>
+
+ // NOTE:
+ // Compare v0 to v6 to find the minimum.
+ // Store the minimum for future use.
+ //TODO - Find if MIN is needed.
+ //cmp.l.f0.0 (16) null:w ubMEDIAN_TEMP(%1+0,0)<32;16,2> ubMEDIAN_TEMP(%1+6,0)<32;16,2>
+ //cmp.l.f1.0 (16) null:w ubMEDIAN_TEMP(%1+0,1)<32;16,2> ubMEDIAN_TEMP(%1+6,1)<32;16,2>
+ //(f0.0) mov (16) ubCURR_MIN(0,%2*16+0)<1> ubMEDIAN_TEMP(%1+0,0)<32;16,2>
+ //(f1.0) mov (16) ubCURR_MIN(1,%2*16+0)<1> ubMEDIAN_TEMP(%1+0,1)<32;16,2>
+ //(-f0.0) mov (16) ubCURR_MIN(0,%2*16+0)<1> ubMEDIAN_TEMP(%1+6,0)<32;16,2>
+ //(-f1.0) mov (16) ubCURR_MIN(1,%2*16+0)<1> ubMEDIAN_TEMP(%1+6,1)<32;16,2>
+
+ // MedianSwap(v5, v8) - U
+ // MedianSwap(v4, v7) - U
+ // MedianSwap(v5, v8) - V
+ // MedianSwap(v4, v7) - V
+ cmp.g.f0.0 (16) null:w ubMEDIAN_TEMP(5,0)<32;16,2> ubMEDIAN_TEMP(8,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w ubMEDIAN_TEMP(4,0)<32;16,2> ubMEDIAN_TEMP(7,0)<32;16,2>
+ cmp.g.f1.0 (16) null:w ubMEDIAN_TEMP(5,1)<32;16,2> ubMEDIAN_TEMP(8,1)<32;16,2>
+ cmp.g.f1.1 (16) null:w ubMEDIAN_TEMP(4,1)<32;16,2> ubMEDIAN_TEMP(7,1)<32;16,2>
+
+ mov (16) ubTEMP1(0,0)<1> ubMEDIAN_TEMP(5,0)<32;16,2>
+ mov (16) ubTEMP1(0,16)<1> ubMEDIAN_TEMP(4,0)<32;16,2>
+ mov (16) ubTEMP1(1,0)<1> ubMEDIAN_TEMP(5,1)<32;16,2>
+ mov (16) ubTEMP1(1,16)<1> ubMEDIAN_TEMP(4,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(5,0)<2> ubMEDIAN_TEMP(8,0)<32;16,2>
+ (f0.1) mov (16) ubMEDIAN_TEMP(4,0)<2> ubMEDIAN_TEMP(7,0)<32;16,2>
+ (f1.0) mov (16) ubMEDIAN_TEMP(5,1)<2> ubMEDIAN_TEMP(8,1)<32;16,2>
+ (f1.1) mov (16) ubMEDIAN_TEMP(4,1)<2> ubMEDIAN_TEMP(7,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(8,0)<2> ubTEMP1(0,0)<16;16,1>
+ (f0.1) mov (16) ubMEDIAN_TEMP(7,0)<2> ubTEMP1(0,16)<16;16,1>
+ (f1.0) mov (16) ubMEDIAN_TEMP(8,1)<2> ubTEMP1(1,0)<16;16,1>
+ (f1.1) mov (16) ubMEDIAN_TEMP(7,1)<2> ubTEMP1(1,16)<16;16,1>
+
+// NOTE:
+// Compare v2 to v8 to find the maximum.
+// Store the maximum for future use.
+ //TODO - Find if MAX is needed.
+// cmp.g.f0.0 (16) null:w ubMEDIAN_TEMP(%1+2,0)<32;16,2> ubMEDIAN_TEMP(%1+8,0)<32;16,2>
+// cmp.g.f1.0 (16) null:w ubMEDIAN_TEMP(%1+2,1)<32;16,2> ubMEDIAN_TEMP(%1+8,1)<32;16,2>
+//(f0.0) mov (16) ubCURR_MAX(0,%2*16+0)<1> ubMEDIAN_TEMP(%1+2,0)<32;16,2>
+//(f1.0) mov (16) ubCURR_MAX(1,%2*16+0)<1> ubMEDIAN_TEMP(%1+2,1)<32;16,2>
+//(-f0.0) mov (16) ubCURR_MAX(0,%2*16+0)<1> ubMEDIAN_TEMP(%1+8,0)<32;16,2>
+//(-f1.0) mov (16) ubCURR_MAX(1,%2*16+0)<1> ubMEDIAN_TEMP(%1+8,1)<32;16,2>
+
+// MedianSwap(v3, v6) - U
+// MedianSwap(v1, v4) - U
+// MedianSwap(v3, v6) - V
+// MedianSwap(v1, v4) - V
+ cmp.g.f0.0 (16) null:w ubMEDIAN_TEMP(3,0)<32;16,2> ubMEDIAN_TEMP(6,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w ubMEDIAN_TEMP(1,0)<32;16,2> ubMEDIAN_TEMP(4,0)<32;16,2>
+ cmp.g.f1.0 (16) null:w ubMEDIAN_TEMP(3,1)<32;16,2> ubMEDIAN_TEMP(6,1)<32;16,2>
+ cmp.g.f1.1 (16) null:w ubMEDIAN_TEMP(1,1)<32;16,2> ubMEDIAN_TEMP(4,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(6,0)<2> ubMEDIAN_TEMP(3,0)<32;16,2>
+ (f0.1) mov (16) ubMEDIAN_TEMP(4,0)<2> ubMEDIAN_TEMP(1,0)<32;16,2>
+ (f1.0) mov (16) ubMEDIAN_TEMP(6,1)<2> ubMEDIAN_TEMP(3,1)<32;16,2>
+ (f1.1) mov (16) ubMEDIAN_TEMP(4,1)<2> ubMEDIAN_TEMP(1,1)<32;16,2>
+
+ // MedianSwap(v2,v5) - U
+ // MedianSwap(v4,v7) - U
+ // MedianSwap(v2,v5) - V
+ // MedianSwap(v4,v7) - V
+ cmp.g.f0.0 (16) null:w ubMEDIAN_TEMP(2,0)<32;16,2> ubMEDIAN_TEMP(5,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w ubMEDIAN_TEMP(4,0)<32;16,2> ubMEDIAN_TEMP(7,0)<32;16,2>
+ cmp.g.f1.0 (16) null:w ubMEDIAN_TEMP(2,1)<32;16,2> ubMEDIAN_TEMP(5,1)<32;16,2>
+ cmp.g.f1.1 (16) null:w ubMEDIAN_TEMP(4,1)<32;16,2> ubMEDIAN_TEMP(7,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(2,0)<2> ubMEDIAN_TEMP(5,0)<32;16,2>
+ (f0.1) mov (16) ubMEDIAN_TEMP(4,0)<2> ubMEDIAN_TEMP(7,0)<32;16,2>
+ (f1.0) mov (16) ubMEDIAN_TEMP(2,1)<2> ubMEDIAN_TEMP(5,1)<32;16,2>
+ (f1.1) mov (16) ubMEDIAN_TEMP(4,1)<2> ubMEDIAN_TEMP(7,1)<32;16,2>
+
+ // MedianSwap(v4,v2) - U
+ // MedianSwap(v4,v2) - V
+ cmp.g.f0.0 (16) null:w ubMEDIAN_TEMP(4,0)<32;16,2> ubMEDIAN_TEMP(2,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w ubMEDIAN_TEMP(4,1)<32;16,2> ubMEDIAN_TEMP(2,1)<32;16,2>
+
+ mov (16) ubTEMP1(0,0)<1> ubMEDIAN_TEMP(4,0)<32;16,2>
+ mov (16) ubTEMP1(0,16)<1> ubMEDIAN_TEMP(4,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(4,0)<2> ubMEDIAN_TEMP(2,0)<32;16,2>
+ (f0.1) mov (16) ubMEDIAN_TEMP(4,1)<2> ubMEDIAN_TEMP(2,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(2,0)<2> ubTEMP1(0,0)<16;16,1>
+ (f0.1) mov (16) ubMEDIAN_TEMP(2,1)<2> ubTEMP1(0,16)<16;16,1>
+
+ // MedianSwap(v6,v4) - U
+ // MedianSwap(v6,v4) - V
+ cmp.g.f0.0 (16) null:w ubMEDIAN_TEMP(6,0)<32;16,2> ubMEDIAN_TEMP(4,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w ubMEDIAN_TEMP(6,1)<32;16,2> ubMEDIAN_TEMP(4,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(4,0)<2> ubMEDIAN_TEMP(6,0)<32;16,2>
+ (f0.1) mov (16) ubMEDIAN_TEMP(4,1)<2> ubMEDIAN_TEMP(6,1)<32;16,2>
+
+ // MedianSwap(v4,v2) - U
+ // MedianSwap(v4,v2) - V
+ cmp.g.f0.0 (16) null:w ubMEDIAN_TEMP(4,0)<32;16,2> ubMEDIAN_TEMP(2,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w ubMEDIAN_TEMP(4,1)<32;16,2> ubMEDIAN_TEMP(2,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(4,0)<2> ubMEDIAN_TEMP(2,0)<32;16,2>
+ (f0.1) mov (16) ubMEDIAN_TEMP(4,1)<2> ubMEDIAN_TEMP(2,1)<32;16,2>
+cmp.g.f0.0 (16) null:w ubMEDIAN_TEMP(10,0)<32;16,2> ubMEDIAN_TEMP(11,0)<32;16,2>
+cmp.g.f0.1 (16) null:w ubMEDIAN_TEMP(13,0)<32;16,2> ubMEDIAN_TEMP(14,0)<32;16,2>
+cmp.g.f1.0 (16) null:w ubMEDIAN_TEMP(10,1)<32;16,2> ubMEDIAN_TEMP(11,1)<32;16,2>
+cmp.g.f1.1 (16) null:w ubMEDIAN_TEMP(13,1)<32;16,2> ubMEDIAN_TEMP(14,1)<32;16,2>
+
+ mov (16) ubTEMP1(0,0)<1> ubMEDIAN_TEMP(10,0)<32;16,2>
+ mov (16) ubTEMP1(0,16)<1> ubMEDIAN_TEMP(13,0)<32;16,2>
+ mov (16) ubTEMP1(1,0)<1> ubMEDIAN_TEMP(10,1)<32;16,2>
+ mov (16) ubTEMP1(1,16)<1> ubMEDIAN_TEMP(13,1)<32;16,2>
+
+(f0.0) mov (16) ubMEDIAN_TEMP(10,0)<2> ubMEDIAN_TEMP(11,0)<32;16,2>
+(f0.1) mov (16) ubMEDIAN_TEMP(13,0)<2> ubMEDIAN_TEMP(14,0)<32;16,2>
+(f1.0) mov (16) ubMEDIAN_TEMP(10,1)<2> ubMEDIAN_TEMP(11,1)<32;16,2>
+(f1.1) mov (16) ubMEDIAN_TEMP(13,1)<2> ubMEDIAN_TEMP(14,1)<32;16,2>
+
+(f0.0) mov (16) ubMEDIAN_TEMP(11,0)<2> ubTEMP1(0,0)<16;16,1>
+(f0.1) mov (16) ubMEDIAN_TEMP(14,0)<2> ubTEMP1(0,16)<16;16,1>
+(f1.0) mov (16) ubMEDIAN_TEMP(11,1)<2> ubTEMP1(1,0)<16;16,1>
+(f1.1) mov (16) ubMEDIAN_TEMP(14,1)<2> ubTEMP1(1,16)<16;16,1>
+
+// MedianSwap(v7, v8) - U
+// MedianSwap(v0, v1) - U
+// MedianSwap(v7, v8) - V
+// MedianSwap(v0, v1) - V
+ cmp.g.f0.0 (16) null:w ubMEDIAN_TEMP(16,0)<32;16,2> ubMEDIAN_TEMP(17,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w ubMEDIAN_TEMP(9,0)<32;16,2> ubMEDIAN_TEMP(10,0)<32;16,2>
+ cmp.g.f1.0 (16) null:w ubMEDIAN_TEMP(16,1)<32;16,2> ubMEDIAN_TEMP(17,1)<32;16,2>
+ cmp.g.f1.1 (16) null:w ubMEDIAN_TEMP(9,1)<32;16,2> ubMEDIAN_TEMP(10,1)<32;16,2>
+
+ mov (16) ubTEMP1(0,0)<1> ubMEDIAN_TEMP(16,0)<32;16,2>
+ mov (16) ubTEMP1(0,16)<1> ubMEDIAN_TEMP(9,0)<32;16,2>
+ mov (16) ubTEMP1(1,0)<1> ubMEDIAN_TEMP(16,1)<32;16,2>
+ mov (16) ubTEMP1(1,16)<1> ubMEDIAN_TEMP(9,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(16,0)<2> ubMEDIAN_TEMP(17,0)<32;16,2>
+ (f0.1) mov (16) ubMEDIAN_TEMP(9,0)<2> ubMEDIAN_TEMP(10,0)<32;16,2>
+ (f1.0) mov (16) ubMEDIAN_TEMP(16,1)<2> ubMEDIAN_TEMP(17,1)<32;16,2>
+ (f1.1) mov (16) ubMEDIAN_TEMP(9,1)<2> ubMEDIAN_TEMP(10,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(17,0)<2> ubTEMP1(0,0)<16;16,1>
+ (f0.1) mov (16) ubMEDIAN_TEMP(10,0)<2> ubTEMP1(0,16)<16;16,1>
+ (f1.0) mov (16) ubMEDIAN_TEMP(17,1)<2> ubTEMP1(1,0)<16;16,1>
+ (f1.1) mov (16) ubMEDIAN_TEMP(10,1)<2> ubTEMP1(1,16)<16;16,1>
+
+ // MedianSwap(v3, v4) - U
+ // MedianSwap(v6, v7) - U
+ // MedianSwap(v3, v4) - V
+ // MedianSwap(v6, v7) - V
+ cmp.g.f0.0 (16) null:w ubMEDIAN_TEMP(12,0)<32;16,2> ubMEDIAN_TEMP(13,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w ubMEDIAN_TEMP(15,0)<32;16,2> ubMEDIAN_TEMP(16,0)<32;16,2>
+ cmp.g.f1.0 (16) null:w ubMEDIAN_TEMP(12,1)<32;16,2> ubMEDIAN_TEMP(13,1)<32;16,2>
+ cmp.g.f1.1 (16) null:w ubMEDIAN_TEMP(15,1)<32;16,2> ubMEDIAN_TEMP(16,1)<32;16,2>
+
+ mov (16) ubTEMP1(0,0)<1> ubMEDIAN_TEMP(12,0)<32;16,2>
+ mov (16) ubTEMP1(0,16)<1> ubMEDIAN_TEMP(15,0)<32;16,2>
+ mov (16) ubTEMP1(1,0)<1> ubMEDIAN_TEMP(12,1)<32;16,2>
+ mov (16) ubTEMP1(1,16)<1> ubMEDIAN_TEMP(15,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(12,0)<2> ubMEDIAN_TEMP(13,0)<32;16,2>
+ (f0.1) mov (16) ubMEDIAN_TEMP(15,0)<2> ubMEDIAN_TEMP(16,0)<32;16,2>
+ (f1.0) mov (16) ubMEDIAN_TEMP(12,1)<2> ubMEDIAN_TEMP(13,1)<32;16,2>
+ (f1.1) mov (16) ubMEDIAN_TEMP(15,1)<2> ubMEDIAN_TEMP(16,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(13,0)<2> ubTEMP1(0,0)<16;16,1>
+ (f0.1) mov (16) ubMEDIAN_TEMP(16,0)<2> ubTEMP1(0,16)<16;16,1>
+ (f1.0) mov (16) ubMEDIAN_TEMP(13,1)<2> ubTEMP1(1,0)<16;16,1>
+ (f1.1) mov (16) ubMEDIAN_TEMP(16,1)<2> ubTEMP1(1,16)<16;16,1>
+
+ // MedianSwap(v1, v2) - U
+ // MedianSwap(v4, v5) - U
+ // MedianSwap(v1, v2) - V
+ // MedianSwap(v4, v5) - V
+ cmp.g.f0.0 (16) null:w ubMEDIAN_TEMP(10,0)<32;16,2> ubMEDIAN_TEMP(11,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w ubMEDIAN_TEMP(13,0)<32;16,2> ubMEDIAN_TEMP(14,0)<32;16,2>
+ cmp.g.f1.0 (16) null:w ubMEDIAN_TEMP(10,1)<32;16,2> ubMEDIAN_TEMP(11,1)<32;16,2>
+ cmp.g.f1.1 (16) null:w ubMEDIAN_TEMP(13,1)<32;16,2> ubMEDIAN_TEMP(14,1)<32;16,2>
+
+ mov (16) ubTEMP1(0,0)<1> ubMEDIAN_TEMP(10,0)<32;16,2>
+ mov (16) ubTEMP1(0,16)<1> ubMEDIAN_TEMP(13,0)<32;16,2>
+ mov (16) ubTEMP1(1,0)<1> ubMEDIAN_TEMP(10,1)<32;16,2>
+ mov (16) ubTEMP1(1,16)<1> ubMEDIAN_TEMP(13,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(10,0)<2> ubMEDIAN_TEMP(11,0)<32;16,2>
+ (f0.1) mov (16) ubMEDIAN_TEMP(13,0)<2> ubMEDIAN_TEMP(14,0)<32;16,2>
+ (f1.0) mov (16) ubMEDIAN_TEMP(10,1)<2> ubMEDIAN_TEMP(11,1)<32;16,2>
+ (f1.1) mov (16) ubMEDIAN_TEMP(13,1)<2> ubMEDIAN_TEMP(14,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(11,0)<2> ubTEMP1(0,0)<16;16,1>
+ (f0.1) mov (16) ubMEDIAN_TEMP(14,0)<2> ubTEMP1(0,16)<16;16,1>
+ (f1.0) mov (16) ubMEDIAN_TEMP(11,1)<2> ubTEMP1(1,0)<16;16,1>
+ (f1.1) mov (16) ubMEDIAN_TEMP(14,1)<2> ubTEMP1(1,16)<16;16,1>
+
+ // MedianSwap(v7, v8) - U
+ // MedianSwap(v0, v3) - U
+ // MedianSwap(v7, v8) - V
+ // MedianSwap(v0, v3) - V
+ cmp.g.f0.0 (16) null:w ubMEDIAN_TEMP(16,0)<32;16,2> ubMEDIAN_TEMP(17,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w ubMEDIAN_TEMP(9,0)<32;16,2> ubMEDIAN_TEMP(12,0)<32;16,2>
+ cmp.g.f1.0 (16) null:w ubMEDIAN_TEMP(16,1)<32;16,2> ubMEDIAN_TEMP(17,1)<32;16,2>
+ cmp.g.f1.1 (16) null:w ubMEDIAN_TEMP(9,1)<32;16,2> ubMEDIAN_TEMP(12,1)<32;16,2>
+
+ mov (16) ubTEMP1(0,0)<1> ubMEDIAN_TEMP(16,0)<32;16,2>
+ mov (16) ubTEMP1(0,16)<1> ubMEDIAN_TEMP(9,0)<32;16,2>
+ mov (16) ubTEMP1(1,0)<1> ubMEDIAN_TEMP(16,1)<32;16,2>
+ mov (16) ubTEMP1(1,16)<1> ubMEDIAN_TEMP(9,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(16,0)<2> ubMEDIAN_TEMP(17,0)<32;16,2>
+ (f0.1) mov (16) ubMEDIAN_TEMP(9,0)<2> ubMEDIAN_TEMP(12,0)<32;16,2>
+ (f1.0) mov (16) ubMEDIAN_TEMP(16,1)<2> ubMEDIAN_TEMP(17,1)<32;16,2>
+ (f1.1) mov (16) ubMEDIAN_TEMP(9,1)<2> ubMEDIAN_TEMP(12,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(17,0)<2> ubTEMP1(0,0)<16;16,1>
+ (f0.1) mov (16) ubMEDIAN_TEMP(12,0)<2> ubTEMP1(0,16)<16;16,1>
+ (f1.0) mov (16) ubMEDIAN_TEMP(17,1)<2> ubTEMP1(1,0)<16;16,1>
+ (f1.1) mov (16) ubMEDIAN_TEMP(12,1)<2> ubTEMP1(1,16)<16;16,1>
+
+ // NOTE:
+ // Compare v0 to v6 to find the minimum.
+ // Store the minimum for future use.
+ //TODO - Find if MIN is needed.
+ //cmp.l.f0.0 (16) null:w ubMEDIAN_TEMP(%1+0,0)<32;16,2> ubMEDIAN_TEMP(%1+6,0)<32;16,2>
+ //cmp.l.f1.0 (16) null:w ubMEDIAN_TEMP(%1+0,1)<32;16,2> ubMEDIAN_TEMP(%1+6,1)<32;16,2>
+ //(f0.0) mov (16) ubCURR_MIN(0,%2*16+0)<1> ubMEDIAN_TEMP(%1+0,0)<32;16,2>
+ //(f1.0) mov (16) ubCURR_MIN(1,%2*16+0)<1> ubMEDIAN_TEMP(%1+0,1)<32;16,2>
+ //(-f0.0) mov (16) ubCURR_MIN(0,%2*16+0)<1> ubMEDIAN_TEMP(%1+6,0)<32;16,2>
+ //(-f1.0) mov (16) ubCURR_MIN(1,%2*16+0)<1> ubMEDIAN_TEMP(%1+6,1)<32;16,2>
+
+ // MedianSwap(v5, v8) - U
+ // MedianSwap(v4, v7) - U
+ // MedianSwap(v5, v8) - V
+ // MedianSwap(v4, v7) - V
+ cmp.g.f0.0 (16) null:w ubMEDIAN_TEMP(14,0)<32;16,2> ubMEDIAN_TEMP(17,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w ubMEDIAN_TEMP(13,0)<32;16,2> ubMEDIAN_TEMP(16,0)<32;16,2>
+ cmp.g.f1.0 (16) null:w ubMEDIAN_TEMP(14,1)<32;16,2> ubMEDIAN_TEMP(17,1)<32;16,2>
+ cmp.g.f1.1 (16) null:w ubMEDIAN_TEMP(13,1)<32;16,2> ubMEDIAN_TEMP(16,1)<32;16,2>
+
+ mov (16) ubTEMP1(0,0)<1> ubMEDIAN_TEMP(14,0)<32;16,2>
+ mov (16) ubTEMP1(0,16)<1> ubMEDIAN_TEMP(13,0)<32;16,2>
+ mov (16) ubTEMP1(1,0)<1> ubMEDIAN_TEMP(14,1)<32;16,2>
+ mov (16) ubTEMP1(1,16)<1> ubMEDIAN_TEMP(13,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(14,0)<2> ubMEDIAN_TEMP(17,0)<32;16,2>
+ (f0.1) mov (16) ubMEDIAN_TEMP(13,0)<2> ubMEDIAN_TEMP(16,0)<32;16,2>
+ (f1.0) mov (16) ubMEDIAN_TEMP(14,1)<2> ubMEDIAN_TEMP(17,1)<32;16,2>
+ (f1.1) mov (16) ubMEDIAN_TEMP(13,1)<2> ubMEDIAN_TEMP(16,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(17,0)<2> ubTEMP1(0,0)<16;16,1>
+ (f0.1) mov (16) ubMEDIAN_TEMP(16,0)<2> ubTEMP1(0,16)<16;16,1>
+ (f1.0) mov (16) ubMEDIAN_TEMP(17,1)<2> ubTEMP1(1,0)<16;16,1>
+ (f1.1) mov (16) ubMEDIAN_TEMP(16,1)<2> ubTEMP1(1,16)<16;16,1>
+
+// NOTE:
+// Compare v2 to v8 to find the maximum.
+// Store the maximum for future use.
+ //TODO - Find if MAX is needed.
+// cmp.g.f0.0 (16) null:w ubMEDIAN_TEMP(%1+2,0)<32;16,2> ubMEDIAN_TEMP(%1+8,0)<32;16,2>
+// cmp.g.f1.0 (16) null:w ubMEDIAN_TEMP(%1+2,1)<32;16,2> ubMEDIAN_TEMP(%1+8,1)<32;16,2>
+//(f0.0) mov (16) ubCURR_MAX(0,%2*16+0)<1> ubMEDIAN_TEMP(%1+2,0)<32;16,2>
+//(f1.0) mov (16) ubCURR_MAX(1,%2*16+0)<1> ubMEDIAN_TEMP(%1+2,1)<32;16,2>
+//(-f0.0) mov (16) ubCURR_MAX(0,%2*16+0)<1> ubMEDIAN_TEMP(%1+8,0)<32;16,2>
+//(-f1.0) mov (16) ubCURR_MAX(1,%2*16+0)<1> ubMEDIAN_TEMP(%1+8,1)<32;16,2>
+
+// MedianSwap(v3, v6) - U
+// MedianSwap(v1, v4) - U
+// MedianSwap(v3, v6) - V
+// MedianSwap(v1, v4) - V
+ cmp.g.f0.0 (16) null:w ubMEDIAN_TEMP(12,0)<32;16,2> ubMEDIAN_TEMP(15,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w ubMEDIAN_TEMP(10,0)<32;16,2> ubMEDIAN_TEMP(13,0)<32;16,2>
+ cmp.g.f1.0 (16) null:w ubMEDIAN_TEMP(12,1)<32;16,2> ubMEDIAN_TEMP(15,1)<32;16,2>
+ cmp.g.f1.1 (16) null:w ubMEDIAN_TEMP(10,1)<32;16,2> ubMEDIAN_TEMP(13,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(15,0)<2> ubMEDIAN_TEMP(12,0)<32;16,2>
+ (f0.1) mov (16) ubMEDIAN_TEMP(13,0)<2> ubMEDIAN_TEMP(10,0)<32;16,2>
+ (f1.0) mov (16) ubMEDIAN_TEMP(15,1)<2> ubMEDIAN_TEMP(12,1)<32;16,2>
+ (f1.1) mov (16) ubMEDIAN_TEMP(13,1)<2> ubMEDIAN_TEMP(10,1)<32;16,2>
+
+ // MedianSwap(v2,v5) - U
+ // MedianSwap(v4,v7) - U
+ // MedianSwap(v2,v5) - V
+ // MedianSwap(v4,v7) - V
+ cmp.g.f0.0 (16) null:w ubMEDIAN_TEMP(11,0)<32;16,2> ubMEDIAN_TEMP(14,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w ubMEDIAN_TEMP(13,0)<32;16,2> ubMEDIAN_TEMP(16,0)<32;16,2>
+ cmp.g.f1.0 (16) null:w ubMEDIAN_TEMP(11,1)<32;16,2> ubMEDIAN_TEMP(14,1)<32;16,2>
+ cmp.g.f1.1 (16) null:w ubMEDIAN_TEMP(13,1)<32;16,2> ubMEDIAN_TEMP(16,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(11,0)<2> ubMEDIAN_TEMP(14,0)<32;16,2>
+ (f0.1) mov (16) ubMEDIAN_TEMP(13,0)<2> ubMEDIAN_TEMP(16,0)<32;16,2>
+ (f1.0) mov (16) ubMEDIAN_TEMP(11,1)<2> ubMEDIAN_TEMP(14,1)<32;16,2>
+ (f1.1) mov (16) ubMEDIAN_TEMP(13,1)<2> ubMEDIAN_TEMP(16,1)<32;16,2>
+
+ // MedianSwap(v4,v2) - U
+ // MedianSwap(v4,v2) - V
+ cmp.g.f0.0 (16) null:w ubMEDIAN_TEMP(13,0)<32;16,2> ubMEDIAN_TEMP(11,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w ubMEDIAN_TEMP(13,1)<32;16,2> ubMEDIAN_TEMP(11,1)<32;16,2>
+
+ mov (16) ubTEMP1(0,0)<1> ubMEDIAN_TEMP(13,0)<32;16,2>
+ mov (16) ubTEMP1(0,16)<1> ubMEDIAN_TEMP(13,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(13,0)<2> ubMEDIAN_TEMP(11,0)<32;16,2>
+ (f0.1) mov (16) ubMEDIAN_TEMP(13,1)<2> ubMEDIAN_TEMP(11,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(11,0)<2> ubTEMP1(0,0)<16;16,1>
+ (f0.1) mov (16) ubMEDIAN_TEMP(11,1)<2> ubTEMP1(0,16)<16;16,1>
+
+ // MedianSwap(v6,v4) - U
+ // MedianSwap(v6,v4) - V
+ cmp.g.f0.0 (16) null:w ubMEDIAN_TEMP(15,0)<32;16,2> ubMEDIAN_TEMP(13,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w ubMEDIAN_TEMP(15,1)<32;16,2> ubMEDIAN_TEMP(13,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(13,0)<2> ubMEDIAN_TEMP(15,0)<32;16,2>
+ (f0.1) mov (16) ubMEDIAN_TEMP(13,1)<2> ubMEDIAN_TEMP(15,1)<32;16,2>
+
+ // MedianSwap(v4,v2) - U
+ // MedianSwap(v4,v2) - V
+ cmp.g.f0.0 (16) null:w ubMEDIAN_TEMP(13,0)<32;16,2> ubMEDIAN_TEMP(11,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w ubMEDIAN_TEMP(13,1)<32;16,2> ubMEDIAN_TEMP(11,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(13,0)<2> ubMEDIAN_TEMP(11,0)<32;16,2>
+ (f0.1) mov (16) ubMEDIAN_TEMP(13,1)<2> ubMEDIAN_TEMP(11,1)<32;16,2>
+
+// Sobel Value calculation for the current pixel v4
+// v2 v1 v0
+// * * * <--- Different field - not used
+// v5 v4 v3
+// * * * <--- Different field - not used
+// v8 v7 v6
+//
+// Gx = -v0 - 2*v3 - v6 + v2 + 2*v5 + v8
+// Gy = v0 + 2*v1 + v2 - v6 - 2*v7 - v8
+//
+// Sobel = (|Gx| + |Gy|) >> 3
+
+//TODO - Change Later - rT
+add (1) a0.0:uw a0.0<0;1,0>:uw -128:uw
+
+// - 2 * v3
+mul (16) acc0.0<1>:w r[a0.0,64]<16;16,1>:ub -2:w
+// + v8
+mac (16) acc0.0<1>:w r[a0.0,132]<16;16,1>:ub 1:w
+// - v0
+mac (16) acc0.0<1>:w r[a0.0,0]<16;16,1>:ub -1:w
+// - v6
+mac (16) acc0.0<1>:w r[a0.0,128]<16;16,1>:ub -1:w
+// + v2
+mac (16) acc0.0<1>:w r[a0.0,4]<16;16,1>:ub 1:w
+// + 2 * v5
+mac (16) wSOBEL_X(0)<1> r[a0.0,68]<16;16,1>:ub 2:w
+// - 2 * v3
+mul (16) acc0.0<1>:w r[a0.0,96]<16;16,1>:ub -2:w
+// + v8
+mac (16) acc0.0<1>:w r[a0.0,164]<16;16,1>:ub 1:w
+// - v0
+mac (16) acc0.0<1>:w r[a0.0,32]<16;16,1>:ub -1:w
+// - v6
+mac (16) acc0.0<1>:w r[a0.0,160]<16;16,1>:ub -1:w
+// + v2
+mac (16) acc0.0<1>:w r[a0.0,36]<16;16,1>:ub 1:w
+// + 2 * v5
+mac (16) wSOBEL_X(1)<1> r[a0.0,100]<16;16,1>:ub 2:w
+// - 2 * v3
+mul (16) acc0.0<1>:w r[a0.0,128]<16;16,1>:ub -2:w
+// + v8
+mac (16) acc0.0<1>:w r[a0.0,196]<16;16,1>:ub 1:w
+// - v0
+mac (16) acc0.0<1>:w r[a0.0,64]<16;16,1>:ub -1:w
+// - v6
+mac (16) acc0.0<1>:w r[a0.0,192]<16;16,1>:ub -1:w
+// + v2
+mac (16) acc0.0<1>:w r[a0.0,68]<16;16,1>:ub 1:w
+// + 2 * v5
+mac (16) wSOBEL_X(2)<1> r[a0.0,132]<16;16,1>:ub 2:w
+// - 2 * v3
+mul (16) acc0.0<1>:w r[a0.0,160]<16;16,1>:ub -2:w
+// + v8
+mac (16) acc0.0<1>:w r[a0.0,228]<16;16,1>:ub 1:w
+// - v0
+mac (16) acc0.0<1>:w r[a0.0,96]<16;16,1>:ub -1:w
+// - v6
+mac (16) acc0.0<1>:w r[a0.0,224]<16;16,1>:ub -1:w
+// + v2
+mac (16) acc0.0<1>:w r[a0.0,100]<16;16,1>:ub 1:w
+// + 2 * v5
+mac (16) wSOBEL_X(3)<1> r[a0.0,164]<16;16,1>:ub 2:w
+
+// + 2 * v1
+mul (16) acc0.0<1>:w r[a0.0,2]<16;16,1>:ub 2:w
+// + v0
+mac (16) acc0.0<1>:w r[a0.0,0]<16;16,1>:ub 1:w
+// - v8
+mac (16) acc0.0<1>:w r[a0.0,132]<16;16,1>:ub -1:w
+// + v2
+mac (16) acc0.0<1>:w r[a0.0,4]<16;16,1>:ub 1:w
+// - v6
+mac (16) acc0.0<1>:w r[a0.0,128]<16;16,1>:ub -1:w
+// - 2 * v7
+mac (16) acc0.0<1>:w r[a0.0,130]<16;16,1>:ub -2:w
+
+add (16) acc0.0<1>:uw (abs)acc0.0<16;16,1>:w (abs)wSOBEL_X(0)<16;16,1>
+
+shr (16) uwSOBEL(0)<1> acc0.0<16;16,1>:uw 3:uw
+// + 2 * v1
+mul (16) acc0.0<1>:w r[a0.0,34]<16;16,1>:ub 2:w
+// + v0
+mac (16) acc0.0<1>:w r[a0.0,32]<16;16,1>:ub 1:w
+// - v8
+mac (16) acc0.0<1>:w r[a0.0,164]<16;16,1>:ub -1:w
+// + v2
+mac (16) acc0.0<1>:w r[a0.0,36]<16;16,1>:ub 1:w
+// - v6
+mac (16) acc0.0<1>:w r[a0.0,160]<16;16,1>:ub -1:w
+// - 2 * v7
+mac (16) acc0.0<1>:w r[a0.0,162]<16;16,1>:ub -2:w
+
+add (16) acc0.0<1>:uw (abs)acc0.0<16;16,1>:w (abs)wSOBEL_X(1)<16;16,1>
+
+shr (16) uwSOBEL(1)<1> acc0.0<16;16,1>:uw 3:uw
+// + 2 * v1
+mul (16) acc0.0<1>:w r[a0.0,66]<16;16,1>:ub 2:w
+// + v0
+mac (16) acc0.0<1>:w r[a0.0,64]<16;16,1>:ub 1:w
+// - v8
+mac (16) acc0.0<1>:w r[a0.0,196]<16;16,1>:ub -1:w
+// + v2
+mac (16) acc0.0<1>:w r[a0.0,68]<16;16,1>:ub 1:w
+// - v6
+mac (16) acc0.0<1>:w r[a0.0,192]<16;16,1>:ub -1:w
+// - 2 * v7
+mac (16) acc0.0<1>:w r[a0.0,194]<16;16,1>:ub -2:w
+
+add (16) acc0.0<1>:uw (abs)acc0.0<16;16,1>:w (abs)wSOBEL_X(2)<16;16,1>
+
+shr (16) uwSOBEL(2)<1> acc0.0<16;16,1>:uw 3:uw
+// + 2 * v1
+mul (16) acc0.0<1>:w r[a0.0,98]<16;16,1>:ub 2:w
+// + v0
+mac (16) acc0.0<1>:w r[a0.0,96]<16;16,1>:ub 1:w
+// - v8
+mac (16) acc0.0<1>:w r[a0.0,228]<16;16,1>:ub -1:w
+// + v2
+mac (16) acc0.0<1>:w r[a0.0,100]<16;16,1>:ub 1:w
+// - v6
+mac (16) acc0.0<1>:w r[a0.0,224]<16;16,1>:ub -1:w
+// - 2 * v7
+mac (16) acc0.0<1>:w r[a0.0,226]<16;16,1>:ub -2:w
+
+add (16) acc0.0<1>:uw (abs)acc0.0<16;16,1>:w (abs)wSOBEL_X(3)<16;16,1>
+
+shr (16) uwSOBEL(3)<1> acc0.0<16;16,1>:uw 3:uw
+
+//Mov Median in CURBE_TEMP to free up temp space.
+mov (16) ubMEDIAN(0,0)<1> ubMEDIAN_TEMP(4,0)<16;16,1>
+mov (16) ubMEDIAN(0,16)<1> ubMEDIAN_TEMP(4,16)<16;16,1>
+mov (16) ubMEDIAN(0,32)<1> ubMEDIAN_TEMP(13,0)<16;16,1>
+mov (16) ubMEDIAN(0,48)<1> ubMEDIAN_TEMP(13,16)<16;16,1>
+
+// Find:
+// absDiff = abs(ubCurY - ubMedian)
+// Find the difference between pixel and median value.
+
+//Median is interleaved. So difference is also interleaved.
+
+//------------------------------------------------------------------------------------------
+//Process 16 U and 16 V pixels here and rest later.
+// first row - v0,v1,v2
+add (16) wDIFF(0)<1> r[a0.0,0]<16;16,1>:ub -ubMEDIAN(0,0)<16;16,1>
+add (16) wDIFF(1)<1> r[a0.0,2]<16;16,1>:ub -ubMEDIAN(0,0)<16;16,1>
+add (16) wDIFF(2)<1> r[a0.0,4]<16;16,1>:ub -ubMEDIAN(0,0)<16;16,1>
+
+// second row - v3,v4,v5
+add (16) wDIFF(3)<1> r[a0.0,64]<16;16,1>:ub -ubMEDIAN(0,0)<16;16,1>
+add (16) wDIFF(4)<1> r[a0.0,66]<16;16,1>:ub -ubMEDIAN(0,0)<16;16,1>
+add (16) wDIFF(5)<1> r[a0.0,68]<16;16,1>:ub -ubMEDIAN(0,0)<16;16,1>
+
+// third row - v6,v7,v8
+add (16) wDIFF(6)<1> r[a0.0,128]<16;16,1>:ub -ubMEDIAN(0,0)<16;16,1>
+add (16) wDIFF(7)<1> r[a0.0,130]<16;16,1>:ub -ubMEDIAN(0,0)<16;16,1>
+add (16) wDIFF(8)<1> r[a0.0,132]<16;16,1>:ub -ubMEDIAN(0,0)<16;16,1>
+// first row - v0,v1,v2
+add (16) wDIFF(9)<1> r[a0.0,32]<16;16,1>:ub -ubMEDIAN(0,16)<16;16,1>
+add (16) wDIFF(10)<1> r[a0.0,34]<16;16,1>:ub -ubMEDIAN(0,16)<16;16,1>
+add (16) wDIFF(11)<1> r[a0.0,36]<16;16,1>:ub -ubMEDIAN(0,16)<16;16,1>
+
+// second row - v3,v4,v5
+add (16) wDIFF(12)<1> r[a0.0,96]<16;16,1>:ub -ubMEDIAN(0,16)<16;16,1>
+add (16) wDIFF(13)<1> r[a0.0,98]<16;16,1>:ub -ubMEDIAN(0,16)<16;16,1>
+add (16) wDIFF(14)<1> r[a0.0,100]<16;16,1>:ub -ubMEDIAN(0,16)<16;16,1>
+
+// third row - v6,v7,v8
+add (16) wDIFF(15)<1> r[a0.0,160]<16;16,1>:ub -ubMEDIAN(0,16)<16;16,1>
+add (16) wDIFF(16)<1> r[a0.0,162]<16;16,1>:ub -ubMEDIAN(0,16)<16;16,1>
+add (16) wDIFF(17)<1> r[a0.0,164]<16;16,1>:ub -ubMEDIAN(0,16)<16;16,1>
+
+//TODO - Change Later - rT
+add (1) a0.0:uw a0.0<0;1,0>:uw 64:uw
+
+// Find sum of all absolute differences AND
+// maximum absolute difference for 16 U and 16 V here.
+//First 2 rows of 8x4
+//Compare 0-1, 2-3, 4-5, 6-7
+cmp.g.f0.0 (16) null:uw (abs)wDIFF(0)<16;16,1> (abs)wDIFF(1)<16;16,1>
+cmp.g.f0.1 (16) null:uw (abs)wDIFF(2)<16;16,1> (abs)wDIFF(3)<16;16,1>
+cmp.g.f1.0 (16) null:uw (abs)wDIFF(4)<16;16,1> (abs)wDIFF(5)<16;16,1>
+cmp.g.f1.1 (16) null:uw (abs)wDIFF(6)<16;16,1> (abs)wDIFF(7)<16;16,1>
+
+//Calculate SAD
+ add (16) acc0.0<1>:uw (abs)wDIFF(0)<16;16,1> (abs)wDIFF(1)<16;16,1>
+ add (16) acc0.0<1>:uw acc0.0<16;16,1>:uw (abs)wDIFF(2)<16;16,1>
+ add (16) acc0.0<1>:uw acc0.0<16;16,1>:uw (abs)wDIFF(3)<16;16,1>
+ add (16) acc0.0<1>:uw acc0.0<16;16,1>:uw (abs)wDIFF(4)<16;16,1>
+ add (16) acc0.0<1>:uw acc0.0<16;16,1>:uw (abs)wDIFF(5)<16;16,1>
+ add (16) acc0.0<1>:uw acc0.0<16;16,1>:uw (abs)wDIFF(6)<16;16,1>
+ add (16) acc0.0<1>:uw acc0.0<16;16,1>:uw (abs)wDIFF(7)<16;16,1>
+ add (16) uwSOAD(0)<1> acc0.0<16;16,1>:uw (abs)wDIFF(8)<16;16,1>
+
+(f0.0) sel (16) uwCURBE_TEMP(0)<1> (abs)wDIFF(0)<16;16,1> (abs)wDIFF(1)<16;16,1>
+(f0.1) sel (16) uwCURBE_TEMP(1)<1> (abs)wDIFF(2)<16;16,1> (abs)wDIFF(3)<16;16,1>
+(f1.0) sel (16) uwCURBE_TEMP(2)<1> (abs)wDIFF(4)<16;16,1> (abs)wDIFF(5)<16;16,1>
+(f1.1) sel (16) uwCURBE_TEMP(3)<1> (abs)wDIFF(6)<16;16,1> (abs)wDIFF(7)<16;16,1>
+
+//------------
+ //The old DIFF(0-7) values are no longer needed here. Repopulate DIFF(0-7).
+ // first row - v0,v1,v2
+ add (16) wDIFF(0)<1> r[a0.0,0]<16;16,1>:ub -ubMEDIAN(1,0)<16;16,1>
+ add (16) wDIFF(1)<1> r[a0.0,2]<16;16,1>:ub -ubMEDIAN(1,0)<16;16,1>
+ add (16) wDIFF(2)<1> r[a0.0,4]<16;16,1>:ub -ubMEDIAN(1,0)<16;16,1>
+
+ // second row - v3,v4,v5
+ add (16) wDIFF(3)<1> r[a0.0,64]<16;16,1>:ub -ubMEDIAN(1,0)<16;16,1>
+ add (16) wDIFF(4)<1> r[a0.0,66]<16;16,1>:ub -ubMEDIAN(1,0)<16;16,1>
+ add (16) wDIFF(5)<1> r[a0.0,68]<16;16,1>:ub -ubMEDIAN(1,0)<16;16,1>
+
+ // third row - v6,v7
+ add (16) wDIFF(6)<1> r[a0.0,128]<16;16,1>:ub -ubMEDIAN(1,0)<16;16,1>
+ add (16) wDIFF(7)<1> r[a0.0,130]<16;16,1>:ub -ubMEDIAN(1,0)<16;16,1>
+//------------
+
+//Compare Max(0,1) - Max(2,3), Max(4,5) - Max(6,7)
+cmp.g.f0.0 (16) null:uw uwCURBE_TEMP(0)<16;16,1> uwCURBE_TEMP(1)<16;16,1>
+cmp.g.f0.1 (16) null:uw uwCURBE_TEMP(2)<16;16,1> uwCURBE_TEMP(3)<16;16,1>
+
+(f0.0)sel (16) uwCURBE_TEMP(0)<1> uwCURBE_TEMP(0)<16;16,1> uwCURBE_TEMP(1)<16;16,1>
+(f0.1)sel (16) uwCURBE_TEMP(2)<1> uwCURBE_TEMP(2)<16;16,1> uwCURBE_TEMP(3)<16;16,1>
+
+//Compare Max(0,1,2,3) - Max(4,5,6,7)
+cmp.g.f0.0 (16) null:uw uwCURBE_TEMP(0)<16;16,1> uwCURBE_TEMP(2)<16;16,1>
+(f0.0)sel (16) uwCURBE_TEMP(0)<1> uwCURBE_TEMP(0)<16;16,1> uwCURBE_TEMP(2)<16;16,1>
+
+//Compare Max(0,1,2,3,4,5,6,7) - 8
+cmp.g.f0.0 (16) null:uw uwCURBE_TEMP(0)<16;16,1> (abs)wDIFF(8)<16;16,1>
+(f0.0)sel (16) uwMAX_ABS_DIFF(0)<1> uwCURBE_TEMP(0)<16;16,1> (abs)wDIFF(8)<16;16,1>
+
+//------------
+ //Load v8 - DIFF(8)
+ add (16) wDIFF(8)<1> r[a0.0,132]<16;16,1>:ub -ubMEDIAN(1,0)<16;16,1>
+//------------
+//Compare 0-1, 2-3, 4-5, 6-7
+cmp.g.f0.0 (16) null:uw (abs)wDIFF(9)<16;16,1> (abs)wDIFF(10)<16;16,1>
+cmp.g.f0.1 (16) null:uw (abs)wDIFF(11)<16;16,1> (abs)wDIFF(12)<16;16,1>
+cmp.g.f1.0 (16) null:uw (abs)wDIFF(13)<16;16,1> (abs)wDIFF(14)<16;16,1>
+cmp.g.f1.1 (16) null:uw (abs)wDIFF(15)<16;16,1> (abs)wDIFF(16)<16;16,1>
+
+//Calculate SAD
+ add (16) acc0.0<1>:uw (abs)wDIFF(9)<16;16,1> (abs)wDIFF(10)<16;16,1>
+ add (16) acc0.0<1>:uw acc0.0<16;16,1>:uw (abs)wDIFF(11)<16;16,1>
+ add (16) acc0.0<1>:uw acc0.0<16;16,1>:uw (abs)wDIFF(12)<16;16,1>
+ add (16) acc0.0<1>:uw acc0.0<16;16,1>:uw (abs)wDIFF(13)<16;16,1>
+ add (16) acc0.0<1>:uw acc0.0<16;16,1>:uw (abs)wDIFF(14)<16;16,1>
+ add (16) acc0.0<1>:uw acc0.0<16;16,1>:uw (abs)wDIFF(15)<16;16,1>
+ add (16) acc0.0<1>:uw acc0.0<16;16,1>:uw (abs)wDIFF(16)<16;16,1>
+ add (16) uwSOAD(1)<1> acc0.0<16;16,1>:uw (abs)wDIFF(17)<16;16,1>
+
+(f0.0) sel (16) uwCURBE_TEMP(0)<1> (abs)wDIFF(9)<16;16,1> (abs)wDIFF(10)<16;16,1>
+(f0.1) sel (16) uwCURBE_TEMP(1)<1> (abs)wDIFF(11)<16;16,1> (abs)wDIFF(12)<16;16,1>
+(f1.0) sel (16) uwCURBE_TEMP(2)<1> (abs)wDIFF(13)<16;16,1> (abs)wDIFF(14)<16;16,1>
+(f1.1) sel (16) uwCURBE_TEMP(3)<1> (abs)wDIFF(15)<16;16,1> (abs)wDIFF(16)<16;16,1>
+
+//------------
+ //The old DIFF(9-16) values are no longer needed here. Repopulate DIFF(9-16).
+ // first row - v0,v1,v2
+ add (16) wDIFF(9)<1> r[a0.0,32]<16;16,1>:ub -ubMEDIAN(1,16)<16;16,1>
+ add (16) wDIFF(10)<1> r[a0.0,34]<16;16,1>:ub -ubMEDIAN(1,16)<16;16,1>
+ add (16) wDIFF(11)<1> r[a0.0,36]<16;16,1>:ub -ubMEDIAN(1,16)<16;16,1>
+
+ // second row - v3,v4,v5
+ add (16) wDIFF(12)<1> r[a0.0,96]<16;16,1>:ub -ubMEDIAN(1,16)<16;16,1>
+ add (16) wDIFF(13)<1> r[a0.0,98]<16;16,1>:ub -ubMEDIAN(1,16)<16;16,1>
+ add (16) wDIFF(14)<1> r[a0.0,100]<16;16,1>:ub -ubMEDIAN(1,16)<16;16,1>
+
+ // third row - v6,v7
+ add (16) wDIFF(15)<1> r[a0.0,160]<16;16,1>:ub -ubMEDIAN(1,16)<16;16,1>
+ add (16) wDIFF(16)<1> r[a0.0,162]<16;16,1>:ub -ubMEDIAN(1,16)<16;16,1>
+//------------
+
+//Compare Max(0,1) - Max(2,3), Max(4,5) - Max(6,7)
+cmp.g.f0.0 (16) null:uw uwCURBE_TEMP(0)<16;16,1> uwCURBE_TEMP(1)<16;16,1>
+cmp.g.f0.1 (16) null:uw uwCURBE_TEMP(2)<16;16,1> uwCURBE_TEMP(3)<16;16,1>
+
+(f0.0)sel (16) uwCURBE_TEMP(0)<1> uwCURBE_TEMP(0)<16;16,1> uwCURBE_TEMP(1)<16;16,1>
+(f0.1)sel (16) uwCURBE_TEMP(2)<1> uwCURBE_TEMP(2)<16;16,1> uwCURBE_TEMP(3)<16;16,1>
+
+//Compare Max(0,1,2,3) - Max(4,5,6,7)
+cmp.g.f0.0 (16) null:uw uwCURBE_TEMP(0)<16;16,1> uwCURBE_TEMP(2)<16;16,1>
+(f0.0)sel (16) uwCURBE_TEMP(0)<1> uwCURBE_TEMP(0)<16;16,1> uwCURBE_TEMP(2)<16;16,1>
+
+//Compare Max(0,1,2,3,4,5,6,7) - 8
+cmp.g.f0.0 (16) null:uw uwCURBE_TEMP(0)<16;16,1> (abs)wDIFF(17)<16;16,1>
+(f0.0)sel (16) uwMAX_ABS_DIFF(1)<1> uwCURBE_TEMP(0)<16;16,1> (abs)wDIFF(17)<16;16,1>
+
+//------------
+ //Load v8 - DIFF(17)
+ add (16) wDIFF(17)<1> r[a0.0,164]<16;16,1>:ub -ubMEDIAN(1,16)<16;16,1>
+//------------
+
+//if ((sobel_edge_measure < m_SobelEdgeThreshold) && ((block_max-block_min) < m_LocalDiffThreshold))
+// if (sigma_mb_min > sigma)
+// sigma_mb_min = sigma;
+
+//NOTE: block_min is always zero as the median is one of the values in the 3x3 block. So there is no need to calculate it.
+// So just do -
+//if ((sobel_edge_measure < m_SobelEdgeThreshold) && ((block_max) < m_LocalDiffThreshold) && ( sigma < sigma_mb_min))
+// sigma_mb_min = sigma;
+
+//We are processing 32 bytes of U and 32 bytes of V - each of size 8x4.
+//Compare first 8 bytes with max possible (255).
+//Start above condition from second 8 bytes.
+
+//TODO - Change Later - rT
+// mov (1) pCUR_MIN_SOAD_8x4:uw 1752:uw //r54.24:ub
+
+//First row of 8x4
+ cmp.l.f0.0 (16) null:uw uwSOBEL(0)<16;16,1> r55.30<0;2,1>:ub
+(f0.0) cmp.l.f0.0 (16) null:uw uwMAX_ABS_DIFF(0)<16;16,1> r55.28<0;2,1>:ub
+(f0.0) cmp.l.f0.0 (16) null:uw uwSOAD(0)<16;16,1> 255:uw
+(f0.0) sel (16) uwSOBEL(0)<1> uwSOAD(0)<16;16,1> 255:uw
+
+//Second row of 8x4
+ cmp.l.f0.0 (16) null:uw uwSOBEL(1)<16;16,1> r55.30<0;2,1>:ub
+(f0.0) cmp.l.f0.0 (16) null:uw uwMAX_ABS_DIFF(1)<16;16,1> r55.28<0;2,1>:ub
+(f0.0) cmp.l.f0.0 (16) null:uw uwSOAD(1)<16;16,1> uwSOBEL(0)<16;16,1>
+(f0.0) mov (16) uwSOBEL(0)<1> uwSOAD(1)<16;16,1>
+
+// Find sum of all absolute differences AND
+// maximum absolute difference for 16 U and 16 V here.
+//Second 2 rows of 8x4
+//Compare 0-1, 2-3, 4-5, 6-7
+cmp.g.f0.0 (16) null:uw (abs)wDIFF(0)<16;16,1> (abs)wDIFF(1)<16;16,1>
+cmp.g.f0.1 (16) null:uw (abs)wDIFF(2)<16;16,1> (abs)wDIFF(3)<16;16,1>
+cmp.g.f1.0 (16) null:uw (abs)wDIFF(4)<16;16,1> (abs)wDIFF(5)<16;16,1>
+cmp.g.f1.1 (16) null:uw (abs)wDIFF(6)<16;16,1> (abs)wDIFF(7)<16;16,1>
+
+//Calculate SAD
+ add (16) acc0.0<1>:uw (abs)wDIFF(0)<16;16,1> (abs)wDIFF(1)<16;16,1>
+ add (16) acc0.0<1>:uw acc0.0<16;16,1>:uw (abs)wDIFF(2)<16;16,1>
+ add (16) acc0.0<1>:uw acc0.0<16;16,1>:uw (abs)wDIFF(3)<16;16,1>
+ add (16) acc0.0<1>:uw acc0.0<16;16,1>:uw (abs)wDIFF(4)<16;16,1>
+ add (16) acc0.0<1>:uw acc0.0<16;16,1>:uw (abs)wDIFF(5)<16;16,1>
+ add (16) acc0.0<1>:uw acc0.0<16;16,1>:uw (abs)wDIFF(6)<16;16,1>
+ add (16) acc0.0<1>:uw acc0.0<16;16,1>:uw (abs)wDIFF(7)<16;16,1>
+ add (16) uwSOAD(0)<1> acc0.0<16;16,1>:uw (abs)wDIFF(8)<16;16,1>
+
+(f0.0) sel (16) uwCURBE_TEMP(0)<1> (abs)wDIFF(0)<16;16,1> (abs)wDIFF(1)<16;16,1>
+(f0.1) sel (16) uwCURBE_TEMP(1)<1> (abs)wDIFF(2)<16;16,1> (abs)wDIFF(3)<16;16,1>
+(f1.0) sel (16) uwCURBE_TEMP(2)<1> (abs)wDIFF(4)<16;16,1> (abs)wDIFF(5)<16;16,1>
+(f1.1) sel (16) uwCURBE_TEMP(3)<1> (abs)wDIFF(6)<16;16,1> (abs)wDIFF(7)<16;16,1>
+
+//Compare Max(0,1) - Max(2,3), Max(4,5) - Max(6,7)
+cmp.g.f0.0 (16) null:uw uwCURBE_TEMP(0)<16;16,1> uwCURBE_TEMP(1)<16;16,1>
+cmp.g.f0.1 (16) null:uw uwCURBE_TEMP(2)<16;16,1> uwCURBE_TEMP(3)<16;16,1>
+
+(f0.0)sel (16) uwCURBE_TEMP(0)<1> uwCURBE_TEMP(0)<16;16,1> uwCURBE_TEMP(1)<16;16,1>
+(f0.1)sel (16) uwCURBE_TEMP(2)<1> uwCURBE_TEMP(2)<16;16,1> uwCURBE_TEMP(3)<16;16,1>
+
+//Compare Max(0,1,2,3) - Max(4,5,6,7)
+cmp.g.f0.0 (16) null:uw uwCURBE_TEMP(0)<16;16,1> uwCURBE_TEMP(2)<16;16,1>
+(f0.0)sel (16) uwCURBE_TEMP(0)<1> uwCURBE_TEMP(0)<16;16,1> uwCURBE_TEMP(2)<16;16,1>
+
+//Compare Max(0,1,2,3,4,5,6,7) - 8
+cmp.g.f0.0 (16) null:uw uwCURBE_TEMP(0)<16;16,1> (abs)wDIFF(8)<16;16,1>
+(f0.0)sel (16) uwMAX_ABS_DIFF(0)<1> uwCURBE_TEMP(0)<16;16,1> (abs)wDIFF(8)<16;16,1>
+//Compare 0-1, 2-3, 4-5, 6-7
+cmp.g.f0.0 (16) null:uw (abs)wDIFF(9)<16;16,1> (abs)wDIFF(10)<16;16,1>
+cmp.g.f0.1 (16) null:uw (abs)wDIFF(11)<16;16,1> (abs)wDIFF(12)<16;16,1>
+cmp.g.f1.0 (16) null:uw (abs)wDIFF(13)<16;16,1> (abs)wDIFF(14)<16;16,1>
+cmp.g.f1.1 (16) null:uw (abs)wDIFF(15)<16;16,1> (abs)wDIFF(16)<16;16,1>
+
+//Calculate SAD
+ add (16) acc0.0<1>:uw (abs)wDIFF(9)<16;16,1> (abs)wDIFF(10)<16;16,1>
+ add (16) acc0.0<1>:uw acc0.0<16;16,1>:uw (abs)wDIFF(11)<16;16,1>
+ add (16) acc0.0<1>:uw acc0.0<16;16,1>:uw (abs)wDIFF(12)<16;16,1>
+ add (16) acc0.0<1>:uw acc0.0<16;16,1>:uw (abs)wDIFF(13)<16;16,1>
+ add (16) acc0.0<1>:uw acc0.0<16;16,1>:uw (abs)wDIFF(14)<16;16,1>
+ add (16) acc0.0<1>:uw acc0.0<16;16,1>:uw (abs)wDIFF(15)<16;16,1>
+ add (16) acc0.0<1>:uw acc0.0<16;16,1>:uw (abs)wDIFF(16)<16;16,1>
+ add (16) uwSOAD(1)<1> acc0.0<16;16,1>:uw (abs)wDIFF(17)<16;16,1>
+
+(f0.0) sel (16) uwCURBE_TEMP(0)<1> (abs)wDIFF(9)<16;16,1> (abs)wDIFF(10)<16;16,1>
+(f0.1) sel (16) uwCURBE_TEMP(1)<1> (abs)wDIFF(11)<16;16,1> (abs)wDIFF(12)<16;16,1>
+(f1.0) sel (16) uwCURBE_TEMP(2)<1> (abs)wDIFF(13)<16;16,1> (abs)wDIFF(14)<16;16,1>
+(f1.1) sel (16) uwCURBE_TEMP(3)<1> (abs)wDIFF(15)<16;16,1> (abs)wDIFF(16)<16;16,1>
+
+//Compare Max(0,1) - Max(2,3), Max(4,5) - Max(6,7)
+cmp.g.f0.0 (16) null:uw uwCURBE_TEMP(0)<16;16,1> uwCURBE_TEMP(1)<16;16,1>
+cmp.g.f0.1 (16) null:uw uwCURBE_TEMP(2)<16;16,1> uwCURBE_TEMP(3)<16;16,1>
+
+(f0.0)sel (16) uwCURBE_TEMP(0)<1> uwCURBE_TEMP(0)<16;16,1> uwCURBE_TEMP(1)<16;16,1>
+(f0.1)sel (16) uwCURBE_TEMP(2)<1> uwCURBE_TEMP(2)<16;16,1> uwCURBE_TEMP(3)<16;16,1>
+
+//Compare Max(0,1,2,3) - Max(4,5,6,7)
+cmp.g.f0.0 (16) null:uw uwCURBE_TEMP(0)<16;16,1> uwCURBE_TEMP(2)<16;16,1>
+(f0.0)sel (16) uwCURBE_TEMP(0)<1> uwCURBE_TEMP(0)<16;16,1> uwCURBE_TEMP(2)<16;16,1>
+
+//Compare Max(0,1,2,3,4,5,6,7) - 8
+cmp.g.f0.0 (16) null:uw uwCURBE_TEMP(0)<16;16,1> (abs)wDIFF(17)<16;16,1>
+(f0.0)sel (16) uwMAX_ABS_DIFF(1)<1> uwCURBE_TEMP(0)<16;16,1> (abs)wDIFF(17)<16;16,1>
+
+//Third row of 8x4
+ cmp.l.f0.0 (16) null:uw uwSOBEL(2)<16;16,1> r55.30<0;2,1>:ub
+(f0.0) cmp.l.f0.0 (16) null:uw uwMAX_ABS_DIFF(0)<16;16,1> r55.28<0;2,1>:ub
+(f0.0) cmp.l.f0.0 (16) null:uw uwSOAD(0)<16;16,1> uwSOBEL(0)<16;16,1>
+(f0.0) mov (16) uwSOBEL(0)<1> uwSOAD(0)<16;16,1>
+
+//Fourth row of 8x4
+ cmp.l.f0.0 (16) null:uw uwSOBEL(3)<16;16,1> r55.30<0;2,1>:ub
+(f0.0) cmp.l.f0.0 (16) null:uw uwMAX_ABS_DIFF(1)<16;16,1> r55.28<0;2,1>:ub
+(f0.0) cmp.l.f0.0 (16) null:uw uwSOAD(1)<16;16,1> uwSOBEL(0)<16;16,1>
+(f0.0) mov (16) uwSOBEL(0)<1> uwSOAD(1)<16;16,1>
+
+ cmp.l.f0.0 (8) null:uw uwSOBEL(0,0)<8;8,1> uwSOBEL(0,8)<8;8,1>
+(f0.0) sel (8) uwSOBEL(0)<1> uwSOBEL(0,0)<8;8,1> uwSOBEL(0,8)<8;8,1>
+
+ cmp.l.f0.0 (4) null:uw uwSOBEL(0,0)<4;4,1> uwSOBEL(0,4)<4;4,1>
+(f0.0) sel (4) uwSOBEL(0)<1> uwSOBEL(0,0)<4;4,1> uwSOBEL(0,4)<4;4,1>
+
+ cmp.l.f0.0 (2) null:uw uwSOBEL(0,0)<2;2,1> uwSOBEL(0,2)<2;2,1>
+(f0.0) sel (2) r[a0.1,0]<1>:uw uwSOBEL(0,0)<2;2,1> uwSOBEL(0,2)<2;2,1>
+
+
+
+
+
+
+// End of common.inc
+
+mov (1) ip:ud r7.7<0;1,0>:d
+
+
+.end_code
+.end_kernel
diff --git a/src/shaders/post_processing/gen7/PA_DN_422CP.g4a b/src/shaders/post_processing/gen7/PA_DN_422CP.g4a
new file mode 100644
index 0000000..37f0ff0
--- /dev/null
+++ b/src/shaders/post_processing/gen7/PA_DN_422CP.g4a
@@ -0,0 +1,491 @@
+/*
+ * Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// 114 // Total instruction count
+// 1 // Total kernel count
+
+.kernel PA_DN_422CP
+.code
+
+
+
+// FileName: DN_PA_Core.asm
+// Author: Vivek Kumar
+// Description: Tasks for DN only case (16x8 block) for Packed format
+
+
+
+// FileName: DN.asm
+// Author: Vivek Kumar
+// Description: Tasks for DN only case (16x8 block)
+
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16)
+.default_register_type :ub
+
+.reg_count_total 128
+.reg_count_payload 7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later)
+// 0000,0000,0000
+// 0001(Spawn a root thread),0001 (Root thread spawn thread)
+// = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later)
+// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+// 0000,0000 (Binding table index, added later)
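+// = 0x02000011 (NOTE(review): same value as the thread-spawn descriptor above; it does not match the "message len 6" field listed here - confirm)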
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+ // Message Descriptors
+ // = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+ // 1 (header present 1) 0 1010 (media block write) 000000
+ // 00000000 (binding table index - set later)
+ // = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+ // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT
+
+
+.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+
+.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+
+.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+
+
+// End of common.inc
+
+
+// FileName: DNDI.inc
+// Author: Vivek Kumar
+// Description: Include file for DN, DI and DNDI
+// Inputs: DI_ENABLE, DN_ENABLE, DN_PLANAR, DN_PACKED
+
+
+
+
+// End of common.inc
+
+
+//Interface:
+//Static Parameters:
+//r1
+
+
+//====================== Binding table (Explicit To DNDI)=========================================
+
+
+.declare mudMSGHDR_DNDI Base=r18 ElementSize=4 Type=ud // r18-r19: request built and sent by DNDI_Command.asm
+.declare mdMSGHDR_DNDI Base=r18 ElementSize=4 Type=d
+.declare mwMSGHDR_DNDI Base=r18 ElementSize=2 Type=w
+
+
+.declare mudMSGHDR_STMM Base=r20 ElementSize=4 Type=ud
+
+// r22 = header, r23 = payload of the denoise-history write (DN_Hist_Save.asm)
+.declare mudMSGHDR_HIST Base=r22 ElementSize=4 Type=ud
+
+// r24 = header, r25 = payload of the encoder-statistics write (DNDI_Enc_Stats_Save.asm)
+.declare mudMSGHDR_ENC_STATS Base=r24 ElementSize=4 Type=ud
+.declare muwMSGHDR_ENC_STATS Base=r24 ElementSize=2 Type=uw
+.declare mubMSGHDR_ENC_STATS Base=r24 ElementSize=1 Type=ub
+
+// r31 = header of the denoised-pixel write (DN_Save_PA.asm)
+.declare mudMSGHDR_DN_OUT Base=r31.0 ElementSize=4 Type=ud
+.declare mdMSGHDR_DN_OUT Base=r31.0 ElementSize=4 Type=d
+.declare mubMSGHDR_DN_OUT Base=r31.0 ElementSize=1 Type=ub
+
+// UVCOPY and UCOPY are two names for the same base register, r36
+.declare mudMSGHDR_UVCOPY Base=r36 ElementSize=4 Type=ud
+.declare mdMSGHDR_UVCOPY Base=r36 ElementSize=4 Type=d
+.declare mudMSGHDR_UCOPY Base=r36 ElementSize=4 Type=ud
+.declare mudMSGHDR_VCOPY Base=r38 ElementSize=4 Type=ud
+
+// DI_OUT1 shares r18 with the MSGHDR_DNDI views above
+.declare mudMSGHDR_DI_OUT1 Base=r18.0 ElementSize=4 Type=ud
+.declare mubMSGHDR_DI_OUT1 Base=r18.0 ElementSize=1 Type=ub
+
+// DI_OUT2 shares r23 with element (1) of mudMSGHDR_HIST
+.declare mudMSGHDR_DI_OUT2 Base=r23.0 ElementSize=4 Type=ud
+.declare mubMSGHDR_DI_OUT2 Base=r23.0 ElementSize=1 Type=ub
+
+//r45
+//Use r45 as message header, so no need to "mov" the data.
+
+.declare mudDN_Y_OUT Base=r45.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+// Message response (Denoised & DI-ed pixels & statistics); Use buffer 5. Destination of the send in DNDI_Command.asm (r46 onward).
+.declare udDNDI_RESP Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare uwDNDI_RESP Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare ubDNDI_RESP Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+// Message response (UV Copy); Use buffer 5
+.declare udDNDI_UV_RESP Base=r58.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare ubDNDI_UV_RESP Base=r58.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+//Temp GRFs: For 42X to 422 Conversion
+.declare uwDNDI_UVCOPY_TEMP Base=r10.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw //8 GRFs
+.declare ubDNDI_UVCOPY_TEMP Base=r10.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub //8 GRFs
+//---------------------------------------------------------------------------
+// Message descriptors
+//---------------------------------------------------------------------------
+// Extended message descriptor
+ // Message descriptor for sampler read
+ // = 000 0010 (message len 2) 00000 (resp len - set later, 12 or 5 or 11)
+ // 1 (header present 1) 0 11 (SIMD32/64 mode)
+ // 1000 (message type) 0000 (DI state index)
+ // 00000000 (binding table index - set later)
+ // = 0x040b8000
+
+
+// Attention: The Message Length is The Number of GRFs with Data Only, without the Header
+
+
+//---------------------------------------------------------------------------
+// VDI Return Data format
+//---------------------------------------------------------------------------
+// Defines for DI enabled
+
+
+// Defines for DI disabled
+
+
+
+// FileName: DNDI_Command.asm
+// Author: Vivek Kumar
+// Description: Sends a message to the VDI to process one DN (16x8) or DNDI (16x4) block
+
+// Prepare the DNDI send command: r18 = copy of r0, r19 receives the block origin (words 4 and 12)
+mov (8) mudMSGHDR_DNDI(0)<1> r0.0<8;8,1>:ud // message header
+mov (1) mwMSGHDR_DNDI(1,4)<1> r7.0<0;1,0>:w { NoDDClr } // horizontal origin // Do we need to add offset here? -vK
+mov (1) mwMSGHDR_DNDI(1,12)<1> r7.1<0;1,0>:w { NoDDChk } // vertical origin // Can these 2 be combined? - vK
+// Descriptor 0x49E8003 read with the sampler-message bit layout quoted in DNDI.inc: message length 2, response length 9 (r46-r54), binding table index 3. NOTE(review): bits 19:16 (0xE) differ from the 0x040b8000 template quoted there - confirm against the hardware spec.
+send (8) udDNDI_RESP(0)<1> r18 0x2 0x49E8003:ud
+
+// On Gen6, with VDI walker, use the XY pair returned rather than programmed above
+// VDI_RETURNED_XY is ordered XY when the walker is enabled, and is the same as programmed when the walker is disabled
+mov (2) r7.0<1>:w uwDNDI_RESP(4,14)<2;2,1> // horizontal/vertical origin in W.14 and W.15 of r50
+
+
+
+// FileName: DN_Hist_Save.asm
+// Author: Vivek Kumar
+// Description: Saves DN history data to statistics surface
+
+// Write denoise history to memory: the header is assembled in r27 and then copied to r22; r23 is the payload
+mov (8) r27<1>:ud r0.0<8;8,1>:ud // message header
+
+
+ mov (2) mudMSGHDR_HIST(1)<1> udDNDI_RESP(4,0)<2;2,1> // Move denoise history (first 2 dwords of r50) into the payload register r23 (4x2)
+
+
+shr (2) r27.0<1>:ud r7.0<2;2,1>:w 2:w // X,Y origin / 4
+add (1) r27.0<1>:ud r27.0<0;1,0>:ud r1.12<0;1,0>:uw { NoDDClr } // Add pitch to X origin
+mov (1) r27.2<1>:ud 0x10003:ud { NoDDChk } // block width and height
+// Descriptor 0x40A8021 read with the media-block-write bit layout quoted in common.inc: message length 2, no response, binding table index 0x21
+mov (8) mudMSGHDR_HIST(0)<1> r27.0<8;8,1>:ud
+send (8) null<1>:d r22 0x5 0x40A8021:ud
+
+
+
+// FileName: DNDI_Enc_Stats_Save.asm
+// Author: Vivek Kumar
+// Description: Saves Encoder Statistics data to statistics surface in case of DI enabled (for 16x4 block)
+
+// Write encoder statistics to memory: r24 = header, r25 = payload
+//Currently enable this only on Gen6 validation
+mov (8) mudMSGHDR_ENC_STATS(1)<1> 0x0:ud // Zero the whole payload register (r25) before the partial writes below
+mov (8) mudMSGHDR_ENC_STATS(0)<1> r0.0<8;8,1>:ud // message header
+
+shr (1) mudMSGHDR_ENC_STATS(0,0)<1> r7.0<0;1,0>:w 1:w { NoDDClr } //enable the flag after testing on si { NoDDClr } // X origin / 2
+mul (1) acc0.1<1>:ud r7.1<0;1,0>:w 3:w // Y origin * 3
+shr (1) mudMSGHDR_ENC_STATS(0,1)<1> acc0.1<0;1,0>:ud 2:w { NoDDClr, NoDDChk } //enable the flag after testing on si { NoDDClr, NoDDChk } // Y origin * 3/4
+mov (1) mudMSGHDR_ENC_STATS(0,2)<1> 0x50003:ud { NoDDChk } //enable the flag after testing on si { NoDDChk } // block width and height (8x3)
+add (2) mudMSGHDR_ENC_STATS(0,0)<1> mudMSGHDR_ENC_STATS(0,0)<2;2,1> r1.12<2;2,1>:uw // Add pitch to X,Y origin (r1.12 to X, r1.13 to Y)
+
+// NOTE(review): with the encoding of the other block-size immediates in this file (0x7001F is labelled 32x8), 0x50003 reads as 4 bytes x 6 rows = the 24 payload bytes filled below; the "(8x3)" label looks stale - confirm
+ //Data block for Encoder Statistics
+ //----------------------------------------------------
+ //| 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | Bytes
+ //----------------------------------------------------
+ //| BNE | X | X | X | X |
+ //----------------------------------------------------
+ //| X | SVCM | X |
+ //----------------------------------------------------
+ //| SHCM | STAD | X |
+ //----------------------------------------------------
+ //| X | X |
+ //----------------------------------------------------
+ //| X | SVCM | X |
+ //----------------------------------------------------
+ //| SHCM | STAD | X |
+ //----------------------------------------------------
+ mov (1) mubMSGHDR_ENC_STATS(1,0)<1> ubDNDI_RESP(4,8)<0;1,0> { NoDDClr } // r50 byte 8 -> payload byte 0
+ mov (1) muwMSGHDR_ENC_STATS(1,3)<1> uwDNDI_RESP(4,11)<0;1,0> { NoDDClr, NoDDChk } // r50 word 11 -> payload word 3
+ mov (2) muwMSGHDR_ENC_STATS(1,4)<1> uwDNDI_RESP(4,12)<2;2,1> { NoDDClr, NoDDChk } // r50 words 12-13 -> payload words 4-5
+ mov (1) muwMSGHDR_ENC_STATS(1,9)<1> uwDNDI_RESP(4,8)<0;1,0> { NoDDClr, NoDDChk } // r50 word 8 -> payload word 9
+ mov (2) muwMSGHDR_ENC_STATS(1,10)<1> uwDNDI_RESP(4,9)<2;2,1> { NoDDChk } // r50 words 9-10 -> payload words 10-11
+
+// Same descriptor as the denoise-history write: message length 2, binding table index 0x21
+send (8) null<1>:d r24 0x5 0x40A8021:ud
+
+
+
+// FileName: DN_Save_PA.asm
+// Author: Vivek Kumar
+// Description: Save one 16x8 blocks of DN output in Packed format
+
+
+add (4) a0.4<1>:uw r2.28<4;4,1>:ub 1024:w // a0.4-a0.7 = bytes r2.28-r2.31 + 1024; a0.4/a0.5/a0.6 address the Y/U/V destinations below. Presumably 1024 is the byte address of r32 (32-byte registers), i.e. the register after the r31 header - TODO confirm. Original note: it starts at m14
+
+mov (8) mudMSGHDR_DN_OUT(0)<1> r0<8;8,1>:ud // message header
+shl (1) mdMSGHDR_DN_OUT(0,0)<1> r7.0<0;1,0>:w 1:w { NoDDClr } // X origin * 2 (422 output)
+mov (1) mdMSGHDR_DN_OUT(0,1)<1> r7.1<0;1,0>:w { NoDDClr, NoDDChk } // Y origin
+mov (1) mudMSGHDR_DN_OUT(0,2)<1> 0x7001F:ud { NoDDChk } // block width and height (32x8)
+// Output rows are 32 bytes apart; response rows are 16 bytes apart. Y lands on every 2nd output byte, U and V on every 4th.
+ mov (16) r[a0.4,0]<2>:ub ubDNDI_RESP(0,0)<16;16,1> { NoDDClr } // Y row 0: 16 response bytes written straight into the outgoing payload
+ mov (16) r[a0.4,32]<2>:ub ubDNDI_RESP(0,16)<16;16,1> { NoDDClr } // Y row 1
+ mov (16) r[a0.4,64]<2>:ub ubDNDI_RESP(0,32)<16;16,1> { NoDDClr } // Y row 2
+ mov (16) r[a0.4,96]<2>:ub ubDNDI_RESP(0,48)<16;16,1> { NoDDClr } // Y row 3
+ mov (16) r[a0.4,128]<2>:ub ubDNDI_RESP(0,64)<16;16,1> { NoDDClr } // Y row 4
+ mov (16) r[a0.4,160]<2>:ub ubDNDI_RESP(0,80)<16;16,1> { NoDDClr } // Y row 5
+ mov (16) r[a0.4,192]<2>:ub ubDNDI_RESP(0,96)<16;16,1> { NoDDClr } // Y row 6
+ mov (16) r[a0.4,224]<2>:ub ubDNDI_RESP(0,112)<16;16,1> { NoDDClr } // Y row 7
+ mov (8) r[a0.5,0]<4>:ub ubDNDI_RESP(5,1)<16;8,2> { NoDDClr, NoDDChk } // U row 0: odd bytes of the chroma row starting at r51
+ mov (8) r[a0.6,0]<4>:ub ubDNDI_RESP(5,0)<16;8,2> { NoDDChk } // V row 0: even bytes of the same chroma row
+ mov (8) r[a0.5,32]<4>:ub ubDNDI_RESP(5,17)<16;8,2> { NoDDClr, NoDDChk } // U row 1
+ mov (8) r[a0.6,32]<4>:ub ubDNDI_RESP(5,16)<16;8,2> { NoDDChk } // V row 1
+ mov (8) r[a0.5,64]<4>:ub ubDNDI_RESP(5,33)<16;8,2> { NoDDClr, NoDDChk } // U row 2
+ mov (8) r[a0.6,64]<4>:ub ubDNDI_RESP(5,32)<16;8,2> { NoDDChk } // V row 2
+ mov (8) r[a0.5,96]<4>:ub ubDNDI_RESP(5,49)<16;8,2> { NoDDClr, NoDDChk } // U row 3
+ mov (8) r[a0.6,96]<4>:ub ubDNDI_RESP(5,48)<16;8,2> { NoDDChk } // V row 3
+ mov (8) r[a0.5,128]<4>:ub ubDNDI_RESP(5,65)<16;8,2> { NoDDClr, NoDDChk } // U row 4
+ mov (8) r[a0.6,128]<4>:ub ubDNDI_RESP(5,64)<16;8,2> { NoDDChk } // V row 4
+ mov (8) r[a0.5,160]<4>:ub ubDNDI_RESP(5,81)<16;8,2> { NoDDClr, NoDDChk } // U row 5
+ mov (8) r[a0.6,160]<4>:ub ubDNDI_RESP(5,80)<16;8,2> { NoDDChk } // V row 5
+ mov (8) r[a0.5,192]<4>:ub ubDNDI_RESP(5,97)<16;8,2> { NoDDClr, NoDDChk } // U row 6
+ mov (8) r[a0.6,192]<4>:ub ubDNDI_RESP(5,96)<16;8,2> { NoDDChk } // V row 6
+ mov (8) r[a0.5,224]<4>:ub ubDNDI_RESP(5,113)<16;8,2> { NoDDClr, NoDDChk } // U row 7
+ mov (8) r[a0.6,224]<4>:ub ubDNDI_RESP(5,112)<16;8,2> { NoDDChk } // V row 7
+
+//send out data through data port; descriptor 0x120A8018 (media-block-write layout from common.inc): message length 9 (header + 8 rows), binding table index 0x18
+send (8) null<1>:d r31.0 0x5 0x120A8018:ud
+
+
+
+// FileName: DN_Save_422CP_16x8.asm
+// Author: Vivek Kumar
+// Description: Save one 16x8 blocks of DN output to the color pipe in 4-2-2 format
+
+
+.declare mubMSGHDR_DN_OUT_2 Base=r36.0 ElementSize=1 Type=ub // byte view of the second message: header r36, payload r37-r40
+
+// First message: header in r31 (X*2, Y, block size, color-pipe control dword), payload r32-r35
+mov (8) mudMSGHDR_DN_OUT(0)<1> r0<8;8,1>:ud // message header
+shl (1) mdMSGHDR_DN_OUT(0,0)<1> r7.0<0;1,0>:w 1:w { NoDDClr } // X origin * 2 (422 output)
+mov (1) mdMSGHDR_DN_OUT(0,1)<1> r7.1<0;1,0>:w { NoDDClr, NoDDChk } // Y origin
+mov (1) mudMSGHDR_DN_OUT(0,2)<1> 0x7000F:ud { NoDDClr, NoDDChk } // block width and height (16x8)
+
+//M0.3 - 0 - CP Enable, 1 - Area of Interest, 3:2 Message Format(TBD), 4:3 - Ignored, 31:5 CP state pointer
+//Compose area-of-interest bit + color pipe state pointer
+or (1) mudMSGHDR_DN_OUT(0,3)<1> r2.4<0;1,0>:ud r7.26<0;1,0>:b { NoDDChk }
+
+// First 8 x 8 Block: response columns 0-7 of every row; each payload register holds two 16-byte output rows (bytes 0-15 and 16-31)
+ mov (8) mubMSGHDR_DN_OUT(1)<2> ubDNDI_RESP(0,0)<8;8,1> { NoDDClr } // copy line of Y directly to memory as optimization
+ mov (8) mubMSGHDR_DN_OUT(1,16)<2> ubDNDI_RESP(0,16)<8;8,1> { NoDDClr, NoDDChk } // copy line of Y directly to memory as optimization
+ mov (8) mubMSGHDR_DN_OUT(2)<2> ubDNDI_RESP(0,32)<8;8,1> { NoDDClr } // copy line of Y directly to memory as optimization
+ mov (8) mubMSGHDR_DN_OUT(2,16)<2> ubDNDI_RESP(0,48)<8;8,1> { NoDDClr, NoDDChk } // copy line of Y directly to memory as optimization
+ mov (8) mubMSGHDR_DN_OUT(3)<2> ubDNDI_RESP(0,64)<8;8,1> { NoDDClr } // copy line of Y directly to memory as optimization
+ mov (8) mubMSGHDR_DN_OUT(3,16)<2> ubDNDI_RESP(0,80)<8;8,1> { NoDDClr, NoDDChk } // copy line of Y directly to memory as optimization
+ mov (8) mubMSGHDR_DN_OUT(4)<2> ubDNDI_RESP(0,96)<8;8,1> { NoDDClr } // copy line of Y directly to memory as optimization
+ mov (8) mubMSGHDR_DN_OUT(4,16)<2> ubDNDI_RESP(0,112)<8;8,1> { NoDDClr, NoDDChk } // copy line of Y directly to memory as optimization
+// Chroma: U comes from odd response bytes and lands on output bytes 1,5,9,13 of a row; V comes from even response bytes and lands on bytes 3,7,11,15
+ mov (4) mubMSGHDR_DN_OUT(1,1)<4> ubDNDI_RESP(5,1)<8;4,2> { NoDDClr, NoDDChk } // copy line of U directly to memory as optimization
+ mov (4) mubMSGHDR_DN_OUT(1,17)<4> ubDNDI_RESP(5,17)<8;4,2> { NoDDClr, NoDDChk } // copy line of U directly to memory as optimization
+
+ mov (4) mubMSGHDR_DN_OUT(1,3)<4> ubDNDI_RESP(5,0)<8;4,2> { NoDDChk } // copy line of V directly to memory as optimization
+ mov (4) mubMSGHDR_DN_OUT(1,19)<4> ubDNDI_RESP(5,16)<8;4,2> { NoDDChk } // copy line of V directly to memory as optimization
+ mov (4) mubMSGHDR_DN_OUT(2,1)<4> ubDNDI_RESP(5,33)<8;4,2> { NoDDClr, NoDDChk } // copy line of U directly to memory as optimization
+ mov (4) mubMSGHDR_DN_OUT(2,17)<4> ubDNDI_RESP(5,49)<8;4,2> { NoDDClr, NoDDChk } // copy line of U directly to memory as optimization
+
+ mov (4) mubMSGHDR_DN_OUT(2,3)<4> ubDNDI_RESP(5,32)<8;4,2> { NoDDChk } // copy line of V directly to memory as optimization
+ mov (4) mubMSGHDR_DN_OUT(2,19)<4> ubDNDI_RESP(5,48)<8;4,2> { NoDDChk } // copy line of V directly to memory as optimization
+ mov (4) mubMSGHDR_DN_OUT(3,1)<4> ubDNDI_RESP(5,65)<8;4,2> { NoDDClr, NoDDChk } // copy line of U directly to memory as optimization
+ mov (4) mubMSGHDR_DN_OUT(3,17)<4> ubDNDI_RESP(5,81)<8;4,2> { NoDDClr, NoDDChk } // copy line of U directly to memory as optimization
+
+ mov (4) mubMSGHDR_DN_OUT(3,3)<4> ubDNDI_RESP(5,64)<8;4,2> { NoDDChk } // copy line of V directly to memory as optimization
+ mov (4) mubMSGHDR_DN_OUT(3,19)<4> ubDNDI_RESP(5,80)<8;4,2> { NoDDChk } // copy line of V directly to memory as optimization
+ mov (4) mubMSGHDR_DN_OUT(4,1)<4> ubDNDI_RESP(5,97)<8;4,2> { NoDDClr, NoDDChk } // copy line of U directly to memory as optimization
+ mov (4) mubMSGHDR_DN_OUT(4,17)<4> ubDNDI_RESP(5,113)<8;4,2> { NoDDClr, NoDDChk } // copy line of U directly to memory as optimization
+
+ mov (4) mubMSGHDR_DN_OUT(4,3)<4> ubDNDI_RESP(5,96)<8;4,2> { NoDDChk } // copy line of V directly to memory as optimization
+ mov (4) mubMSGHDR_DN_OUT(4,19)<4> ubDNDI_RESP(5,112)<8;4,2> { NoDDChk } // copy line of V directly to memory as optimization
+
+// Second 8 x 8 Block: header r36 is a copy of r31 with the X field advanced by 0x10; source is response columns 8-15
+mov (8) r36.0<1>:ud r31.0<8;8,1>:ud
+add (1) r36.0<1>:ud r36.0<0;1,0>:w 0x10:w
+
+ mov (8) mubMSGHDR_DN_OUT_2(1)<2> ubDNDI_RESP(0,8)<8;8,1> { NoDDClr } // copy line of Y directly to memory as optimization
+ mov (8) mubMSGHDR_DN_OUT_2(1,16)<2> ubDNDI_RESP(0,24)<8;8,1> { NoDDClr, NoDDChk } // copy line of Y directly to memory as optimization
+ mov (8) mubMSGHDR_DN_OUT_2(2)<2> ubDNDI_RESP(0,40)<8;8,1> { NoDDClr } // copy line of Y directly to memory as optimization
+ mov (8) mubMSGHDR_DN_OUT_2(2,16)<2> ubDNDI_RESP(0,56)<8;8,1> { NoDDClr, NoDDChk } // copy line of Y directly to memory as optimization
+ mov (8) mubMSGHDR_DN_OUT_2(3)<2> ubDNDI_RESP(0,72)<8;8,1> { NoDDClr } // copy line of Y directly to memory as optimization
+ mov (8) mubMSGHDR_DN_OUT_2(3,16)<2> ubDNDI_RESP(0,88)<8;8,1> { NoDDClr, NoDDChk } // copy line of Y directly to memory as optimization
+ mov (8) mubMSGHDR_DN_OUT_2(4)<2> ubDNDI_RESP(0,104)<8;8,1> { NoDDClr } // copy line of Y directly to memory as optimization
+ mov (8) mubMSGHDR_DN_OUT_2(4,16)<2> ubDNDI_RESP(0,120)<8;8,1> { NoDDClr, NoDDChk } // copy line of Y directly to memory as optimization
+
+ mov (4) mubMSGHDR_DN_OUT_2(1,1)<4> ubDNDI_RESP(5,9)<8;4,2> { NoDDClr, NoDDChk } // copy line of U directly to memory as optimization
+ mov (4) mubMSGHDR_DN_OUT_2(1,17)<4> ubDNDI_RESP(5,25)<8;4,2> { NoDDClr, NoDDChk } // copy line of U directly to memory as optimization
+
+ mov (4) mubMSGHDR_DN_OUT_2(1,3)<4> ubDNDI_RESP(5,8)<8;4,2> { NoDDChk } // copy line of V directly to memory as optimization
+ mov (4) mubMSGHDR_DN_OUT_2(1,19)<4> ubDNDI_RESP(5,24)<8;4,2> { NoDDChk } // copy line of V directly to memory as optimization
+ mov (4) mubMSGHDR_DN_OUT_2(2,1)<4> ubDNDI_RESP(5,41)<8;4,2> { NoDDClr, NoDDChk } // copy line of U directly to memory as optimization
+ mov (4) mubMSGHDR_DN_OUT_2(2,17)<4> ubDNDI_RESP(5,57)<8;4,2> { NoDDClr, NoDDChk } // copy line of U directly to memory as optimization
+
+ mov (4) mubMSGHDR_DN_OUT_2(2,3)<4> ubDNDI_RESP(5,40)<8;4,2> { NoDDChk } // copy line of V directly to memory as optimization
+ mov (4) mubMSGHDR_DN_OUT_2(2,19)<4> ubDNDI_RESP(5,56)<8;4,2> { NoDDChk } // copy line of V directly to memory as optimization
+ mov (4) mubMSGHDR_DN_OUT_2(3,1)<4> ubDNDI_RESP(5,73)<8;4,2> { NoDDClr, NoDDChk } // copy line of U directly to memory as optimization
+ mov (4) mubMSGHDR_DN_OUT_2(3,17)<4> ubDNDI_RESP(5,89)<8;4,2> { NoDDClr, NoDDChk } // copy line of U directly to memory as optimization
+
+ mov (4) mubMSGHDR_DN_OUT_2(3,3)<4> ubDNDI_RESP(5,72)<8;4,2> { NoDDChk } // copy line of V directly to memory as optimization
+ mov (4) mubMSGHDR_DN_OUT_2(3,19)<4> ubDNDI_RESP(5,88)<8;4,2> { NoDDChk } // copy line of V directly to memory as optimization
+ mov (4) mubMSGHDR_DN_OUT_2(4,1)<4> ubDNDI_RESP(5,105)<8;4,2> { NoDDClr, NoDDChk } // copy line of U directly to memory as optimization
+ mov (4) mubMSGHDR_DN_OUT_2(4,17)<4> ubDNDI_RESP(5,121)<8;4,2> { NoDDClr, NoDDChk } // copy line of U directly to memory as optimization
+
+ mov (4) mubMSGHDR_DN_OUT_2(4,3)<4> ubDNDI_RESP(5,104)<8;4,2> { NoDDChk } // copy line of V directly to memory as optimization
+ mov (4) mubMSGHDR_DN_OUT_2(4,19)<4> ubDNDI_RESP(5,120)<8;4,2> { NoDDChk } // copy line of V directly to memory as optimization
+
+//send out data through data port: one message per 8x8 half; descriptor 0xA0A801B (media-block-write layout from common.inc) = message length 5 (header + 4 payload registers), binding table index 0x1B
+send (8) null<1>:d r31.0 0x5 0xA0A801B:ud
+send (8) null<1>:d r36.0 0x5 0xA0A801B:ud
+
+
+
+//End of Thread message: r127 receives a copy of r0 and is the single register sent (0x02000010 has message length 1 in the descriptor layout quoted in common.inc)
+
+mov (8) r127<1>:ud r0.0<8;8,1>:ud
+ send (1) null<1>:d r127 0x27 0x02000010
+
+
+.end_code
+.end_kernel
diff --git a/src/shaders/post_processing/gen7/PA_DN_PA.g4a b/src/shaders/post_processing/gen7/PA_DN_PA.g4a
new file mode 100644
index 0000000..5469949
--- /dev/null
+++ b/src/shaders/post_processing/gen7/PA_DN_PA.g4a
@@ -0,0 +1,403 @@
+/*
+ * Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// 57 // Total instruction count
+// 1 // Total kernel count
+
+.kernel PA_DN_PA
+.code
+
+
+
+// FileName: DN_PA_Core.asm
+// Author: Vivek Kumar
+// Description: Tasks for DN only case (16x8 block) for Packed format
+
+
+
+// FileName: DN.asm
+// Author: Vivek Kumar
+// Description: Tasks for DN only case (16x8 block)
+
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16) // assembler default only; every instruction in this kernel spells out its own execution size
+.default_register_type :ub // assembler default operand type (1-byte ub)
+
+.reg_count_total 128 // r127, the last of the 128, is used for the End-of-Thread message
+.reg_count_payload 7 // NOTE(review): presumably the number of registers preloaded at dispatch; r1, r2 and r7 are read before any write in this kernel - confirm
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later)
+// 0000,0000,0000
+// 0001(Spawn a root thread),0001 (Root thread spawn thread)
+// = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later)
+// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+// 0000,0000 (Binding table index, added later)
+// = 0x0C098700
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+ // Message Descriptors
+ // = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+ // 1 (header present 1) 0 1010 (media block write) 000000
+ // 00000000 (binding table index - set later)
+ // = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+ // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT
+
+// Byte, word, dword and float views of one base register, r30. No instruction in this kernel references them.
+.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+// Five typed views that all alias r9.0; ub4ROBUF steps 4 bytes between elements. No instruction in this kernel references them.
+.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+
+.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+
+
+// End of common.inc
+
+
+// FileName: DNDI.inc
+// Author: Vivek Kumar
+// Description: Include file for DN, DI and DNDI
+// Inputs: DI_ENABLE, DN_ENABLE, DN_PLANAR, DN_PACKED
+
+
+
+
+// End of common.inc
+
+
+//Interface:
+//Static Parameters:
+//r1
+
+
+//====================== Binding table (Explicit To DNDI)=========================================
+
+
+.declare mudMSGHDR_DNDI Base=r18 ElementSize=4 Type=ud // r18-r19: request built and sent by DNDI_Command.asm
+.declare mdMSGHDR_DNDI Base=r18 ElementSize=4 Type=d
+.declare mwMSGHDR_DNDI Base=r18 ElementSize=2 Type=w
+
+// STMM (r20) is declared but not referenced by any instruction in this kernel
+.declare mudMSGHDR_STMM Base=r20 ElementSize=4 Type=ud
+
+// r22 = header, r23 = payload of the denoise-history write (DN_Hist_Save.asm)
+.declare mudMSGHDR_HIST Base=r22 ElementSize=4 Type=ud
+
+// r24 = header, r25 = payload of the encoder-statistics write (DNDI_Enc_Stats_Save.asm)
+.declare mudMSGHDR_ENC_STATS Base=r24 ElementSize=4 Type=ud
+.declare muwMSGHDR_ENC_STATS Base=r24 ElementSize=2 Type=uw
+.declare mubMSGHDR_ENC_STATS Base=r24 ElementSize=1 Type=ub
+
+// r31 = header of the denoised-pixel write (DN_Save_PA.asm)
+.declare mudMSGHDR_DN_OUT Base=r31.0 ElementSize=4 Type=ud
+.declare mdMSGHDR_DN_OUT Base=r31.0 ElementSize=4 Type=d
+.declare mubMSGHDR_DN_OUT Base=r31.0 ElementSize=1 Type=ub
+
+// UVCOPY and UCOPY are two names for the same base register, r36; none of the four is referenced in this kernel
+.declare mudMSGHDR_UVCOPY Base=r36 ElementSize=4 Type=ud
+.declare mdMSGHDR_UVCOPY Base=r36 ElementSize=4 Type=d
+.declare mudMSGHDR_UCOPY Base=r36 ElementSize=4 Type=ud
+.declare mudMSGHDR_VCOPY Base=r38 ElementSize=4 Type=ud
+
+// DI_OUT1 shares r18 with the MSGHDR_DNDI views above; not referenced in this kernel
+.declare mudMSGHDR_DI_OUT1 Base=r18.0 ElementSize=4 Type=ud
+.declare mubMSGHDR_DI_OUT1 Base=r18.0 ElementSize=1 Type=ub
+
+// DI_OUT2 shares r23 with element (1) of mudMSGHDR_HIST; not referenced in this kernel
+.declare mudMSGHDR_DI_OUT2 Base=r23.0 ElementSize=4 Type=ud
+.declare mubMSGHDR_DI_OUT2 Base=r23.0 ElementSize=1 Type=ub
+
+//r45
+//Use r45 as message header, so no need to "mov" the data.
+
+.declare mudDN_Y_OUT Base=r45.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+// Message response (Denoised & DI-ed pixels & statistics); Use buffer 5. Destination of the send in DNDI_Command.asm (r46 onward).
+.declare udDNDI_RESP Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare uwDNDI_RESP Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare ubDNDI_RESP Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+// Message response (UV Copy); Use buffer 5
+.declare udDNDI_UV_RESP Base=r58.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare ubDNDI_UV_RESP Base=r58.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+//Temp GRFs: For 42X to 422 Conversion
+.declare uwDNDI_UVCOPY_TEMP Base=r10.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw //8 GRFs
+.declare ubDNDI_UVCOPY_TEMP Base=r10.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub //8 GRFs
+//---------------------------------------------------------------------------
+// Message descriptors
+//---------------------------------------------------------------------------
+// Extended message descriptor
+ // Message descriptor for sampler read
+ // = 000 0010 (message len 2) 00000 (resp len - set later, 12 or 5 or 11)
+ // 1 (header present 1) 0 11 (SIMD32/64 mode)
+ // 1000 (message type) 0000 (DI state index)
+ // 00000000 (binding table index - set later)
+ // = 0x040b8000
+
+
+// Attention: The Message Length is The Number of GRFs with Data Only, without the Header
+
+
+//---------------------------------------------------------------------------
+// VDI Return Data format
+//---------------------------------------------------------------------------
+// Defines for DI enabled
+
+
+// Defines for DI disabled
+
+
+
+// FileName: DNDI_Command.asm
+// Author: Vivek Kumar
+// Description: Sends a message to the VDI to process one DN (16x8) or DNDI (16x4) block
+
+// Prepare the DNDI send command: r18 = copy of r0, r19 receives the block origin (words 4 and 12)
+mov (8) mudMSGHDR_DNDI(0)<1> r0.0<8;8,1>:ud // message header
+mov (1) mwMSGHDR_DNDI(1,4)<1> r7.0<0;1,0>:w { NoDDClr } // horizontal origin // Do we need to add offset here? -vK
+mov (1) mwMSGHDR_DNDI(1,12)<1> r7.1<0;1,0>:w { NoDDChk } // vertical origin // Can these 2 be combined? - vK
+// Descriptor 0x49E8003 read with the sampler-message bit layout quoted in DNDI.inc: message length 2, response length 9 (r46-r54), binding table index 3. NOTE(review): bits 19:16 (0xE) differ from the 0x040b8000 template quoted there - confirm against the hardware spec.
+send (8) udDNDI_RESP(0)<1> r18 0x2 0x49E8003:ud
+
+// On Gen6, with VDI walker, use the XY pair returned rather than programmed above
+// VDI_RETURNED_XY is ordered XY when the walker is enabled, and is the same as programmed when the walker is disabled
+mov (2) r7.0<1>:w uwDNDI_RESP(4,14)<2;2,1> // horizontal/vertical origin in W.14 and W.15 of r50
+
+
+
+// FileName: DN_Hist_Save.asm
+// Author: Vivek Kumar
+// Description: Saves DN history data to statistics surface
+
+// Write denoise history to memory: the header is assembled in r27 and then copied to r22; r23 is the payload
+mov (8) r27<1>:ud r0.0<8;8,1>:ud // message header
+
+
+ mov (2) mudMSGHDR_HIST(1)<1> udDNDI_RESP(4,0)<2;2,1> // Move denoise history (first 2 dwords of r50) into the payload register r23 (4x2)
+
+
+shr (2) r27.0<1>:ud r7.0<2;2,1>:w 2:w // X,Y origin / 4
+add (1) r27.0<1>:ud r27.0<0;1,0>:ud r1.12<0;1,0>:uw { NoDDClr } // Add pitch to X origin
+mov (1) r27.2<1>:ud 0x10003:ud { NoDDChk } // block width and height
+// Descriptor 0x40A8021 read with the media-block-write bit layout quoted in common.inc: message length 2, no response, binding table index 0x21
+mov (8) mudMSGHDR_HIST(0)<1> r27.0<8;8,1>:ud
+send (8) null<1>:d r22 0x5 0x40A8021:ud
+
+
+
+// FileName: DNDI_Enc_Stats_Save.asm
+// Author: Vivek Kumar
+// Description: Saves Encoder Statistics data to statistics surface in case of DI enabled (for 16x4 block)
+
+// Write encoder statistics to memory: r24 = header, r25 = payload
+//Currently enable this only on Gen6 validation
+mov (8) mudMSGHDR_ENC_STATS(1)<1> 0x0:ud // Zero the whole payload register (r25) before the partial writes below
+mov (8) mudMSGHDR_ENC_STATS(0)<1> r0.0<8;8,1>:ud // message header
+
+shr (1) mudMSGHDR_ENC_STATS(0,0)<1> r7.0<0;1,0>:w 1:w { NoDDClr } //enable the flag after testing on si { NoDDClr } // X origin / 2
+mul (1) acc0.1<1>:ud r7.1<0;1,0>:w 3:w // Y origin * 3
+shr (1) mudMSGHDR_ENC_STATS(0,1)<1> acc0.1<0;1,0>:ud 2:w { NoDDClr, NoDDChk } //enable the flag after testing on si { NoDDClr, NoDDChk } // Y origin * 3/4
+mov (1) mudMSGHDR_ENC_STATS(0,2)<1> 0x50003:ud { NoDDChk } //enable the flag after testing on si { NoDDChk } // block width and height (8x3)
+add (2) mudMSGHDR_ENC_STATS(0,0)<1> mudMSGHDR_ENC_STATS(0,0)<2;2,1> r1.12<2;2,1>:uw // Add pitch to X,Y origin (r1.12 to X, r1.13 to Y)
+
+// NOTE(review): with the encoding of the other block-size immediates in this file (0x7001F is labelled 32x8), 0x50003 reads as 4 bytes x 6 rows = the 24 payload bytes filled below; the "(8x3)" label looks stale - confirm
+ //Data block for Encoder Statistics
+ //----------------------------------------------------
+ //| 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | Bytes
+ //----------------------------------------------------
+ //| BNE | X | X | X | X |
+ //----------------------------------------------------
+ //| X | SVCM | X |
+ //----------------------------------------------------
+ //| SHCM | STAD | X |
+ //----------------------------------------------------
+ //| X | X |
+ //----------------------------------------------------
+ //| X | SVCM | X |
+ //----------------------------------------------------
+ //| SHCM | STAD | X |
+ //----------------------------------------------------
+ mov (1) mubMSGHDR_ENC_STATS(1,0)<1> ubDNDI_RESP(4,8)<0;1,0> { NoDDClr } // r50 byte 8 -> payload byte 0
+ mov (1) muwMSGHDR_ENC_STATS(1,3)<1> uwDNDI_RESP(4,11)<0;1,0> { NoDDClr, NoDDChk } // r50 word 11 -> payload word 3
+ mov (2) muwMSGHDR_ENC_STATS(1,4)<1> uwDNDI_RESP(4,12)<2;2,1> { NoDDClr, NoDDChk } // r50 words 12-13 -> payload words 4-5
+ mov (1) muwMSGHDR_ENC_STATS(1,9)<1> uwDNDI_RESP(4,8)<0;1,0> { NoDDClr, NoDDChk } // r50 word 8 -> payload word 9
+ mov (2) muwMSGHDR_ENC_STATS(1,10)<1> uwDNDI_RESP(4,9)<2;2,1> { NoDDChk } // r50 words 9-10 -> payload words 10-11
+
+// Same descriptor as the denoise-history write: message length 2, binding table index 0x21
+send (8) null<1>:d r24 0x5 0x40A8021:ud
+
+
+
+// FileName: DN_Save_PA.asm
+// Author: Vivek Kumar
+// Description: Save one 16x8 blocks of DN output in Packed format
+
+
+add (4) a0.4<1>:uw r2.28<4;4,1>:ub 1024:w // a0.4-a0.7 = bytes r2.28-r2.31 + 1024; a0.4/a0.5/a0.6 address the Y/U/V destinations below. Presumably 1024 is the byte address of r32 (32-byte registers), i.e. the register after the r31 header - TODO confirm. Original note: it starts at m14
+
+mov (8) mudMSGHDR_DN_OUT(0)<1> r0<8;8,1>:ud // message header
+shl (1) mdMSGHDR_DN_OUT(0,0)<1> r7.0<0;1,0>:w 1:w { NoDDClr } // X origin * 2 (422 output)
+mov (1) mdMSGHDR_DN_OUT(0,1)<1> r7.1<0;1,0>:w { NoDDClr, NoDDChk } // Y origin
+mov (1) mudMSGHDR_DN_OUT(0,2)<1> 0x7001F:ud { NoDDChk } // block width and height (32x8)
+// Output rows are 32 bytes apart; response rows are 16 bytes apart. Y lands on every 2nd output byte, U and V on every 4th.
+ mov (16) r[a0.4,0]<2>:ub ubDNDI_RESP(0,0)<16;16,1> { NoDDClr } // Y row 0: 16 response bytes written straight into the outgoing payload
+ mov (16) r[a0.4,32]<2>:ub ubDNDI_RESP(0,16)<16;16,1> { NoDDClr } // Y row 1
+ mov (16) r[a0.4,64]<2>:ub ubDNDI_RESP(0,32)<16;16,1> { NoDDClr } // Y row 2
+ mov (16) r[a0.4,96]<2>:ub ubDNDI_RESP(0,48)<16;16,1> { NoDDClr } // Y row 3
+ mov (16) r[a0.4,128]<2>:ub ubDNDI_RESP(0,64)<16;16,1> { NoDDClr } // Y row 4
+ mov (16) r[a0.4,160]<2>:ub ubDNDI_RESP(0,80)<16;16,1> { NoDDClr } // Y row 5
+ mov (16) r[a0.4,192]<2>:ub ubDNDI_RESP(0,96)<16;16,1> { NoDDClr } // Y row 6
+ mov (16) r[a0.4,224]<2>:ub ubDNDI_RESP(0,112)<16;16,1> { NoDDClr } // Y row 7
+ mov (8) r[a0.5,0]<4>:ub ubDNDI_RESP(5,1)<16;8,2> { NoDDClr, NoDDChk } // U row 0: odd bytes of the chroma row starting at r51
+ mov (8) r[a0.6,0]<4>:ub ubDNDI_RESP(5,0)<16;8,2> { NoDDChk } // V row 0: even bytes of the same chroma row
+ mov (8) r[a0.5,32]<4>:ub ubDNDI_RESP(5,17)<16;8,2> { NoDDClr, NoDDChk } // U row 1
+ mov (8) r[a0.6,32]<4>:ub ubDNDI_RESP(5,16)<16;8,2> { NoDDChk } // V row 1
+ mov (8) r[a0.5,64]<4>:ub ubDNDI_RESP(5,33)<16;8,2> { NoDDClr, NoDDChk } // U row 2
+ mov (8) r[a0.6,64]<4>:ub ubDNDI_RESP(5,32)<16;8,2> { NoDDChk } // V row 2
+ mov (8) r[a0.5,96]<4>:ub ubDNDI_RESP(5,49)<16;8,2> { NoDDClr, NoDDChk } // U row 3
+ mov (8) r[a0.6,96]<4>:ub ubDNDI_RESP(5,48)<16;8,2> { NoDDChk } // V row 3
+ mov (8) r[a0.5,128]<4>:ub ubDNDI_RESP(5,65)<16;8,2> { NoDDClr, NoDDChk } // U row 4
+ mov (8) r[a0.6,128]<4>:ub ubDNDI_RESP(5,64)<16;8,2> { NoDDChk } // V row 4
+ mov (8) r[a0.5,160]<4>:ub ubDNDI_RESP(5,81)<16;8,2> { NoDDClr, NoDDChk } // U row 5
+ mov (8) r[a0.6,160]<4>:ub ubDNDI_RESP(5,80)<16;8,2> { NoDDChk } // V row 5
+ mov (8) r[a0.5,192]<4>:ub ubDNDI_RESP(5,97)<16;8,2> { NoDDClr, NoDDChk } // U row 6
+ mov (8) r[a0.6,192]<4>:ub ubDNDI_RESP(5,96)<16;8,2> { NoDDChk } // V row 6
+ mov (8) r[a0.5,224]<4>:ub ubDNDI_RESP(5,113)<16;8,2> { NoDDClr, NoDDChk } // U row 7
+ mov (8) r[a0.6,224]<4>:ub ubDNDI_RESP(5,112)<16;8,2> { NoDDChk } // V row 7
+
+//send out data through data port; descriptor 0x120A8018 (media-block-write layout from common.inc): message length 9 (header + 8 rows), binding table index 0x18
+send (8) null<1>:d r31.0 0x5 0x120A8018:ud
+
+
+
+//End of Thread message: r127 receives a copy of r0 and is the single register sent (0x02000010 has message length 1 in the descriptor layout quoted in common.inc)
+
+mov (8) r127<1>:ud r0.0<8;8,1>:ud
+ send (1) null<1>:d r127 0x27 0x02000010
+
+
+.end_code
+.end_kernel
diff --git a/src/shaders/post_processing/gen7/PL2_AVS_Buf_0.g4a b/src/shaders/post_processing/gen7/PL2_AVS_Buf_0.g4a
new file mode 100644
index 0000000..b92a5d0
--- /dev/null
+++ b/src/shaders/post_processing/gen7/PL2_AVS_Buf_0.g4a
@@ -0,0 +1,542 @@
+/*
+ * Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// 44 // Total instruction count
+// 1 // Total kernel count
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+// Assembler defaults: execution size 16 and register type :ub apply where an instruction gives none.
+.default_execution_size (16)
+.default_register_type :ub
+// Register budget: 128 registers in total, 7 of them payload (presumably r0 plus static r1-r6 -- confirm).
+.reg_count_total 128
+.reg_count_payload 7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later)
+// 0000,0000,0000
+// 0001(Spawn a root thread),0001 (Root thread spawn thread)
+// = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later)
+// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+// 0000,0000 (Binding table index, added later)
+// = 0x0C098700
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+ // Message Descriptors
+ // = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+ // 1 (header present 1) 0 1010 (media block write) 000000
+ // 00000000 (binding table index - set later)
+ // = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+ // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT
+// Message payload area: four typed views (ub, uw, ud, f) of the same registers starting at r30.
+// The byte and word views use a <16;16,1> source region; the dword and float views use <8;8,1>.
+.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+// All five names below alias the same storage at r9.0; they differ only in element type and region.
+.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+
+.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+// ub4 view: byte elements with a stride of 4 (<32;8,4> source region, <4> destination stride).
+.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+
+
+// End of common.inc
+
+
+// FileName: PL2_AVS_Buf_0.asm
+// Author: Tatiya, Rupesh
+// Description: Loads 8x8 AVS/IEF PL2 data into Buffer 0
+
+
+
+// FileName : PL2_AVS_Buf.asm
+// Author : Tatiya, Rupesh
+// Description : Loads 8x8 AVS/IEF PL2 data into Buffer N
+
+
+
+// Module name: Scaling.inc
+
+
+
+
+// Description: Includes all definitions explicit to Fast Composite.
+
+
+
+
+// End of common.inc
+
+
+//========== GRF partition ==========
+ // r0 header : r0 (1 GRF)
+ // Static parameters : r1 - r6 (6 GRFS)
+ // Inline parameters : r7 - r8 (2 GRFs)
+ // MSGSRC : r27 (1 GRF)
+//===================================
+
+//Interface:
+//========== Static Parameters (Explicit To Fast Composite) ==========
+//r1
+//CSC Set 0
+
+// Dword view of the static parameters starting at r1.0 (the "CSC Set 0" register named above).
+.declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud
+
+//Constant alpha
+
+
+//r2
+
+
+// Gen7 AVS WA
+
+
+// WiDi Definitions
+
+
+//Colorfill
+
+
+ // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise.
+// Byte view at r2.20; the <0;1,0> source region re-reads that one element for every channel.
+.declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub
+
+//r3
+//Normalised Ratio of Horizontal step size with main video for all layers
+
+
+ //Normalised Ratio of Horizontal step size with main video for all layers becomes
+ //Normalised Horizontal step size for all layers in VP_Setup.asm
+
+
+//r4
+//Normalised Vertical step size for all layers
+
+
+//r5
+//Normalised Vertical Frame Origin for all layers
+
+
+//r6
+//Normalised Horizontal Frame Origin for all layers
+
+
+//========== Inline Parameters (Explicit To Fast Composite) ==========
+
+
+//Main video Step X
+
+
+//====================== Binding table (Explicit To Fast Composite)=========================================
+
+
+//Used by Interlaced Scaling Kernels
+
+
+//========== Sampler State Table Index (Explicit To Fast Composite)==========
+//Sampler Index for AVS/IEF messages
+
+
+//Sampler Index for SIMD16 sampler messages
+
+
+//=============================================================================
+// Data buffers, float views. BUFFER_0..3 are based at r64, r80, r96, r112; BUFFER_4 at r28; BUFFER_5 at r46.
+.declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+// ud views: same bases, unsigned dword elements.
+.declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+// uw views: same bases, unsigned word elements.
+.declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_4 Base=r28.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_5 Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+// ub views: same bases, contiguous byte elements.
+.declare ubBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+// ub4 views: same bases, byte elements with a stride of 4 (<32;8,4> source region, <4> destination stride).
+.declare ub4BUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+
+//Pointer to mask reg
+
+
+//r18
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+// Dword view of r18; by its name, the first register of CSC coefficients.
+.declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF
+
+//r19
+
+// Dword view of r19; by its name, the second register of CSC coefficients.
+.declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF
+
+
+//r20
+// Word view of r20; presumably a scratch copy of the alpha mask (per the _TEMP suffix) -- confirm.
+.declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF
+
+//r21
+// Word view of r21; by its name, the alpha mask register.
+.declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF
+
+//r22
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as
+//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT
+
+//r23
+
+
+//Lumakey
+
+
+//r24
+
+
+//r25
+
+
+//r26
+
+
+//defines to generate LABELS during compile time.
+
+
+ // Message Header
+ // m0.7 31:0 Debug
+ // m0.6 31:0 Debug
+ // m0.5 31:0 Ignored
+ // m0.4 31:0 Ignored
+ // m0.3 31:0 Ignored
+ // m0.2 31:16 Ignored
+ // 15 Alpha Write Channel Mask enable=0, disable=1
+ // 14 Blue Write Channel Mask (U)
+ // 13 Green Write Channel Mask (Y)
+ // 12 Red Write Channel Mask (V)
+ // 11:0 Ignored
+ // m0.1 Ignored
+ // m0.0 Ignored
+
+
+ // AVS payload
+ // m1.7 Group ID Number
+ // m1.6 U 2nd Derivative ---> NLAS dx
+ // m1.5 Delta V ---> Step Y
+ // m1.4 Delta U ---> Step X
+ // m1.3 Pixel 0 V Address ---> ORIY (Y0)
+ // m1.2 Pixel 0 U Address ---> ORIX (X0)
+ // m1.1 Vertical Block Number
+ // m1.0 Reserved
+
+ // Sampler Message Descriptor
+ // 31:29 Reserved 000
+ // 28:25 Message length 0010
+ // 24:20 Response length xxxxx ---> 4GRFs for each enabled channel (AVS), 2GRFs for each enabled channel (sample unorm)
+ // 19 Header Present 1
+ // 18:17 SIMD Mode 11 ---> SIMD32/64
+ // 16:12 Message Type xxxxx ---> 01011 sample_8x8, 01100 (sample_unorm), 01010 (sample_unorm+killpix)
+ // 11:8 Sampler Index xxxx
+ // 7:0 Binding Table Index xxxxxxxx
+
+
+ // Msg Header M0.2
+ // 15:15 Alpha Write Channel Mask, 0: written back, 1: not written back
+ // 14:14 Blue Write Channel Mask
+ // 13:13 Green Write Channel Mask
+ // 12:12 Red Write Channel Mask
+
+
+//By design, Buffer 0,1,2,3 always have Layer 0 and Buffer 4,5 always have L1-L7
+
+
+//used to generate LABELS at compile time.
+
+
+ // 18:17 SIMD Mode 10 ---> SIMD16
+ // 16:12 Message Type xxxxx ---> 00000 (SIMD16)
+
+
+//r10-17 - 8 GRFs to load SIMD16 data (up to 4 channels)
+//r18-19 - 2 GRFs to store sampler ramp.
+ // Payload views, all based at r14.0. NOTE(review): the ud/ub names lack the 'S' of SCALING -- presumably a typo; kept unchanged.
+ .declare mfSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+ .declare muwSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+ .declare mudCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+ .declare mubCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=1 SrcRegion=<32;32,1> DstRegion=<1> Type=ub
+ // Temp views, all based at r9.0.
+ // NOTE(review): the ub4 view below uses DstRegion=<1> although its source stride is 4 -- confirm this is intended.
+ .declare fSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+ .declare udSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+ .declare ub4SCALING_0X_34X_TEMP Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<1> Type=ub
+ .declare uwSCALING_0X_34X_TEMP Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+ // NOTE(review): the heading of this section places the sampler ramp in r18-19, but it is declared at r9.0 -- confirm.
+ // Sampler ramp is used for Scaling 0X_0.34X
+ .declare fSAMPLER_RAMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> Type=f // 1 GRFs, 8 elements
+
+
+ //#define rMSGDSC_UV r23.0
+
+
+//End of _SCALING_
+// Kernel body: loads layer 0 into BUFFER_0 (declared at r64) with two sample_8x8 messages,
+// Y first (with the optional Gen7 AVS workaround on pixel-0 U), then UV. Skipped when f0.1 is clear.
+ //NOTE: We need offsets for second half of LAYER 0 - even if we do not load it.
+ //Update the channel offset in the buffers for the lower 8x4 data for BUFFER_0.
+ mov (1) r22.4<1>:ud 0x400040:ud // presumably two packed 16-bit offsets of 0x40 each -- confirm
+
+
+ //Check if layer is to be skipped
+
+
+ // f0.1 pre-computed in Set_Layer_0
+ (-f0.1) jmpi (1) SKIP_AVS_LOAD_L0_0_
+
+
+ //AVS_PAYLOAD already has all the data loaded at this point
+ add (1) a0.0:ud r23.5<0;1,0>:ud 0x44EB400:ud //msg desc: msg len 2, resp len 4, header present, type 01011 (sample_8x8), sampler index 4; r23.5 is added on top
+
+ mov (1) r16.2:ud 0x0000D000:ud // m0.2 write mask: bits 15,14,12 set (A, B/U, R/V disabled), so only Green (Y) is returned
+
+
+ mov (1) r25.7<1>:ud r7.7:ud { NoDDClr } // payload mirror .7 (Group ID Number slot) <- r7.7
+ mov (1) r25.1<1>:ud r7.12:uw { NoDDChk } // payload mirror .1 (Vertical Block Number slot) <- r7.12:uw
+
+
+ // set the vertical block number
+
+
+ mov (8) r17.0:ud r25.0<8;8,1>:ud // Copy the payload mirror r25 into r17, the second register of the 2-register message sent from r16
+
+ // Gen7 AVS WA Only for YUV packed surfaces, NV12 and Y-channel only for Planar surfaces
+ // if (((int)(u_left*width + 5.0/256) > (int)(u_left*width))
+ // {
+ // modified_u_coord = u_coord - 5.0/(256*width); //floating point
+ // }
+ // else if(((int)(u_left*width + 255.0/256) == (int)(u_left*width))
+ // {
+ // modified_u_coord = u_coord + 1.0/(256*width); //floating point
+ // }
+ // else{
+ // modified_u_coord = u_coord;
+ // }
+ // Where u_left = u - 2*du + 3*ddu for IEF On
+ // And u_left = u for IEF Off case
+ //
+
+ // check whether Gen7 AVS WA is enabled,
+ mov (1) r14.8:uw f0.0:uw // save f0.0
+ mov (1) r14.5:f r17.2<0;1,0>:f // save pixel 0 U for chroma
+
+ and.nz.f0.0 (1) null<1>:uw r2.3:uw 0x2:uw // f0.0 = bit 1 of r2.3:uw (WA enable)
+ (-f0.0)jmpi (1) GEN7_PL2_AVS_WA_DONE_L0_0_
+
+ // Gen7 AVS WA, check if IEF is ON for choosing Gen7 AVS WA formula
+
+ and.nz.f0.0 (8) null<1>:uw r2.3<0;1,0>:uw 0x4:uw // f0.0 = bit 2 of r2.3:uw (IEF on), evaluated 8 wide for the predicated SIMD8 ops below
+ (f0.0)mov (8) acc0.0:f r17.2<0;1,0>:f // acc = u (payload .2)
+ (f0.0)mac (8) acc0.0:f r17.4<0;1,0>:f -2.0:f // acc += du (payload .4) * -2
+ (f0.0)mac (8) acc0.0:f r17.6<0;1,0>:f 3.0:f // acc += ddu (payload .6) * 3
+ (f0.0)mov (1) r14.2:f acc0:f // IEF ON, rTEMP3.2 = u_left
+ (-f0.0)mov (1) r14.2:f r17.2<0;1,0>:f // IEF OFF, rTEMP3.2 = u_left
+
+ and (1) r14.1:ud r2.3:uw 0xFFF8:uw // keep bits 15:3 of r2.3:uw (the width field; bits 1 and 2 are the flags tested above)
+ asr (1) r14.1:ud r14.1:ud 3:d // shift the field down to bit 0
+ mov (1) r14.1:f r14.1:ud // rTEMP3.1 = (float)width
+
+ // Gen7 AVS WA, if (int)(u_left*width + 5.0/256) > (int)(u_left*width)
+ mul (1) r14.0:f r14.2:f r14.1:f // rTEMP3.0 = u_left*width
+ add (1) r14.2:f r14.0:f 0.01953125:f // rTEMP3.2 = u_left*width + 5.0/256
+ add (1) r14.3:f r14.0:f 0.99609375:f // rTEMP3.3 = u_left*width + 255.0/256
+
+ //Check if the values are < 0 and account for (int) cast of negative numbers
+
+ //(int)(u_left*width)
+ cmp.l.f0.0 (1) null<1>:f r14.0:f 0.00000000:f // f0.0 = value < 0
+ mov (1) r14.0:d r14.0:f // float -> int conversion in place
+ (f0.0)add (1) r14.0:d r14.0<0;1,0>:d -1:d // negative input: subtract 1
+
+ //(int)(u_left*width + 5.0/256)
+ cmp.l.f0.0 (1) null<1>:f r14.2:f 0.00000000:f // f0.0 = value < 0
+ mov (1) r14.2:d r14.2:f // float -> int conversion in place
+ (f0.0)add (1) r14.2:d r14.2<0;1,0>:d -1:d // negative input: subtract 1
+
+ //(int)(u_left*width + 255.0/256)
+ cmp.l.f0.0 (1) null<1>:f r14.3:f 0.00000000:f // f0.0 = value < 0
+ mov (1) r14.3:d r14.3:f // float -> int conversion in place
+ (f0.0)add (1) r14.3:d r14.3<0;1,0>:d -1:d // negative input: subtract 1
+
+ mov (1) f0.0:uw 0:uw // clear f0.0 so it stays 0 when the predicated cmp.e below is skipped
+ //if (((int)(u_left*width + 5.0/256) > (int)(u_left*width))
+ cmp.g.f1.0 (1) null<1>:d r14.2:d r14.0:d
+ // modified_u_coord = u_coord - 5.0/(256*width); //floating point
+ (f1.0) add (1) r17.2:f r17.2<0;1,0>:f -r2.3:f // r2.3:f presumably holds 5.0/(256*width); likely different bytes than r2.3:uw -- confirm
+ //else if(((int)(u_left*width + 255.0/256) == (int)(u_left*width))
+ (-f1.0) cmp.e.f0.0 (1) null<1>:d r14.3:d r14.0:d
+ // modified_u_coord = u_coord + 1.0/(256*width); //floating point
+ (f0.0) add (1) r17.2:f r17.2<0;1,0>:f r2.2:f // r2.2:f presumably holds 1.0/(256*width) -- confirm
+
+GEN7_PL2_AVS_WA_DONE_L0_0_:
+ mov (1) f0.0:uw r14.8:uw // restore f0.0
+
+ send (1) uwBUFFER_0(0)<1> r16 0x2 a0.0:ud
+ // Returns Y data in 4 GRFs in scrambled order (matches response length 4 in the descriptor above)
+
+ mov (1) r17.2:f r14.5:f // restore pixel 0 U for chroma, No AVS WA for chroma
+
+ add (1) a0.0:ud r23.5<0;1,0>:ud 0x48EB801:ud // msg desc: msg len 2, resp len 8, sample_8x8, sampler index 8; 1 is added to change BI to UV
+ mov (1) r16.2:ud 0x0000A000:ud // m0.2 write mask: bits 15,13 set (A, G/Y disabled), so Red (V) + Blue (U) are returned
+
+ send (1) uwBUFFER_0(4)<1> r16 0x2 a0.0:ud
+ // Returns UV data in 8 GRFs in scrambled order, written from row 4 of BUFFER_0 (after the 4 Y GRFs)
+
+SKIP_AVS_LOAD_L0_0_:
+ nop
+
+
diff --git a/src/shaders/post_processing/gen7/PL2_AVS_Buf_1.g4a b/src/shaders/post_processing/gen7/PL2_AVS_Buf_1.g4a
new file mode 100644
index 0000000..9609f60
--- /dev/null
+++ b/src/shaders/post_processing/gen7/PL2_AVS_Buf_1.g4a
@@ -0,0 +1,535 @@
+/*
+ * Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// 42 // Total instruction count
+// 1 // Total kernel count
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+// Assembler defaults: execution size 16 and register type :ub apply where an instruction gives none.
+.default_execution_size (16)
+.default_register_type :ub
+// Register budget: 128 registers in total, 7 of them payload (presumably r0 plus static r1-r6 -- confirm).
+.reg_count_total 128
+.reg_count_payload 7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later)
+// 0000,0000,0000
+// 0001(Spawn a root thread),0001 (Root thread spawn thread)
+// = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later)
+// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+// 0000,0000 (Binding table index, added later)
+// = 0x0C098700
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+ // Message Descriptors
+ // = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+ // 1 (header present 1) 0 1010 (media block write) 000000
+ // 00000000 (binding table index - set later)
+ // = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+ // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT
+// Message payload area: four typed views (ub, uw, ud, f) of the same registers starting at r30.
+// The byte and word views use a <16;16,1> source region; the dword and float views use <8;8,1>.
+.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+// All five names below alias the same storage at r9.0; they differ only in element type and region.
+.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+
+.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+// ub4 view: byte elements with a stride of 4 (<32;8,4> source region, <4> destination stride).
+.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+
+
+// End of common.inc
+
+
+// FileName: PL2_AVS_Buf_1.asm
+// Author: Tatiya, Rupesh
+// Description: Loads 8x8 AVS/IEF PL2 data into Buffer 1
+
+
+
+// FileName : PL2_AVS_Buf.asm
+// Author : Tatiya, Rupesh
+// Description : Loads 8x8 AVS/IEF PL2 data into Buffer N
+
+
+
+// Module name: Scaling.inc
+
+
+
+
+// Description: Includes all definitions explicit to Fast Composite.
+
+
+
+
+// End of common.inc
+
+
+//========== GRF partition ==========
+ // r0 header : r0 (1 GRF)
+ // Static parameters : r1 - r6 (6 GRFS)
+ // Inline parameters : r7 - r8 (2 GRFs)
+ // MSGSRC : r27 (1 GRF)
+//===================================
+
+//Interface:
+//========== Static Parameters (Explicit To Fast Composite) ==========
+//r1
+//CSC Set 0
+
+// Dword view of the static parameters starting at r1.0 (the "CSC Set 0" register named above).
+.declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud
+
+//Constant alpha
+
+
+//r2
+
+
+// Gen7 AVS WA
+
+
+// WiDi Definitions
+
+
+//Colorfill
+
+
+ // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise.
+// Byte view at r2.20; the <0;1,0> source region re-reads that one element for every channel.
+.declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub
+
+//r3
+//Normalised Ratio of Horizontal step size with main video for all layers
+
+
+ //Normalised Ratio of Horizontal step size with main video for all layers becomes
+ //Normalised Horizontal step size for all layers in VP_Setup.asm
+
+
+//r4
+//Normalised Vertical step size for all layers
+
+
+//r5
+//Normalised Vertical Frame Origin for all layers
+
+
+//r6
+//Normalised Horizontal Frame Origin for all layers
+
+
+//========== Inline Parameters (Explicit To Fast Composite) ==========
+
+
+//Main video Step X
+
+
+//====================== Binding table (Explicit To Fast Composite)=========================================
+
+
+//Used by Interlaced Scaling Kernels
+
+
+//========== Sampler State Table Index (Explicit To Fast Composite)==========
+//Sampler Index for AVS/IEF messages
+
+
+//Sampler Index for SIMD16 sampler messages
+
+
+//=============================================================================
+// Data buffers, float views. BUFFER_0..3 are based at r64, r80, r96, r112; BUFFER_4 at r28; BUFFER_5 at r46.
+.declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+// ud views: same bases, unsigned dword elements.
+.declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+// uw views: same bases, unsigned word elements.
+.declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_4 Base=r28.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_5 Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+// ub views: same bases, contiguous byte elements.
+.declare ubBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+// ub4 views: same bases, byte elements with a stride of 4 (<32;8,4> source region, <4> destination stride).
+.declare ub4BUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+
+//Pointer to mask reg
+
+
+//r18
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+// Dword view of r18; by its name, the first register of CSC coefficients.
+.declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF
+
+//r19
+
+// Dword view of r19; by its name, the second register of CSC coefficients.
+.declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF
+
+
+//r20
+// Word view of r20; presumably a scratch copy of the alpha mask (per the _TEMP suffix) -- confirm.
+.declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF
+
+//r21
+// Word view of r21; by its name, the alpha mask register.
+.declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF
+
+//r22
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as
+//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT
+
+//r23
+
+
+//Lumakey
+
+
+//r24
+
+
+//r25
+
+
+//r26
+
+
+//defines to generate LABELS during compile time.
+
+
+ // Message Header
+ // m0.7 31:0 Debug
+ // m0.6 31:0 Debug
+ // m0.5 31:0 Ignored
+ // m0.4 31:0 Ignored
+ // m0.3 31:0 Ignored
+ // m0.2 31:16 Ignored
+ // 15 Alpha Write Channel Mask enable=0, disable=1
+ // 14 Blue Write Channel Mask (U)
+ // 13 Green Write Channel Mask (Y)
+ // 12 Red Write Channel Mask (V)
+ // 11:0 Ignored
+ // m0.1 Ignored
+ // m0.0 Ignored
+
+
+ // AVS payload
+ // m1.7 Group ID Number
+ // m1.6 U 2nd Derivative ---> NLAS dx
+ // m1.5 Delta V ---> Step Y
+ // m1.4 Delta U ---> Step X
+ // m1.3 Pixel 0 V Address ---> ORIY (Y0)
+ // m1.2 Pixel 0 U Address ---> ORIX (X0)
+ // m1.1 Vertical Block Number
+ // m1.0 Reserved
+
+ // Sampler Message Descriptor
+ // 31:29 Reserved 000
+ // 28:25 Message length 0010
+ // 24:20 Response length xxxxx ---> 4GRFs for each enabled channel (AVS), 2GRFs for each enabled channel (sample unorm)
+ // 19 Header Present 1
+ // 18:17 SIMD Mode 11 ---> SIMD32/64
+ // 16:12 Message Type xxxxx ---> 01011 sample_8x8, 01100 (sample_unorm), 01010 (sample_unorm+killpix)
+ // 11:8 Sampler Index xxxx
+ // 7:0 Binding Table Index xxxxxxxx
+
+
+ // Msg Header M0.2
+ // 15:15 Alpha Write Channel Mask, 0: written back, 1: not written back
+ // 14:14 Blue Write Channel Mask
+ // 13:13 Green Write Channel Mask
+ // 12:12 Red Write Channel Mask
+
+
+//By design, Buffer 0,1,2,3 always have Layer 0 and Buffer 4,5 always have L1-L7
+
+
+//used to generate LABELS at compile time.
+
+
+ // 18:17 SIMD Mode 10 ---> SIMD16
+ // 16:12 Message Type xxxxx ---> 00000 (SIMD16)
+
+
+//r10-17 - 8 GRFs to load SIMD16 data (up to 4 channels)
+//r18-19 - 2 GRFs to store sampler ramp.
+ // Payload views, all based at r14.0. NOTE(review): the ud/ub names lack the 'S' of SCALING -- presumably a typo; kept unchanged.
+ .declare mfSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+ .declare muwSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+ .declare mudCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+ .declare mubCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=1 SrcRegion=<32;32,1> DstRegion=<1> Type=ub
+ // Temp views, all based at r9.0.
+ // NOTE(review): the ub4 view below uses DstRegion=<1> although its source stride is 4 -- confirm this is intended.
+ .declare fSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+ .declare udSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+ .declare ub4SCALING_0X_34X_TEMP Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<1> Type=ub
+ .declare uwSCALING_0X_34X_TEMP Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+ // NOTE(review): the heading of this section places the sampler ramp in r18-19, but it is declared at r9.0 -- confirm.
+ // Sampler ramp is used for Scaling 0X_0.34X
+ .declare fSAMPLER_RAMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> Type=f // 1 GRFs, 8 elements
+
+
+ //#define rMSGDSC_UV r23.0
+
+
+//End of _SCALING_
+// Kernel body: loads layer 0 into BUFFER_1 (declared at r80) with two sample_8x8 messages,
+// Y first (with the optional Gen7 AVS workaround on pixel-0 U), then UV. Skipped when f0.1 is clear.
+ //Check if layer is to be skipped
+
+
+ // f0.1 pre-computed in Set_Layer_0
+ (-f0.1) jmpi (1) SKIP_AVS_LOAD_L0_1_
+
+
+ //AVS_PAYLOAD already has all the data loaded at this point
+ add (1) a0.0:ud r23.5<0;1,0>:ud 0x44EB400:ud //msg desc: msg len 2, resp len 4, header present, type 01011 (sample_8x8), sampler index 4; r23.5 is added on top
+
+ mov (1) r16.2:ud 0x0000D000:ud // m0.2 write mask: bits 15,14,12 set (A, B/U, R/V disabled), so only Green (Y) is returned
+
+
+ // set the vertical block number
+
+ add (1) r25.1<1>:ud r7.12:uw 1:ud // payload mirror .1 (Vertical Block Number slot) = r7.12:uw + 1
+
+
+ mov (8) r17.0:ud r25.0<8;8,1>:ud // Copy the payload mirror r25 into r17, the second register of the 2-register message sent from r16
+
+ // Gen7 AVS WA Only for YUV packed surfaces, NV12 and Y-channel only for Planar surfaces
+ // if (((int)(u_left*width + 5.0/256) > (int)(u_left*width))
+ // {
+ // modified_u_coord = u_coord - 5.0/(256*width); //floating point
+ // }
+ // else if(((int)(u_left*width + 255.0/256) == (int)(u_left*width))
+ // {
+ // modified_u_coord = u_coord + 1.0/(256*width); //floating point
+ // }
+ // else{
+ // modified_u_coord = u_coord;
+ // }
+ // Where u_left = u - 2*du + 3*ddu for IEF On
+ // And u_left = u for IEF Off case
+ //
+
+ // check whether Gen7 AVS WA is enabled,
+ mov (1) r14.8:uw f0.0:uw // save f0.0
+ mov (1) r14.5:f r17.2<0;1,0>:f // save pixel 0 U for chroma
+
+ and.nz.f0.0 (1) null<1>:uw r2.3:uw 0x2:uw // f0.0 = bit 1 of r2.3:uw (WA enable)
+ (-f0.0)jmpi (1) GEN7_PL2_AVS_WA_DONE_L0_1_
+
+ // Gen7 AVS WA, check if IEF is ON for choosing Gen7 AVS WA formula
+
+ and.nz.f0.0 (8) null<1>:uw r2.3<0;1,0>:uw 0x4:uw // f0.0 = bit 2 of r2.3:uw (IEF on), evaluated 8 wide for the predicated SIMD8 ops below
+ (f0.0)mov (8) acc0.0:f r17.2<0;1,0>:f // acc = u (payload .2)
+ (f0.0)mac (8) acc0.0:f r17.4<0;1,0>:f -2.0:f // acc += du (payload .4) * -2
+ (f0.0)mac (8) acc0.0:f r17.6<0;1,0>:f 3.0:f // acc += ddu (payload .6) * 3
+ (f0.0)mov (1) r14.2:f acc0:f // IEF ON, rTEMP3.2 = u_left
+ (-f0.0)mov (1) r14.2:f r17.2<0;1,0>:f // IEF OFF, rTEMP3.2 = u_left
+
+ and (1) r14.1:ud r2.3:uw 0xFFF8:uw // keep bits 15:3 of r2.3:uw (the width field; bits 1 and 2 are the flags tested above)
+ asr (1) r14.1:ud r14.1:ud 3:d // shift the field down to bit 0
+ mov (1) r14.1:f r14.1:ud // rTEMP3.1 = (float)width
+
+ // Gen7 AVS WA, if (int)(u_left*width + 5.0/256) > (int)(u_left*width)
+ mul (1) r14.0:f r14.2:f r14.1:f // rTEMP3.0 = u_left*width
+ add (1) r14.2:f r14.0:f 0.01953125:f // rTEMP3.2 = u_left*width + 5.0/256
+ add (1) r14.3:f r14.0:f 0.99609375:f // rTEMP3.3 = u_left*width + 255.0/256
+
+ //Check if the values are < 0 and account for (int) cast of negative numbers
+
+ //(int)(u_left*width)
+ cmp.l.f0.0 (1) null<1>:f r14.0:f 0.00000000:f // f0.0 = value < 0
+ mov (1) r14.0:d r14.0:f // float -> int conversion in place
+ (f0.0)add (1) r14.0:d r14.0<0;1,0>:d -1:d // negative input: subtract 1
+
+ //(int)(u_left*width + 5.0/256)
+ cmp.l.f0.0 (1) null<1>:f r14.2:f 0.00000000:f // f0.0 = value < 0
+ mov (1) r14.2:d r14.2:f // float -> int conversion in place
+ (f0.0)add (1) r14.2:d r14.2<0;1,0>:d -1:d // negative input: subtract 1
+
+ //(int)(u_left*width + 255.0/256)
+ cmp.l.f0.0 (1) null<1>:f r14.3:f 0.00000000:f // f0.0 = value < 0
+ mov (1) r14.3:d r14.3:f // float -> int conversion in place
+ (f0.0)add (1) r14.3:d r14.3<0;1,0>:d -1:d // negative input: subtract 1
+
+ mov (1) f0.0:uw 0:uw // clear f0.0 so it stays 0 when the predicated cmp.e below is skipped
+ //if (((int)(u_left*width + 5.0/256) > (int)(u_left*width))
+ cmp.g.f1.0 (1) null<1>:d r14.2:d r14.0:d
+ // modified_u_coord = u_coord - 5.0/(256*width); //floating point
+ (f1.0) add (1) r17.2:f r17.2<0;1,0>:f -r2.3:f // r2.3:f presumably holds 5.0/(256*width); likely different bytes than r2.3:uw -- confirm
+ //else if(((int)(u_left*width + 255.0/256) == (int)(u_left*width))
+ (-f1.0) cmp.e.f0.0 (1) null<1>:d r14.3:d r14.0:d
+ // modified_u_coord = u_coord + 1.0/(256*width); //floating point
+ (f0.0) add (1) r17.2:f r17.2<0;1,0>:f r2.2:f // r2.2:f presumably holds 1.0/(256*width) -- confirm
+
+GEN7_PL2_AVS_WA_DONE_L0_1_:
+ mov (1) f0.0:uw r14.8:uw // restore f0.0
+
+ send (1) uwBUFFER_1(0)<1> r16 0x2 a0.0:ud
+ // Returns Y data in 4 GRFs in scrambled order (matches response length 4 in the descriptor above)
+
+ mov (1) r17.2:f r14.5:f // restore pixel 0 U for chroma, No AVS WA for chroma
+
+ add (1) a0.0:ud r23.5<0;1,0>:ud 0x48EB801:ud // msg desc: msg len 2, resp len 8, sample_8x8, sampler index 8; 1 is added to change BI to UV
+ mov (1) r16.2:ud 0x0000A000:ud // m0.2 write mask: bits 15,13 set (A, G/Y disabled), so Red (V) + Blue (U) are returned
+
+ send (1) uwBUFFER_1(4)<1> r16 0x2 a0.0:ud
+ // Returns UV data in 8 GRFs in scrambled order, written from row 4 of BUFFER_1 (after the 4 Y GRFs)
+
+SKIP_AVS_LOAD_L0_1_:
+ nop
+
+
diff --git a/src/shaders/post_processing/gen7/PL2_AVS_Buf_2.g4a b/src/shaders/post_processing/gen7/PL2_AVS_Buf_2.g4a
new file mode 100644
index 0000000..d07d1f2
--- /dev/null
+++ b/src/shaders/post_processing/gen7/PL2_AVS_Buf_2.g4a
@@ -0,0 +1,536 @@
+/*
+ * Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// 42 // Total instruction count
+// 1 // Total kernel count
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16)
+.default_register_type :ub
+
+.reg_count_total 128
+.reg_count_payload 7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later)
+// 0000,0000,0000
+// 0001(Spawn a root thread),0001 (Root thread spawn thread)
+// = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later)
+// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+// 0000,0000 (Binding table index, added later)
+// = 0x02000011
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+ // Message Descriptors
+ // = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+ // 1 (header present 1) 0 1010 (media block write) 000000
+ // 00000000 (binding table index - set later)
+ // = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+ // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT
+
+
+.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+
+.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+
+.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+
+
+// End of common.inc
+
+
+// FileName: PL2_AVS_Buf_2.asm
+// Author: Tatiya, Rupesh
+// Description: Loads 8x8 AVS/IEF PL2 data into Buffer 2
+
+
+
+// FileName : PL2_AVS_Buf.asm
+// Author : Tatiya, Rupesh
+// Description : Loads 8x8 AVS/IEF PL2 data into Buffer N
+
+
+
+// Module name: Scaling.inc
+
+
+
+
+// Description: Includes all definitions explicit to Fast Composite.
+
+
+
+
+// End of common.inc
+
+
+//========== GRF partition ==========
+ // r0 header : r0 (1 GRF)
+ // Static parameters : r1 - r6 (6 GRFS)
+ // Inline parameters : r7 - r8 (2 GRFs)
+ // MSGSRC : r27 (1 GRF)
+//===================================
+
+//Interface:
+//========== Static Parameters (Explicit To Fast Composite) ==========
+//r1
+//CSC Set 0
+
+
+.declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud
+
+//Constant alpha
+
+
+//r2
+
+
+// Gen7 AVS WA
+
+
+// WiDi Definitions
+
+
+//Colorfill
+
+
+ // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise.
+
+.declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub
+
+//r3
+//Normalised Ratio of Horizontal step size with main video for all layers
+
+
+ //Normalised Ratio of Horizontal step size with main video for all layers becomes
+ //Normalised Horizontal step size for all layers in VP_Setup.asm
+
+
+//r4
+//Normalised Vertical step size for all layers
+
+
+//r5
+//Normalised Vertical Frame Origin for all layers
+
+
+//r6
+//Normalised Horizontal Frame Origin for all layers
+
+
+//========== Inline Parameters (Explicit To Fast Composite) ==========
+
+
+//Main video Step X
+
+
+//====================== Binding table (Explicit To Fast Composite)=========================================
+
+
+//Used by Interlaced Scaling Kernels
+
+
+//========== Sampler State Table Index (Explicit To Fast Composite)==========
+//Sampler Index for AVS/IEF messages
+
+
+//Sampler Index for SIMD16 sampler messages
+
+
+//=============================================================================
+
+.declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+
+.declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+.declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_4 Base=r28.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_5 Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+.declare ubBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+.declare ub4BUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+
+//Pointer to mask reg
+
+
+//r18
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+.declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF
+
+//r19
+
+
+.declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF
+
+
+//r20
+
+.declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF
+
+//r21
+
+.declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF
+
+//r22
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as
+//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT
+
+//r23
+
+
+//Lumakey
+
+
+//r24
+
+
+//r25
+
+
+//r26
+
+
+//defines to generate LABELS during compile time.
+
+
+ // Message Header
+ // m0.7 31:0 Debug
+ // m0.6 31:0 Debug
+ // m0.5 31:0 Ignored
+ // m0.4 31:0 Ignored
+ // m0.3 31:0 Ignored
+ // m0.2 31:16 Ignored
+ // 15 Alpha Write Channel Mask enable=0, disable=1
+ // 14 Blue Write Channel Mask (U)
+ // 13 Green Write Channel Mask (Y)
+ // 12 Red Write Channel Mask (V)
+ // 11:0 Ignored
+ // m0.1 Ignored
+ // m0.0 Ignored
+
+
+ // AVS payload
+ // m1.7 Group ID Number
+ // m1.6 U 2nd Derivative ---> NLAS dx
+ // m1.5 Delta V ---> Step Y
+ // m1.4 Delta U ---> Step X
+ // m1.3 Pixel 0 V Address ---> ORIY (Y0)
+ // m1.2 Pixel 0 U Address ---> ORIX (X0)
+ // m1.1 Vertical Block Number
+ // m1.0 Reserved
+
+ // Sampler Message Descriptor
+ // 31:29 Reserved 000
+ // 28:25 Message length 0010
+ // 24:20 Response length xxxxx ---> 4GRFs for each enabled channel (AVS), 2GRFs for each enabled channel (sample unorm)
+ // 19 Header Present 1
+ // 18:17 SIMD Mode 11 ---> SIMD32/64
+ // 16:12 Message Type xxxxx ---> 01011 sample_8x8, 01100 (sample_unorm), 01010 (sample_unorm+killpix)
+ // 11:8 Sampler Index xxxx
+ // 7:0 Binding Table Index xxxxxxxx
+
+
+ // Msg Header M0.2
+ // 15:15 Alpha Write Channel Mask, 0: written back, 1: not written back
+ // 14:14 Blue Write Channel Mask
+ // 13:13 Green Write Channel Mask
+ // 12:12 Red Write Channel Mask
+
+
+//By design, Buffer 0,1,2,3 always have Layer 0 and Buffer 4,5 always have L1-L7
+
+
+//used to generate LABELS at compile time.
+
+
+ // 18:17 SIMD Mode 10 ---> SIMD16
+ // 16:12 Message Type xxxxx ---> 00000 (SIMD16)
+
+
+//r10-17 - 8 GRFs to load SIMD16 data (upto 4 channels)
+//r18-19 - 2 GRFs to store sampler ramp.
+
+ .declare mfSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+ .declare muwSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+ .declare mudCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+ .declare mubCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=1 SrcRegion=<32;32,1> DstRegion=<1> Type=ub
+
+
+ .declare fSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+ .declare udSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+ .declare ub4SCALING_0X_34X_TEMP Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<1> Type=ub
+ .declare uwSCALING_0X_34X_TEMP Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+ // Sampler ramp is used for Scaling 0X_0.34X
+ .declare fSAMPLER_RAMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> Type=f // 1 GRFs, 8 elements
+
+
+ //#define rMSGDSC_UV r23.0
+
+
+//End of _SCALING_
+
+
+ //Check if layer is to be skipped
+
+
+ // f0.1 is pre-computed in Set_Layer_0; when it is clear, both AVS loads for Buffer 2 are skipped
+ (-f0.1) jmpi (1) SKIP_AVS_LOAD_L0_2_
+
+
+ // AVS_PAYLOAD already has all the data loaded at this point
+ add (1) a0.0:ud r23.5<0;1,0>:ud 0x44EB400:ud // msg desc: msg len 2, resp len 4 GRFs, header present, sample_8x8, sampler index 4
+
+ mov (1) r16.2:ud 0x0000D000:ud // M0.2 mask (1 = not written back): alpha, blue, red masked -> only Green (Y) enabled
+
+
+ // set the vertical block number
+
+
+ add (1) r25.1<1>:ud r7.12:uw 2:ud // payload dword 1 (vertical block number) = r7.12:uw + 2
+
+
+ mov (8) r17.0:ud r25.0<8;8,1>:ud // Copy the payload mirror (r25) into message register r17 (second register of the 2-register message)
+
+ // Gen7 AVS WA Only for YUV packed surfaces, NV12 and Y-channel only for Planar surfaces
+ // if (((int)(u_left*width + 5.0/256) > (int)(u_left*width))
+ // {
+ // modified_u_coord = u_coord - 5.0/(256*width); //floating point
+ // }
+ // else if(((int)(u_left*width + 255.0/256) == (int)(u_left*width))
+ // {
+ // modified_u_coord = u_coord + 1.0/(256*width); //floating point
+ // }
+ // else{
+ // modified_u_coord = u_coord;
+ // }
+ // Where u_left = u - 2*du + 3*ddu for IEF On
+ // And u_left = u for IEF Off case
+ //
+
+ // check whether Gen7 AVS WA is enabled (bit 1 of r2.3:uw); f0.0 and pixel 0 U are saved first because the WA overwrites them
+ mov (1) r14.8:uw f0.0:uw // save f0.0
+ mov (1) r14.5:f r17.2<0;1,0>:f // save pixel 0 U for chroma
+
+ and.nz.f0.0 (1) null<1>:uw r2.3:uw 0x2:uw // f0.0 set when the WA-enable bit (0x2) is set
+ (-f0.0)jmpi (1) GEN7_PL2_AVS_WA_DONE_L0_2_
+
+ // Gen7 AVS WA, check if IEF is ON (bit 2 of r2.3:uw) for choosing Gen7 AVS WA formula
+
+ and.nz.f0.0 (8) null<1>:uw r2.3<0;1,0>:uw 0x4:uw
+ (f0.0)mov (8) acc0.0:f r17.2<0;1,0>:f // acc0 = u (pixel 0 U address)
+ (f0.0)mac (8) acc0.0:f r17.4<0;1,0>:f -2.0:f // acc0 += -2 * du (payload dword 4, Delta U)
+ (f0.0)mac (8) acc0.0:f r17.6<0;1,0>:f 3.0:f // acc0 += 3 * ddu (payload dword 6, U 2nd derivative)
+ (f0.0)mov (1) r14.2:f acc0:f // IEF ON, rTEMP3.2 = u_left
+ (-f0.0)mov (1) r14.2:f r17.2<0;1,0>:f // IEF OFF, rTEMP3.2 = u_left
+
+ and (1) r14.1:ud r2.3:uw 0xFFF8:uw // keep bits 15:3 of r2.3:uw ...
+ asr (1) r14.1:ud r14.1:ud 3:d // ... shift them down by 3 ...
+ mov (1) r14.1:f r14.1:ud // ... and convert to float: this is the "width" factor used below
+
+ // Gen7 AVS WA, if (int)(u_left*width + 5.0/256) > (int)(u_left*width)
+ mul (1) r14.0:f r14.2:f r14.1:f // rTEMP3.0 = u_left*width
+ add (1) r14.2:f r14.0:f 0.01953125:f // rTEMP3.2 = u_left*width + 5.0/256
+ add (1) r14.3:f r14.0:f 0.99609375:f // rTEMP3.3 = u_left*width + 255.0/256
+
+ //Check if the values are < 0 and account for (int) cast of negative numbers: each value is converted to int and 1 is subtracted when the float was negative
+
+ //(int)(u_left*width)
+ cmp.l.f0.0 (1) null<1>:f r14.0:f 0.00000000:f
+ mov (1) r14.0:d r14.0:f
+ (f0.0)add (1) r14.0:d r14.0<0;1,0>:d -1:d
+
+ //(int)(u_left*width + 5.0/256)
+ cmp.l.f0.0 (1) null<1>:f r14.2:f 0.00000000:f
+ mov (1) r14.2:d r14.2:f
+ (f0.0)add (1) r14.2:d r14.2<0;1,0>:d -1:d
+
+ //(int)(u_left*width + 255.0/256)
+ cmp.l.f0.0 (1) null<1>:f r14.3:f 0.00000000:f
+ mov (1) r14.3:d r14.3:f
+ (f0.0)add (1) r14.3:d r14.3<0;1,0>:d -1:d
+
+ mov (1) f0.0:uw 0:uw // clear flag so the "else if" add below only fires when its compare actually ran and matched
+ //if (((int)(u_left*width + 5.0/256) > (int)(u_left*width))
+ cmp.g.f1.0 (1) null<1>:d r14.2:d r14.0:d
+ // modified_u_coord = u_coord - 5.0/(256*width); //floating point (the code subtracts r2.3:f)
+ (f1.0) add (1) r17.2:f r17.2<0;1,0>:f -r2.3:f
+ //else if(((int)(u_left*width + 255.0/256) == (int)(u_left*width))
+ (-f1.0) cmp.e.f0.0 (1) null<1>:d r14.3:d r14.0:d
+ // modified_u_coord = u_coord + 1.0/(256*width); //floating point (the code adds r2.2:f)
+ (f0.0) add (1) r17.2:f r17.2<0;1,0>:f r2.2:f
+
+GEN7_PL2_AVS_WA_DONE_L0_2_:
+ mov (1) f0.0:uw r14.8:uw // restore f0.0
+
+ send (1) uwBUFFER_2(0)<1> r16 0x2 a0.0:ud
+ // Returns Y data in 4 GRFs in scrambled order
+
+ mov (1) r17.2:f r14.5:f // restore pixel 0 U for chroma, No AVS WA for chroma
+
+ add (1) a0.0:ud r23.5<0;1,0>:ud 0x48EB801:ud // msg desc (resp len 8 GRFs, sampler index 8); 1 is added to change BI to UV
+ mov (1) r16.2:ud 0x0000A000:ud // M0.2 mask: alpha and green masked -> Red+Blue channels enabled
+
+ send (1) uwBUFFER_2(4)<1> r16 0x2 a0.0:ud
+ // Returns UV data in 8 GRFs in scrambled order
+
+SKIP_AVS_LOAD_L0_2_:
+ nop
+
+
diff --git a/src/shaders/post_processing/gen7/PL2_AVS_Buf_3.g4a b/src/shaders/post_processing/gen7/PL2_AVS_Buf_3.g4a
new file mode 100644
index 0000000..3500df1
--- /dev/null
+++ b/src/shaders/post_processing/gen7/PL2_AVS_Buf_3.g4a
@@ -0,0 +1,536 @@
+/*
+ * Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// 42 // Total instruction count
+// 1 // Total kernel count
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16)
+.default_register_type :ub
+
+.reg_count_total 128
+.reg_count_payload 7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later)
+// 0000,0000,0000
+// 0001(Spawn a root thread),0001 (Root thread spawn thread)
+// = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later)
+// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+// 0000,0000 (Binding table index, added later)
+// = 0x02000011
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+ // Message Descriptors
+ // = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+ // 1 (header present 1) 0 1010 (media block write) 000000
+ // 00000000 (binding table index - set later)
+ // = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+ // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT
+
+
+.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+
+.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+
+.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+
+
+// End of common.inc
+
+
+// FileName: PL2_AVS_Buf_3.asm
+// Author: Tatiya, Rupesh
+// Description: Loads 8x8 AVS/IEF PL2 data into Buffer 3
+
+
+
+// FileName : PL2_AVS_Buf.asm
+// Author : Tatiya, Rupesh
+// Description : Loads 8x8 AVS/IEF PL2 data into Buffer N
+
+
+
+// Module name: Scaling.inc
+
+
+
+
+// Description: Includes all definitions explicit to Fast Composite.
+
+
+
+
+// End of common.inc
+
+
+//========== GRF partition ==========
+ // r0 header : r0 (1 GRF)
+ // Static parameters : r1 - r6 (6 GRFS)
+ // Inline parameters : r7 - r8 (2 GRFs)
+ // MSGSRC : r27 (1 GRF)
+//===================================
+
+//Interface:
+//========== Static Parameters (Explicit To Fast Composite) ==========
+//r1
+//CSC Set 0
+
+
+.declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud
+
+//Constant alpha
+
+
+//r2
+
+
+// Gen7 AVS WA
+
+
+// WiDi Definitions
+
+
+//Colorfill
+
+
+ // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise.
+
+.declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub
+
+//r3
+//Normalised Ratio of Horizontal step size with main video for all layers
+
+
+ //Normalised Ratio of Horizontal step size with main video for all layers becomes
+ //Normalised Horizontal step size for all layers in VP_Setup.asm
+
+
+//r4
+//Normalised Vertical step size for all layers
+
+
+//r5
+//Normalised Vertical Frame Origin for all layers
+
+
+//r6
+//Normalised Horizontal Frame Origin for all layers
+
+
+//========== Inline Parameters (Explicit To Fast Composite) ==========
+
+
+//Main video Step X
+
+
+//====================== Binding table (Explicit To Fast Composite)=========================================
+
+
+//Used by Interlaced Scaling Kernels
+
+
+//========== Sampler State Table Index (Explicit To Fast Composite)==========
+//Sampler Index for AVS/IEF messages
+
+
+//Sampler Index for SIMD16 sampler messages
+
+
+//=============================================================================
+
+.declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+
+.declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+.declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_4 Base=r28.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_5 Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+.declare ubBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+.declare ub4BUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+
+//Pointer to mask reg
+
+
+//r18
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+.declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF
+
+//r19
+
+
+.declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF
+
+
+//r20
+
+.declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF
+
+//r21
+
+.declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF
+
+//r22
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as
+//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT
+
+//r23
+
+
+//Lumakey
+
+
+//r24
+
+
+//r25
+
+
+//r26
+
+
+//defines to generate LABELS during compile time.
+
+
+ // Message Header
+ // m0.7 31:0 Debug
+ // m0.6 31:0 Debug
+ // m0.5 31:0 Ignored
+ // m0.4 31:0 Ignored
+ // m0.3 31:0 Ignored
+ // m0.2 31:16 Ignored
+ // 15 Alpha Write Channel Mask enable=0, disable=1
+ // 14 Blue Write Channel Mask (U)
+ // 13 Green Write Channel Mask (Y)
+ // 12 Red Write Channel Mask (V)
+ // 11:0 Ignored
+ // m0.1 Ignored
+ // m0.0 Ignored
+
+
+ // AVS payload
+ // m1.7 Group ID Number
+ // m1.6 U 2nd Derivative ---> NLAS dx
+ // m1.5 Delta V ---> Step Y
+ // m1.4 Delta U ---> Step X
+ // m1.3 Pixel 0 V Address ---> ORIY (Y0)
+ // m1.2 Pixel 0 U Address ---> ORIX (X0)
+ // m1.1 Vertical Block Number
+ // m1.0 Reserved
+
+ // Sampler Message Descriptor
+ // 31:29 Reserved 000
+ // 28:25 Message length 0010
+ // 24:20 Response length xxxxx ---> 4GRFs for each enabled channel (AVS), 2GRFs for each enabled channel (sample unorm)
+ // 19 Header Present 1
+ // 18:17 SIMD Mode 11 ---> SIMD32/64
+ // 16:12 Message Type xxxxx ---> 01011 sample_8x8, 01100 (sample_unorm), 01010 (sample_unorm+killpix)
+ // 11:8 Sampler Index xxxx
+ // 7:0 Binding Table Index xxxxxxxx
+
+
+ // Msg Header M0.2
+ // 15:15 Alpha Write Channel Mask, 0: written back, 1: not written back
+ // 14:14 Blue Write Channel Mask
+ // 13:13 Green Write Channel Mask
+ // 12:12 Red Write Channel Mask
+
+
+//By design, Buffer 0,1,2,3 always have Layer 0 and Buffer 4,5 always have L1-L7
+
+
+//used to generate LABELS at compile time.
+
+
+ // 18:17 SIMD Mode 10 ---> SIMD16
+ // 16:12 Message Type xxxxx ---> 00000 (SIMD16)
+
+
+//r10-17 - 8 GRFs to load SIMD16 data (upto 4 channels)
+//r18-19 - 2 GRFs to store sampler ramp.
+
+ .declare mfSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+ .declare muwSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+ .declare mudCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+ .declare mubCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=1 SrcRegion=<32;32,1> DstRegion=<1> Type=ub
+
+
+ .declare fSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+ .declare udSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+ .declare ub4SCALING_0X_34X_TEMP Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<1> Type=ub
+ .declare uwSCALING_0X_34X_TEMP Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+ // Sampler ramp is used for Scaling 0X_0.34X
+ .declare fSAMPLER_RAMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> Type=f // 1 GRFs, 8 elements
+
+
+ //#define rMSGDSC_UV r23.0
+
+
+//End of _SCALING_
+
+
+ //Check if layer is to be skipped
+
+
+ // f0.1 is pre-computed in Set_Layer_0; when it is clear, both AVS loads for Buffer 3 are skipped
+ (-f0.1) jmpi (1) SKIP_AVS_LOAD_L0_3_
+
+
+ // AVS_PAYLOAD already has all the data loaded at this point
+ add (1) a0.0:ud r23.5<0;1,0>:ud 0x44EB400:ud // msg desc: msg len 2, resp len 4 GRFs, header present, sample_8x8, sampler index 4
+
+ mov (1) r16.2:ud 0x0000D000:ud // M0.2 mask (1 = not written back): alpha, blue, red masked -> only Green (Y) enabled
+
+
+ // set the vertical block number
+
+
+ add (1) r25.1<1>:ud r7.12:uw 3:ud // payload dword 1 (vertical block number) = r7.12:uw + 3
+
+
+ mov (8) r17.0:ud r25.0<8;8,1>:ud // Copy the payload mirror (r25) into message register r17 (second register of the 2-register message)
+
+ // Gen7 AVS WA Only for YUV packed surfaces, NV12 and Y-channel only for Planar surfaces
+ // if (((int)(u_left*width + 5.0/256) > (int)(u_left*width))
+ // {
+ // modified_u_coord = u_coord - 5.0/(256*width); //floating point
+ // }
+ // else if(((int)(u_left*width + 255.0/256) == (int)(u_left*width))
+ // {
+ // modified_u_coord = u_coord + 1.0/(256*width); //floating point
+ // }
+ // else{
+ // modified_u_coord = u_coord;
+ // }
+ // Where u_left = u - 2*du + 3*ddu for IEF On
+ // And u_left = u for IEF Off case
+ //
+
+ // check whether Gen7 AVS WA is enabled (bit 1 of r2.3:uw); f0.0 and pixel 0 U are saved first because the WA overwrites them
+ mov (1) r14.8:uw f0.0:uw // save f0.0
+ mov (1) r14.5:f r17.2<0;1,0>:f // save pixel 0 U for chroma
+
+ and.nz.f0.0 (1) null<1>:uw r2.3:uw 0x2:uw // f0.0 set when the WA-enable bit (0x2) is set
+ (-f0.0)jmpi (1) GEN7_PL2_AVS_WA_DONE_L0_3_
+
+ // Gen7 AVS WA, check if IEF is ON (bit 2 of r2.3:uw) for choosing Gen7 AVS WA formula
+
+ and.nz.f0.0 (8) null<1>:uw r2.3<0;1,0>:uw 0x4:uw
+ (f0.0)mov (8) acc0.0:f r17.2<0;1,0>:f // acc0 = u (pixel 0 U address)
+ (f0.0)mac (8) acc0.0:f r17.4<0;1,0>:f -2.0:f // acc0 += -2 * du (payload dword 4, Delta U)
+ (f0.0)mac (8) acc0.0:f r17.6<0;1,0>:f 3.0:f // acc0 += 3 * ddu (payload dword 6, U 2nd derivative)
+ (f0.0)mov (1) r14.2:f acc0:f // IEF ON, rTEMP3.2 = u_left
+ (-f0.0)mov (1) r14.2:f r17.2<0;1,0>:f // IEF OFF, rTEMP3.2 = u_left
+
+ and (1) r14.1:ud r2.3:uw 0xFFF8:uw // keep bits 15:3 of r2.3:uw ...
+ asr (1) r14.1:ud r14.1:ud 3:d // ... shift them down by 3 ...
+ mov (1) r14.1:f r14.1:ud // ... and convert to float: this is the "width" factor used below
+
+ // Gen7 AVS WA, if (int)(u_left*width + 5.0/256) > (int)(u_left*width)
+ mul (1) r14.0:f r14.2:f r14.1:f // rTEMP3.0 = u_left*width
+ add (1) r14.2:f r14.0:f 0.01953125:f // rTEMP3.2 = u_left*width + 5.0/256
+ add (1) r14.3:f r14.0:f 0.99609375:f // rTEMP3.3 = u_left*width + 255.0/256
+
+ //Check if the values are < 0 and account for (int) cast of negative numbers: each value is converted to int and 1 is subtracted when the float was negative
+
+ //(int)(u_left*width)
+ cmp.l.f0.0 (1) null<1>:f r14.0:f 0.00000000:f
+ mov (1) r14.0:d r14.0:f
+ (f0.0)add (1) r14.0:d r14.0<0;1,0>:d -1:d
+
+ //(int)(u_left*width + 5.0/256)
+ cmp.l.f0.0 (1) null<1>:f r14.2:f 0.00000000:f
+ mov (1) r14.2:d r14.2:f
+ (f0.0)add (1) r14.2:d r14.2<0;1,0>:d -1:d
+
+ //(int)(u_left*width + 255.0/256)
+ cmp.l.f0.0 (1) null<1>:f r14.3:f 0.00000000:f
+ mov (1) r14.3:d r14.3:f
+ (f0.0)add (1) r14.3:d r14.3<0;1,0>:d -1:d
+
+ mov (1) f0.0:uw 0:uw // clear flag so the "else if" add below only fires when its compare actually ran and matched
+ //if (((int)(u_left*width + 5.0/256) > (int)(u_left*width))
+ cmp.g.f1.0 (1) null<1>:d r14.2:d r14.0:d
+ // modified_u_coord = u_coord - 5.0/(256*width); //floating point (the code subtracts r2.3:f)
+ (f1.0) add (1) r17.2:f r17.2<0;1,0>:f -r2.3:f
+ //else if(((int)(u_left*width + 255.0/256) == (int)(u_left*width))
+ (-f1.0) cmp.e.f0.0 (1) null<1>:d r14.3:d r14.0:d
+ // modified_u_coord = u_coord + 1.0/(256*width); //floating point (the code adds r2.2:f)
+ (f0.0) add (1) r17.2:f r17.2<0;1,0>:f r2.2:f
+
+GEN7_PL2_AVS_WA_DONE_L0_3_:
+ mov (1) f0.0:uw r14.8:uw // restore f0.0
+
+ send (1) uwBUFFER_3(0)<1> r16 0x2 a0.0:ud
+ // Returns Y data in 4 GRFs in scrambled order
+
+ mov (1) r17.2:f r14.5:f // restore pixel 0 U for chroma, No AVS WA for chroma
+
+ add (1) a0.0:ud r23.5<0;1,0>:ud 0x48EB801:ud // msg desc (resp len 8 GRFs, sampler index 8); 1 is added to change BI to UV
+ mov (1) r16.2:ud 0x0000A000:ud // M0.2 mask: alpha and green masked -> Red+Blue channels enabled
+
+ send (1) uwBUFFER_3(4)<1> r16 0x2 a0.0:ud
+ // Returns UV data in 8 GRFs in scrambled order
+
+SKIP_AVS_LOAD_L0_3_:
+ nop
+
+
diff --git a/src/shaders/post_processing/gen7/PL3_AVS_Buf_0.g4a b/src/shaders/post_processing/gen7/PL3_AVS_Buf_0.g4a
new file mode 100644
index 0000000..8d487aa
--- /dev/null
+++ b/src/shaders/post_processing/gen7/PL3_AVS_Buf_0.g4a
@@ -0,0 +1,549 @@
+/*
+ * Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// 47 // Total instruction count
+// 1 // Total kernel count
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16)
+.default_register_type :ub
+
+.reg_count_total 128
+.reg_count_payload 7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later)
+// 0000,0000,0000
+// 0001(Spawn a root thread),0001 (Root thread spawn thread)
+// = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later)
+// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+// 0000,0000 (Binding table index, added later)
+// = 0x0C098700
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+ // Message Descriptors
+ // = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+ // 1 (header present 1) 0 1010 (media block write) 000000
+ // 00000000 (binding table index - set later)
+ // = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+ // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT
+
+
+.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+
+.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+
+.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+
+
+// End of common.inc
+
+
+// FileName: PL3_AVS_Buf_0.asm
+// Author: Tatiya, Rupesh
+// Description: Loads 8x8 AVS/IEF PL3 data into Buffer 0
+
+
+
+// FileName : PL3_AVS_Buf.asm
+// Author : Tatiya, Rupesh
+// Description : Loads 8x8 AVS/IEF PL3 data into Buffer N
+
+
+
+// Module name: Scaling.inc
+
+
+
+
+// Description: Includes all definitions explicit to Fast Composite.
+
+
+
+
+// End of common.inc
+
+
+//========== GRF partition ==========
+ // r0 header : r0 (1 GRF)
+ // Static parameters : r1 - r6 (6 GRFS)
+ // Inline parameters : r7 - r8 (2 GRFs)
+ // MSGSRC : r27 (1 GRF)
+//===================================
+
+//Interface:
+//========== Static Parameters (Explicit To Fast Composite) ==========
+//r1
+//CSC Set 0
+
+
+.declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud
+
+//Constant alpha
+
+
+//r2
+
+
+// Gen7 AVS WA
+
+
+// WiDi Definitions
+
+
+//Colorfill
+
+
+ // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise.
+
+.declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub
+
+//r3
+//Normalised Ratio of Horizontal step size with main video for all layers
+
+
+ //Normalised Ratio of Horizontal step size with main video for all layers becomes
+ //Normalised Horizontal step size for all layers in VP_Setup.asm
+
+
+//r4
+//Normalised Vertical step size for all layers
+
+
+//r5
+//Normalised Vertical Frame Origin for all layers
+
+
+//r6
+//Normalised Horizontal Frame Origin for all layers
+
+
+//========== Inline Parameters (Explicit To Fast Composite) ==========
+
+
+//Main video Step X
+
+
+//====================== Binding table (Explicit To Fast Composite)=========================================
+
+
+//Used by Interlaced Scaling Kernels
+
+
+//========== Sampler State Table Index (Explicit To Fast Composite)==========
+//Sampler Index for AVS/IEF messages
+
+
+//Sampler Index for SIMD16 sampler messages
+
+
+//=============================================================================
+
+.declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+
+.declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+.declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_4 Base=r28.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_5 Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+.declare ubBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+.declare ub4BUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+
+//Pointer to mask reg
+
+
+//r18
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+.declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF
+
+//r19
+
+
+.declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF
+
+
+//r20
+
+.declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF
+
+//r21
+
+.declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF
+
+//r22
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as
+//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT
+
+//r23
+
+
+//Lumakey
+
+
+//r24
+
+
+//r25
+
+
+//r26
+
+
+//defines to generate LABELS during compile time.
+
+
+ // Message Header
+ // m0.7 31:0 Debug
+ // m0.6 31:0 Debug
+ // m0.5 31:0 Ignored
+ // m0.4 31:0 Ignored
+ // m0.3 31:0 Ignored
+ // m0.2 31:16 Ignored
+ // 15 Alpha Write Channel Mask enable=0, disable=1
+ // 14 Blue Write Channel Mask (U)
+ // 13 Green Write Channel Mask (Y)
+ // 12 Red Write Channel Mask (V)
+ // 11:0 Ignored
+ // m0.1 Ignored
+ // m0.0 Ignored
+
+
+ // AVS payload
+ // m1.7 Group ID Number
+ // m1.6 U 2nd Derivative ---> NLAS dx
+ // m1.5 Delta V ---> Step Y
+ // m1.4 Delta U ---> Step X
+ // m1.3 Pixel 0 V Address ---> ORIY (Y0)
+ // m1.2 Pixel 0 U Address ---> ORIX (X0)
+ // m1.1 Vertical Block Number
+ // m1.0 Reserved
+
+ // Sampler Message Descriptor
+ // 31:29 Reserved 000
+ // 28:25 Message length 0010
+ // 24:20 Response length xxxxx ---> 4GRFs for each enabled channel (AVS), 2GRFs for each enabled channel (sample unorm)
+ // 19 Header Present 1
+ // 18:17 SIMD Mode 11 ---> SIMD32/64
+ // 16:12 Message Type xxxxx ---> 01011 sample_8x8, 01100 (sample_unorm), 01010 (sample_unorm+killpix)
+ // 11:8 Sampler Index xxxx
+ // 7:0 Binding Table Index xxxxxxxx
+
+
+ // Msg Header M0.2
+ // 15:15 Alpha Write Channel Mask, 0: written back, 1: not written back
+ // 14:14 Blue Write Channel Mask
+ // 13:13 Green Write Channel Mask
+ // 12:12 Red Write Channel Mask
+
+
+//By design, Buffer 0,1,2,3 always have Layer 0 and Buffer 4,5 always have L1-L7
+
+
+//used to generate LABELS at compile time.
+
+
+ // 18:17 SIMD Mode 10 ---> SIMD16
+ // 16:12 Message Type xxxxx ---> 00000 (SIMD16)
+
+
+//r10-17 - 8 GRFs to load SIMD16 data (upto 4 channels)
+//r18-19 - 2 GRFs to store sampler ramp.
+
+ .declare mfSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+ .declare muwSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+ .declare mudCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+ .declare mubCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=1 SrcRegion=<32;32,1> DstRegion=<1> Type=ub
+
+
+ .declare fSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+ .declare udSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+ .declare ub4SCALING_0X_34X_TEMP Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<1> Type=ub
+ .declare uwSCALING_0X_34X_TEMP Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+ // Sampler ramp is used for Scaling 0X_0.34X
+ .declare fSAMPLER_RAMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> Type=f // 1 GRFs, 8 elements
+
+
+ //#define rMSGDSC_UV r23.0
+
+
+//End of _SCALING_
+ // Kernel body: loads Layer 0 PL3 data into BUFFER_0 with three sampler sends (Y, then U, then V).
+ // The Gen7 AVS workaround below adjusts the pixel 0 U coordinate for the Y send only.
+ //NOTE: We need offsets for second half of LAYER 0 - even if we do not load it.
+ //Update the channel offset in the buffers for the lower 8x4 data for BUFFER_0.
+ mov (1) r22.4<1>:ud 0x400040:ud
+
+
+ //Check if layer is to be skipped
+
+
+ // f0.1 pre-computed in Set_Layer_0; when it is clear, jump past all three loads
+ (-f0.1) jmpi (1) SKIP_AVS_LOAD_L0_0_
+
+
+ //AVS_PAYLOAD already has all the data loaded at this point
+ add (1) a0.0:ud r23.5<0;1,0>:ud 0x44EB400:ud //msg desc = r23.5 + constant (sample_8x8, msg len 2, resp len 4, sampler idx 4, BI +0)
+
+ mov (1) r16.2:ud 0x0000D000:ud // Enable Green channel only (0xD leaves just bit 13, the Green mask, clear)
+
+
+ mov (1) r25.7<1>:ud r7.7:ud { NoDDClr }
+ mov (1) r25.1<1>:ud r7.12:uw { NoDDChk }
+
+
+ // set the vertical block number (the r25.1 write above; r7.12:uw is used unmodified here)
+
+
+ mov (8) r17.0:ud r25.0<8;8,1>:ud // Copy msg payload mirrors to MRFs
+
+ // Gen7 AVS WA Only for YUV packed surfaces, NV12 and Y-channel only for Planar surfaces
+ // if (((int)(u_left*width + 5.0/256) > (int)(u_left*width))
+ // {
+ // modified_u_coord = u_coord - 5.0/(256*width); //floating point
+ // }
+ // else if(((int)(u_left*width + 255.0/256) == (int)(u_left*width))
+ // {
+ // modified_u_coord = u_coord + 1.0/(256*width); //floating point
+ // }
+ // else{
+ // modified_u_coord = u_coord;
+ // }
+ // Where u_left = u - 2*du + 3*ddu for IEF On
+ // And u_left = u for IEF Off case
+ //
+
+ // check whether Gen7 AVS WA is enabled (bit 1 of r2.3:uw); f0.0 and pixel 0 U are saved first
+ mov (1) r14.8:uw f0.0:uw // save f0.0
+ mov (1) r14.5:f r17.2<0;1,0>:f // save pixel 0 U for chroma
+
+ and.nz.f0.0 (1) null<1>:uw r2.3:uw 0x2:uw
+ (-f0.0)jmpi (1) GEN7_PL3_AVS_WA_DONE_L0_0_
+
+ // Gen7 AVS WA, check if IEF is ON (bit 2 of r2.3:uw) for choosing Gen7 AVS WA formula
+
+ and.nz.f0.0 (8) null<1>:uw r2.3<0;1,0>:uw 0x4:uw
+ (f0.0)mov (8) acc0.0:f r17.2<0;1,0>:f // acc0 = u (payload dword 2, pixel 0 U address)
+ (f0.0)mac (8) acc0.0:f r17.4<0;1,0>:f -2.0:f // acc0 += -2 * du (payload dword 4, delta U)
+ (f0.0)mac (8) acc0.0:f r17.6<0;1,0>:f 3.0:f // acc0 += 3 * ddu (payload dword 6, U 2nd derivative)
+ (f0.0)mov (1) r14.2:f acc0:f // IEF ON, rTEMP3.2 = u_left
+ (-f0.0)mov (1) r14.2:f r17.2<0;1,0>:f // IEF OFF, rTEMP3.2 = u_left
+ // rTEMP3.1 = (float)(bits 15:3 of r2.3:uw), the "width" factor of the WA formula
+ and (1) r14.1:ud r2.3:uw 0xFFF8:uw
+ asr (1) r14.1:ud r14.1:ud 3:d
+ mov (1) r14.1:f r14.1:ud
+
+ // Gen7 AVS WA, if (int)(u_left*width + 5.0/256) > (int)(u_left*width)
+ mul (1) r14.0:f r14.2:f r14.1:f // rTEMP3.0 = u_left*width
+ add (1) r14.2:f r14.0:f 0.01953125:f // rTEMP3.2 = u_left*width + 5.0/256
+ add (1) r14.3:f r14.0:f 0.99609375:f // rTEMP3.3 = u_left*width + 255.0/256
+
+ //Check if the values are < 0 and account for (int) cast of negative numbers (subtract 1 after the conversion when the float was negative)
+
+ //(int)(u_left*width)
+ cmp.l.f0.0 (1) null<1>:f r14.0:f 0.00000000:f
+ mov (1) r14.0:d r14.0:f
+ (f0.0)add (1) r14.0:d r14.0<0;1,0>:d -1:d
+
+ //(int)(u_left*width + 5.0/256)
+ cmp.l.f0.0 (1) null<1>:f r14.2:f 0.00000000:f
+ mov (1) r14.2:d r14.2:f
+ (f0.0)add (1) r14.2:d r14.2<0;1,0>:d -1:d
+
+ //(int)(u_left*width + 255.0/256)
+ cmp.l.f0.0 (1) null<1>:f r14.3:f 0.00000000:f
+ mov (1) r14.3:d r14.3:f
+ (f0.0)add (1) r14.3:d r14.3<0;1,0>:d -1:d
+
+ mov (1) f0.0:uw 0:uw // clear flag so the last add is skipped when the predicated cmp.e below does not run
+ //if (((int)(u_left*width + 5.0/256) > (int)(u_left*width))
+ cmp.g.f1.0 (1) null<1>:d r14.2:d r14.0:d
+ // modified_u_coord = u_coord - 5.0/(256*width); //floating point
+ (f1.0) add (1) r17.2:f r17.2<0;1,0>:f -r2.3:f // NOTE(review): r2.3:f presumably holds 5.0/(256*width) - confirm
+ //else if(((int)(u_left*width + 255.0/256) == (int)(u_left*width))
+ (-f1.0) cmp.e.f0.0 (1) null<1>:d r14.3:d r14.0:d
+ // modified_u_coord = u_coord + 1.0/(256*width); //floating point
+ (f0.0) add (1) r17.2:f r17.2<0;1,0>:f r2.2:f // NOTE(review): r2.2:f presumably holds 1.0/(256*width) - confirm
+
+GEN7_PL3_AVS_WA_DONE_L0_0_:
+ mov (1) f0.0:uw r14.8:uw // restore f0.0
+
+
+ send (1) uwBUFFER_0(0)<1> r16 0x2 a0.0:ud
+ // Returns Y data in 4 GRFs in scrambled order
+
+ mov (1) r17.2:f r14.5:f // restore pixel 0 U for chroma, No AVS WA for chroma
+
+ add (1) a0.0:ud r23.5<0;1,0>:ud 0x44EB801:ud // msg desc; 1 is added to change BI to U (sampler idx 8)
+ mov (1) r16.2:ud 0x0000E000:ud // Enable Red channel
+
+ send (1) uwBUFFER_0(4)<1> r16 0x2 a0.0:ud
+ // Returns U data in 4 GRFs in scrambled order
+
+ add (1) a0.0:ud r23.5<0;1,0>:ud 0x44EBC02:ud // msg desc; 2 is added to change BI to V (sampler idx 0xC)
+ mov (1) r16.2:ud 0x0000E000:ud // Enable Red channel
+
+ send (1) uwBUFFER_0(8)<1> r16 0x2 a0.0:ud
+ // Returns V data in 4 GRFs in scrambled order
+
+SKIP_AVS_LOAD_L0_0_:
+ nop
+
+
diff --git a/src/shaders/post_processing/gen7/PL3_AVS_Buf_1.g4a b/src/shaders/post_processing/gen7/PL3_AVS_Buf_1.g4a
new file mode 100644
index 0000000..eb26775
--- /dev/null
+++ b/src/shaders/post_processing/gen7/PL3_AVS_Buf_1.g4a
@@ -0,0 +1,542 @@
+/*
+ * Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// 45 // Total instruction count
+// 1 // Total kernel count
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16)
+.default_register_type :ub
+
+.reg_count_total 128
+.reg_count_payload 7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later)
+// 0000,0000,0000
+// 0001(Spawn a root thread),0001 (Root thread spawn thread)
+// = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later)
+// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+// 0000,0000 (Binding table index, added later)
+// = 0x0C098700
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+ // Message Descriptors
+ // = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+ // 1 (header present 1) 0 1010 (media block write) 000000
+ // 00000000 (binding table index - set later)
+ // = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+ // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT
+
+
+.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+
+.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+
+.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+
+
+// End of common.inc
+
+
+// FileName: PL3_AVS_Buf_1.asm
+// Author: Tatiya, Rupesh
+// Description: Loads 8x8 AVS/IEF PL3 data into Buffer 1
+
+
+
+// FileName : PL3_AVS_Buf.asm
+// Author : Tatiya, Rupesh
+// Description : Loads 8x8 AVS/IEF PL3 data into Buffer N
+
+
+
+// Module name: Scaling.inc
+
+
+
+
+// Description: Includes all definitions explicit to Fast Composite.
+
+
+
+
+// End of common.inc
+
+
+//========== GRF partition ==========
+ // r0 header : r0 (1 GRF)
+ // Static parameters : r1 - r6 (6 GRFS)
+ // Inline parameters : r7 - r8 (2 GRFs)
+ // MSGSRC : r27 (1 GRF)
+//===================================
+
+//Interface:
+//========== Static Parameters (Explicit To Fast Composite) ==========
+//r1
+//CSC Set 0
+
+
+.declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud
+
+//Constant alpha
+
+
+//r2
+
+
+// Gen7 AVS WA
+
+
+// WiDi Definitions
+
+
+//Colorfill
+
+
+ // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise.
+
+.declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub
+
+//r3
+//Normalised Ratio of Horizontal step size with main video for all layers
+
+
+ //Normalised Ratio of Horizontal step size with main video for all layers becomes
+ //Normalised Horizontal step size for all layers in VP_Setup.asm
+
+
+//r4
+//Normalised Vertical step size for all layers
+
+
+//r5
+//Normalised Vertical Frame Origin for all layers
+
+
+//r6
+//Normalised Horizontal Frame Origin for all layers
+
+
+//========== Inline Parameters (Explicit To Fast Composite) ==========
+
+
+//Main video Step X
+
+
+//====================== Binding table (Explicit To Fast Composite)=========================================
+
+
+//Used by Interlaced Scaling Kernels
+
+
+//========== Sampler State Table Index (Explicit To Fast Composite)==========
+//Sampler Index for AVS/IEF messages
+
+
+//Sampler Index for SIMD16 sampler messages
+
+
+//=============================================================================
+
+.declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+
+.declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+.declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_4 Base=r28.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_5 Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+.declare ubBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+.declare ub4BUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+
+//Pointer to mask reg
+
+
+//r18
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+.declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF
+
+//r19
+
+
+.declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF
+
+
+//r20
+
+.declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF
+
+//r21
+
+.declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF
+
+//r22
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as
+//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT
+
+//r23
+
+
+//Lumakey
+
+
+//r24
+
+
+//r25
+
+
+//r26
+
+
+//defines to generate LABELS during compile time.
+
+
+ // Message Header
+ // m0.7 31:0 Debug
+ // m0.6 31:0 Debug
+ // m0.5 31:0 Ignored
+ // m0.4 31:0 Ignored
+ // m0.3 31:0 Ignored
+ // m0.2 31:16 Ignored
+ // 15 Alpha Write Channel Mask enable=0, disable=1
+ // 14 Blue Write Channel Mask (U)
+ // 13 Green Write Channel Mask (Y)
+ // 12 Red Write Channel Mask (V)
+ // 11:0 Ignored
+ // m0.1 Ignored
+ // m0.0 Ignored
+
+
+ // AVS payload
+ // m1.7 Group ID Number
+ // m1.6 U 2nd Derivative ---> NLAS dx
+ // m1.5 Delta V ---> Step Y
+ // m1.4 Delta U ---> Step X
+ // m1.3 Pixel 0 V Address ---> ORIY (Y0)
+ // m1.2 Pixel 0 U Address ---> ORIX (X0)
+ // m1.1 Vertical Block Number
+ // m1.0 Reserved
+
+ // Sampler Message Descriptor
+ // 31:29 Reserved 000
+ // 28:25 Message length 0010
+ // 24:20 Response length xxxxx ---> 4GRFs for each enabled channel (AVS), 2GRFs for each enabled channel (sample unorm)
+ // 19 Header Present 1
+ // 18:17 SIMD Mode 11 ---> SIMD32/64
+ // 16:12 Message Type xxxxx ---> 01011 sample_8x8, 01100 (sample_unorm), 01010 (sample_unorm+killpix)
+ // 11:8 Sampler Index xxxx
+ // 7:0 Binding Table Index xxxxxxxx
+
+
+ // Msg Header M0.2
+ // 15:15 Alpha Write Channel Mask, 0: written back, 1: not written back
+ // 14:14 Blue Write Channel Mask
+ // 13:13 Green Write Channel Mask
+ // 12:12 Red Write Channel Mask
+
+
+//By design, Buffer 0,1,2,3 always have Layer 0 and Buffer 4,5 always have L1-L7
+
+
+//used to generate LABELS at compile time.
+
+
+ // 18:17 SIMD Mode 10 ---> SIMD16
+ // 16:12 Message Type xxxxx ---> 00000 (SIMD16)
+
+
+//r10-17 - 8 GRFs to load SIMD16 data (upto 4 channels)
+//r18-19 - 2 GRFs to store sampler ramp.
+
+ .declare mfSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+ .declare muwSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+ .declare mudCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+ .declare mubCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=1 SrcRegion=<32;32,1> DstRegion=<1> Type=ub
+
+
+ .declare fSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+ .declare udSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+ .declare ub4SCALING_0X_34X_TEMP Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<1> Type=ub
+ .declare uwSCALING_0X_34X_TEMP Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+ // Sampler ramp is used for Scaling 0X_0.34X
+ .declare fSAMPLER_RAMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> Type=f // 1 GRFs, 8 elements
+
+
+ //#define rMSGDSC_UV r23.0
+
+
+//End of _SCALING_
+ // Kernel body: loads Layer 0 PL3 data into BUFFER_1 with three sampler sends (Y, then U, then V).
+ // The Gen7 AVS workaround below adjusts the pixel 0 U coordinate for the Y send only.
+ //Check if layer is to be skipped
+
+
+ // f0.1 pre-computed in Set_Layer_0; when it is clear, jump past all three loads
+ (-f0.1) jmpi (1) SKIP_AVS_LOAD_L0_1_
+
+
+ //AVS_PAYLOAD already has all the data loaded at this point
+ add (1) a0.0:ud r23.5<0;1,0>:ud 0x44EB400:ud //msg desc = r23.5 + constant (sample_8x8, msg len 2, resp len 4, sampler idx 4, BI +0)
+
+ mov (1) r16.2:ud 0x0000D000:ud // Enable Green channel only (0xD leaves just bit 13, the Green mask, clear)
+
+
+ // set the vertical block number (r7.12:uw + 1 for this buffer)
+
+ add (1) r25.1<1>:ud r7.12:uw 1:ud
+
+
+ mov (8) r17.0:ud r25.0<8;8,1>:ud // Copy msg payload mirrors to MRFs
+
+ // Gen7 AVS WA Only for YUV packed surfaces, NV12 and Y-channel only for Planar surfaces
+ // if (((int)(u_left*width + 5.0/256) > (int)(u_left*width))
+ // {
+ // modified_u_coord = u_coord - 5.0/(256*width); //floating point
+ // }
+ // else if(((int)(u_left*width + 255.0/256) == (int)(u_left*width))
+ // {
+ // modified_u_coord = u_coord + 1.0/(256*width); //floating point
+ // }
+ // else{
+ // modified_u_coord = u_coord;
+ // }
+ // Where u_left = u - 2*du + 3*ddu for IEF On
+ // And u_left = u for IEF Off case
+ //
+
+ // check whether Gen7 AVS WA is enabled (bit 1 of r2.3:uw); f0.0 and pixel 0 U are saved first
+ mov (1) r14.8:uw f0.0:uw // save f0.0
+ mov (1) r14.5:f r17.2<0;1,0>:f // save pixel 0 U for chroma
+
+ and.nz.f0.0 (1) null<1>:uw r2.3:uw 0x2:uw
+ (-f0.0)jmpi (1) GEN7_PL3_AVS_WA_DONE_L0_1_
+
+ // Gen7 AVS WA, check if IEF is ON (bit 2 of r2.3:uw) for choosing Gen7 AVS WA formula
+
+ and.nz.f0.0 (8) null<1>:uw r2.3<0;1,0>:uw 0x4:uw
+ (f0.0)mov (8) acc0.0:f r17.2<0;1,0>:f // acc0 = u (payload dword 2, pixel 0 U address)
+ (f0.0)mac (8) acc0.0:f r17.4<0;1,0>:f -2.0:f // acc0 += -2 * du (payload dword 4, delta U)
+ (f0.0)mac (8) acc0.0:f r17.6<0;1,0>:f 3.0:f // acc0 += 3 * ddu (payload dword 6, U 2nd derivative)
+ (f0.0)mov (1) r14.2:f acc0:f // IEF ON, rTEMP3.2 = u_left
+ (-f0.0)mov (1) r14.2:f r17.2<0;1,0>:f // IEF OFF, rTEMP3.2 = u_left
+ // rTEMP3.1 = (float)(bits 15:3 of r2.3:uw), the "width" factor of the WA formula
+ and (1) r14.1:ud r2.3:uw 0xFFF8:uw
+ asr (1) r14.1:ud r14.1:ud 3:d
+ mov (1) r14.1:f r14.1:ud
+
+ // Gen7 AVS WA, if (int)(u_left*width + 5.0/256) > (int)(u_left*width)
+ mul (1) r14.0:f r14.2:f r14.1:f // rTEMP3.0 = u_left*width
+ add (1) r14.2:f r14.0:f 0.01953125:f // rTEMP3.2 = u_left*width + 5.0/256
+ add (1) r14.3:f r14.0:f 0.99609375:f // rTEMP3.3 = u_left*width + 255.0/256
+
+ //Check if the values are < 0 and account for (int) cast of negative numbers (subtract 1 after the conversion when the float was negative)
+
+ //(int)(u_left*width)
+ cmp.l.f0.0 (1) null<1>:f r14.0:f 0.00000000:f
+ mov (1) r14.0:d r14.0:f
+ (f0.0)add (1) r14.0:d r14.0<0;1,0>:d -1:d
+
+ //(int)(u_left*width + 5.0/256)
+ cmp.l.f0.0 (1) null<1>:f r14.2:f 0.00000000:f
+ mov (1) r14.2:d r14.2:f
+ (f0.0)add (1) r14.2:d r14.2<0;1,0>:d -1:d
+
+ //(int)(u_left*width + 255.0/256)
+ cmp.l.f0.0 (1) null<1>:f r14.3:f 0.00000000:f
+ mov (1) r14.3:d r14.3:f
+ (f0.0)add (1) r14.3:d r14.3<0;1,0>:d -1:d
+
+ mov (1) f0.0:uw 0:uw // clear flag so the last add is skipped when the predicated cmp.e below does not run
+ //if (((int)(u_left*width + 5.0/256) > (int)(u_left*width))
+ cmp.g.f1.0 (1) null<1>:d r14.2:d r14.0:d
+ // modified_u_coord = u_coord - 5.0/(256*width); //floating point
+ (f1.0) add (1) r17.2:f r17.2<0;1,0>:f -r2.3:f // NOTE(review): r2.3:f presumably holds 5.0/(256*width) - confirm
+ //else if(((int)(u_left*width + 255.0/256) == (int)(u_left*width))
+ (-f1.0) cmp.e.f0.0 (1) null<1>:d r14.3:d r14.0:d
+ // modified_u_coord = u_coord + 1.0/(256*width); //floating point
+ (f0.0) add (1) r17.2:f r17.2<0;1,0>:f r2.2:f // NOTE(review): r2.2:f presumably holds 1.0/(256*width) - confirm
+
+GEN7_PL3_AVS_WA_DONE_L0_1_:
+ mov (1) f0.0:uw r14.8:uw // restore f0.0
+
+
+ send (1) uwBUFFER_1(0)<1> r16 0x2 a0.0:ud
+ // Returns Y data in 4 GRFs in scrambled order
+
+ mov (1) r17.2:f r14.5:f // restore pixel 0 U for chroma, No AVS WA for chroma
+
+ add (1) a0.0:ud r23.5<0;1,0>:ud 0x44EB801:ud // msg desc; 1 is added to change BI to U (sampler idx 8)
+ mov (1) r16.2:ud 0x0000E000:ud // Enable Red channel
+
+ send (1) uwBUFFER_1(4)<1> r16 0x2 a0.0:ud
+ // Returns U data in 4 GRFs in scrambled order
+
+ add (1) a0.0:ud r23.5<0;1,0>:ud 0x44EBC02:ud // msg desc; 2 is added to change BI to V (sampler idx 0xC)
+ mov (1) r16.2:ud 0x0000E000:ud // Enable Red channel
+
+ send (1) uwBUFFER_1(8)<1> r16 0x2 a0.0:ud
+ // Returns V data in 4 GRFs in scrambled order
+
+SKIP_AVS_LOAD_L0_1_:
+ nop
+
+
diff --git a/src/shaders/post_processing/gen7/PL3_AVS_Buf_2.g4a b/src/shaders/post_processing/gen7/PL3_AVS_Buf_2.g4a
new file mode 100644
index 0000000..aa96383
--- /dev/null
+++ b/src/shaders/post_processing/gen7/PL3_AVS_Buf_2.g4a
@@ -0,0 +1,543 @@
+/*
+ * Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// 45 // Total instruction count
+// 1 // Total kernel count
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16)
+.default_register_type :ub
+
+.reg_count_total 128
+.reg_count_payload 7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later)
+// 0000,0000,0000
+// 0001(Spawn a root thread),0001 (Root thread spawn thread)
+// = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later)
+// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+// 0000,0000 (Binding table index, added later)
+// = 0x0C098700
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+ // Message Descriptors
+ // = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+ // 1 (header present 1) 0 1010 (media block write) 000000
+ // 00000000 (binding table index - set later)
+ // = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+ // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT
+
+
+.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+
+.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+
+.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+
+
+// End of common.inc
+
+
+// FileName: PL3_AVS_Buf_2.asm
+// Author: Tatiya, Rupesh
+// Description: Loads 8x8 AVS/IEF PL3 data into Buffer 2
+
+
+
+// FileName : PL3_AVS_Buf.asm
+// Author : Tatiya, Rupesh
+// Description : Loads 8x8 AVS/IEF PL3 data into Buffer N
+
+
+
+// Module name: Scaling.inc
+
+
+
+
+// Description: Includes all definitions explicit to Fast Composite.
+
+
+
+
+// End of common.inc
+
+
+//========== GRF partition ==========
+ // r0 header : r0 (1 GRF)
+ // Static parameters : r1 - r6 (6 GRFS)
+ // Inline parameters : r7 - r8 (2 GRFs)
+ // MSGSRC : r27 (1 GRF)
+//===================================
+
+//Interface:
+//========== Static Parameters (Explicit To Fast Composite) ==========
+//r1
+//CSC Set 0
+
+
+.declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud
+
+//Constant alpha
+
+
+//r2
+
+
+// Gen7 AVS WA
+
+
+// WiDi Definitions
+
+
+//Colorfill
+
+
+ // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise.
+
+.declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub
+
+//r3
+//Normalised Ratio of Horizontal step size with main video for all layers
+
+
+ //Normalised Ratio of Horizontal step size with main video for all layers becomes
+ //Normalised Horizontal step size for all layers in VP_Setup.asm
+
+
+//r4
+//Normalised Vertical step size for all layers
+
+
+//r5
+//Normalised Vertical Frame Origin for all layers
+
+
+//r6
+//Normalised Horizontal Frame Origin for all layers
+
+
+//========== Inline Parameters (Explicit To Fast Composite) ==========
+
+
+//Main video Step X
+
+
+//====================== Binding table (Explicit To Fast Composite)=========================================
+
+
+//Used by Interlaced Scaling Kernels
+
+
+//========== Sampler State Table Index (Explicit To Fast Composite)==========
+//Sampler Index for AVS/IEF messages
+
+
+//Sampler Index for SIMD16 sampler messages
+
+
+//=============================================================================
+
+.declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+
+.declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+.declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_4 Base=r28.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_5 Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+.declare ubBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+.declare ub4BUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+
+//Pointer to mask reg
+
+
+//r18
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+.declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF
+
+//r19
+
+
+.declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF
+
+
+//r20
+
+.declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF
+
+//r21
+
+.declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF
+
+//r22
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as
+//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT
+
+//r23
+
+
+//Lumakey
+
+
+//r24
+
+
+//r25
+
+
+//r26
+
+
+//defines to generate LABELS during compile time.
+
+
+ // Message Header
+ // m0.7 31:0 Debug
+ // m0.6 31:0 Debug
+ // m0.5 31:0 Ignored
+ // m0.4 31:0 Ignored
+ // m0.3 31:0 Ignored
+ // m0.2 31:16 Ignored
+ // 15 Alpha Write Channel Mask enable=0, disable=1
+ // 14 Blue Write Channel Mask (U)
+ // 13 Green Write Channel Mask (Y)
+ // 12 Red Write Channel Mask (V)
+ // 11:0 Ignored
+ // m0.1 Ignored
+ // m0.0 Ignored
+
+
+ // AVS payload
+ // m1.7 Group ID Number
+ // m1.6 U 2nd Derivative ---> NLAS dx
+ // m1.5 Delta V ---> Step Y
+ // m1.4 Delta U ---> Step X
+ // m1.3 Pixel 0 V Address ---> ORIY (Y0)
+ // m1.2 Pixel 0 U Address ---> ORIX (X0)
+ // m1.1 Vertical Block Number
+ // m1.0 Reserved
+
+ // Sampler Message Descriptor
+ // 31:29 Reserved 000
+ // 28:25 Message length 0010
+ // 24:20 Response length xxxxx ---> 4GRFs for each enabled channel (AVS), 2GRFs for each enabled channel (sample unorm)
+ // 19 Header Present 1
+ // 18:17 SIMD Mode 11 ---> SIMD32/64
+ // 16:12 Message Type xxxxx ---> 01011 sample_8x8, 01100 (sample_unorm), 01010 (sample_unorm+killpix)
+ // 11:8 Sampler Index xxxx
+ // 7:0 Binding Table Index xxxxxxxx
+
+
+ // Msg Header M0.2
+ // 15:15 Alpha Write Channel Mask, 0: written back, 1: not written back
+ // 14:14 Blue Write Channel Mask
+ // 13:13 Green Write Channel Mask
+ // 12:12 Red Write Channel Mask
+
+
+//By design, Buffer 0,1,2,3 always have Layer 0 and Buffer 4,5 always have L1-L7
+
+
+//used to generate LABELS at compile time.
+
+
+ // 18:17 SIMD Mode 10 ---> SIMD16
+ // 16:12 Message Type xxxxx ---> 00000 (SIMD16)
+
+
+//r10-17 - 8 GRFs to load SIMD16 data (upto 4 channels)
+//r18-19 - 2 GRFs to store sampler ramp.
+
+ .declare mfSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+ .declare muwSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+ .declare mudCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+ .declare mubCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=1 SrcRegion=<32;32,1> DstRegion=<1> Type=ub
+
+
+ .declare fSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+ .declare udSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+ .declare ub4SCALING_0X_34X_TEMP Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<1> Type=ub
+ .declare uwSCALING_0X_34X_TEMP Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+ // Sampler ramp is used for Scaling 0X_0.34X
+ .declare fSAMPLER_RAMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> Type=f // 1 GRFs, 8 elements
+
+
+ //#define rMSGDSC_UV r23.0
+
+
+//End of _SCALING_
+
+
+ // Loads the Layer 0 PL3 8x8 AVS block (Y, then U, then V) into Buffer 2; first check if layer is to be skipped
+
+
+ // f0.1 pre-computed in Set_Layer_0; when it is clear, jump past all three sampler sends
+ (-f0.1) jmpi (1) SKIP_AVS_LOAD_L0_2_
+
+
+ //AVS_PAYLOAD already has all the data loaded at this point
+ add (1) a0.0:ud r23.5<0;1,0>:ud 0x44EB400:ud //msg desc for Y: r23.5 + (msg len 2, resp len 4, header present, sample_8x8, sampler index 4)
+
+ mov (1) r16.2:ud 0x0000D000:ud // Enable Green channel (Y) only; bits 15, 14, 12 mask off Alpha, Blue, Red
+
+
+ // set the vertical block number (dword 1 of the payload mirror in r25)
+
+
+ add (1) r25.1<1>:ud r7.12:uw 2:ud // r7.12:uw + 2; the 2 is presumably this buffer's index - verify
+
+
+ mov (8) r17.0:ud r25.0<8;8,1>:ud // Copy msg payload mirror (r25) into r17, the payload register sent after header r16
+
+ // Gen7 AVS WA Only for YUV packed surfaces, NV12 and Y-channel only for Planar surfaces
+ // if (((int)(u_left*width + 5.0/256) > (int)(u_left*width))
+ // {
+ // modified_u_coord = u_coord - 5.0/(256*width); //floating point
+ // }
+ // else if(((int)(u_left*width + 255.0/256) == (int)(u_left*width))
+ // {
+ // modified_u_coord = u_coord + 1.0/(256*width); //floating point
+ // }
+ // else{
+ // modified_u_coord = u_coord;
+ // }
+ // Where u_left = u - 2*du + 3*ddu for IEF On
+ // And u_left = u for IEF Off case
+ //
+
+ // check whether Gen7 AVS WA is enabled (bit 1 of r2.3:uw); f0.0 and pixel 0 U are saved first because both get overwritten below
+ mov (1) r14.8:uw f0.0:uw // save f0.0
+ mov (1) r14.5:f r17.2<0;1,0>:f // save pixel 0 U for chroma
+
+ and.nz.f0.0 (1) null<1>:uw r2.3:uw 0x2:uw
+ (-f0.0)jmpi (1) GEN7_PL3_AVS_WA_DONE_L0_2_
+
+ // Gen7 AVS WA, check if IEF is ON (bit 2 of r2.3:uw) for choosing Gen7 AVS WA formula
+
+ and.nz.f0.0 (8) null<1>:uw r2.3<0;1,0>:uw 0x4:uw
+ (f0.0)mov (8) acc0.0:f r17.2<0;1,0>:f // acc0 = u (payload dword 2, Pixel 0 U Address)
+ (f0.0)mac (8) acc0.0:f r17.4<0;1,0>:f -2.0:f // acc0 += du * -2.0 (payload dword 4, Delta U)
+ (f0.0)mac (8) acc0.0:f r17.6<0;1,0>:f 3.0:f // acc0 += ddu * 3.0 (payload dword 6, U 2nd Derivative)
+ (f0.0)mov (1) r14.2:f acc0:f // IEF ON, rTEMP3.2 = u_left
+ (-f0.0)mov (1) r14.2:f r17.2<0;1,0>:f // IEF OFF, rTEMP3.2 = u_left
+
+ and (1) r14.1:ud r2.3:uw 0xFFF8:uw // drop the three low flag bits of r2.3:uw
+ asr (1) r14.1:ud r14.1:ud 3:d // remaining bits 15:3 shifted down = width used by the WA formula
+ mov (1) r14.1:f r14.1:ud // rTEMP3.1 = (float)width
+
+ // Gen7 AVS WA, if (int)(u_left*width + 5.0/256) > (int)(u_left*width)
+ mul (1) r14.0:f r14.2:f r14.1:f // rTEMP3.0 = u_left*width
+ add (1) r14.2:f r14.0:f 0.01953125:f // rTEMP3.2 = u_left*width + 5.0/256
+ add (1) r14.3:f r14.0:f 0.99609375:f // rTEMP3.3 = u_left*width + 255.0/256
+
+ //Check if the values are < 0 and account for (int) cast of negative numbers
+
+ //(int)(u_left*width); the sign is tested before the in-place float-to-int mov, then 1 is subtracted if it was negative
+ cmp.l.f0.0 (1) null<1>:f r14.0:f 0.00000000:f
+ mov (1) r14.0:d r14.0:f
+ (f0.0)add (1) r14.0:d r14.0<0;1,0>:d -1:d
+
+ //(int)(u_left*width + 5.0/256)
+ cmp.l.f0.0 (1) null<1>:f r14.2:f 0.00000000:f
+ mov (1) r14.2:d r14.2:f
+ (f0.0)add (1) r14.2:d r14.2<0;1,0>:d -1:d
+
+ //(int)(u_left*width + 255.0/256)
+ cmp.l.f0.0 (1) null<1>:f r14.3:f 0.00000000:f
+ mov (1) r14.3:d r14.3:f
+ (f0.0)add (1) r14.3:d r14.3<0;1,0>:d -1:d
+
+ mov (1) f0.0:uw 0:uw // clear f0.0 so the final add stays off when the predicated cmp.e below is skipped
+ //if (((int)(u_left*width + 5.0/256) > (int)(u_left*width))
+ cmp.g.f1.0 (1) null<1>:d r14.2:d r14.0:d
+ // modified_u_coord = u_coord - 5.0/(256*width); //floating point (r2.3:f presumably holds 5.0/(256*width) - verify)
+ (f1.0) add (1) r17.2:f r17.2<0;1,0>:f -r2.3:f
+ //else if(((int)(u_left*width + 255.0/256) == (int)(u_left*width))
+ (-f1.0) cmp.e.f0.0 (1) null<1>:d r14.3:d r14.0:d
+ // modified_u_coord = u_coord + 1.0/(256*width); //floating point (r2.2:f presumably holds 1.0/(256*width) - verify)
+ (f0.0) add (1) r17.2:f r17.2<0;1,0>:f r2.2:f
+
+GEN7_PL3_AVS_WA_DONE_L0_2_:
+ mov (1) f0.0:uw r14.8:uw // restore f0.0
+
+
+ send (1) uwBUFFER_2(0)<1> r16 0x2 a0.0:ud
+ // Returns Y data in 4 GRFs in scrambled order
+
+ mov (1) r17.2:f r14.5:f // restore pixel 0 U for chroma, No AVS WA for chroma
+
+ add (1) a0.0:ud r23.5<0;1,0>:ud 0x44EB801:ud // msg desc; sampler index 8, and 1 is added to change BI to U
+ mov (1) r16.2:ud 0x0000E000:ud // Enable Red channel only (bit 12 clear)
+
+ send (1) uwBUFFER_2(4)<1> r16 0x2 a0.0:ud
+ // Returns U data in 4 GRFs in scrambled order
+
+ add (1) a0.0:ud r23.5<0;1,0>:ud 0x44EBC02:ud // msg desc; sampler index 0xC, and 2 is added to change BI to V
+ mov (1) r16.2:ud 0x0000E000:ud // Enable Red channel only (bit 12 clear)
+
+ send (1) uwBUFFER_2(8)<1> r16 0x2 a0.0:ud
+ // Returns V data in 4 GRFs in scrambled order
+
+SKIP_AVS_LOAD_L0_2_:
+ nop
+
+
diff --git a/src/shaders/post_processing/gen7/PL3_AVS_Buf_3.g4a b/src/shaders/post_processing/gen7/PL3_AVS_Buf_3.g4a
new file mode 100644
index 0000000..dfacd42
--- /dev/null
+++ b/src/shaders/post_processing/gen7/PL3_AVS_Buf_3.g4a
@@ -0,0 +1,543 @@
+/*
+ * Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// 45 // Total instruction count
+// 1 // Total kernel count
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16)
+.default_register_type :ub
+
+.reg_count_total 128
+.reg_count_payload 7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later)
+// 0000,0000,0000
+// 0001(Spawn a root thread),0001 (Root thread spawn thread)
+// = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later)
+// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+// 0000,0000 (Binding table index, added later)
+// = 0x0C098700
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+ // Message Descriptors
+ // = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+ // 1 (header present 1) 0 1010 (media block write) 000000
+ // 00000000 (binding table index - set later)
+ // = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+ // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT
+
+
+.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+
+.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+
+.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+
+
+// End of common.inc
+
+
+// FileName: PL3_AVS_Buf_3.asm
+// Author: Tatiya, Rupesh
+// Description: Loads 8x8 AVS/IEF PL3 data into Buffer 3
+
+
+
+// FileName : PL3_AVS_Buf.asm
+// Author : Tatiya, Rupesh
+// Description : Loads 8x8 AVS/IEF PL3 data into Buffer N
+
+
+
+// Module name: Scaling.inc
+
+
+
+
+// Description: Includes all definitions explicit to Fast Composite.
+
+
+
+
+// End of common.inc
+
+
+//========== GRF partition ==========
+ // r0 header : r0 (1 GRF)
+ // Static parameters : r1 - r6 (6 GRFS)
+ // Inline parameters : r7 - r8 (2 GRFs)
+ // MSGSRC : r27 (1 GRF)
+//===================================
+
+//Interface:
+//========== Static Parameters (Explicit To Fast Composite) ==========
+//r1
+//CSC Set 0
+
+
+.declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud
+
+//Constant alpha
+
+
+//r2
+
+
+// Gen7 AVS WA
+
+
+// WiDi Definitions
+
+
+//Colorfill
+
+
+ // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise.
+
+.declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub
+
+//r3
+//Normalised Ratio of Horizontal step size with main video for all layers
+
+
+ //Normalised Ratio of Horizontal step size with main video for all layers becomes
+ //Normalised Horizontal step size for all layers in VP_Setup.asm
+
+
+//r4
+//Normalised Vertical step size for all layers
+
+
+//r5
+//Normalised Vertical Frame Origin for all layers
+
+
+//r6
+//Normalised Horizontal Frame Origin for all layers
+
+
+//========== Inline Parameters (Explicit To Fast Composite) ==========
+
+
+//Main video Step X
+
+
+//====================== Binding table (Explicit To Fast Composite)=========================================
+
+
+//Used by Interlaced Scaling Kernels
+
+
+//========== Sampler State Table Index (Explicit To Fast Composite)==========
+//Sampler Index for AVS/IEF messages
+
+
+//Sampler Index for SIMD16 sampler messages
+
+
+//=============================================================================
+
+.declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+
+.declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+.declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_4 Base=r28.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_5 Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+.declare ubBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+.declare ub4BUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+
+//Pointer to mask reg
+
+
+//r18
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+.declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF
+
+//r19
+
+
+.declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF
+
+
+//r20
+
+.declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF
+
+//r21
+
+.declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF
+
+//r22
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as
+//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT
+
+//r23
+
+
+//Lumakey
+
+
+//r24
+
+
+//r25
+
+
+//r26
+
+
+//defines to generate LABELS during compile time.
+
+
+ // Message Header
+ // m0.7 31:0 Debug
+ // m0.6 31:0 Debug
+ // m0.5 31:0 Ignored
+ // m0.4 31:0 Ignored
+ // m0.3 31:0 Ignored
+ // m0.2 31:16 Ignored
+ // 15 Alpha Write Channel Mask enable=0, disable=1
+ // 14 Blue Write Channel Mask (U)
+ // 13 Green Write Channel Mask (Y)
+ // 12 Red Write Channel Mask (V)
+ // 11:0 Ignored
+ // m0.1 Ignored
+ // m0.0 Ignored
+
+
+ // AVS payload
+ // m1.7 Group ID Number
+ // m1.6 U 2nd Derivative ---> NLAS dx
+ // m1.5 Delta V ---> Step Y
+ // m1.4 Delta U ---> Step X
+ // m1.3 Pixel 0 V Address ---> ORIY (Y0)
+ // m1.2 Pixel 0 U Address ---> ORIX (X0)
+ // m1.1 Vertical Block Number
+ // m1.0 Reserved
+
+ // Sampler Message Descriptor
+ // 31:29 Reserved 000
+ // 28:25 Message length 0010
+ // 24:20 Response length xxxxx ---> 4GRFs for each enabled channel (AVS), 2GRFs for each enabled channel (sample unorm)
+ // 19 Header Present 1
+ // 18:17 SIMD Mode 11 ---> SIMD32/64
+ // 16:12 Message Type xxxxx ---> 01011 sample_8x8, 01100 (sample_unorm), 01010 (sample_unorm+killpix)
+ // 11:8 Sampler Index xxxx
+ // 7:0 Binding Table Index xxxxxxxx
+
+
+ // Msg Header M0.2
+ // 15:15 Alpha Write Channel Mask, 0: written back, 1: not written back
+ // 14:14 Blue Write Channel Mask
+ // 13:13 Green Write Channel Mask
+ // 12:12 Red Write Channel Mask
+
+
+//By design, Buffer 0,1,2,3 always have Layer 0 and Buffer 4,5 always have L1-L7
+
+
+//used to generate LABELS at compile time.
+
+
+ // 18:17 SIMD Mode 10 ---> SIMD16
+ // 16:12 Message Type xxxxx ---> 00000 (SIMD16)
+
+
+//r10-17 - 8 GRFs to load SIMD16 data (upto 4 channels)
+//r18-19 - 2 GRFs to store sampler ramp.
+
+ .declare mfSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+ .declare muwSCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+ .declare mudCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+ .declare mubCALING_0X_34X_PAYLOAD Base=r14.0 ElementSize=1 SrcRegion=<32;32,1> DstRegion=<1> Type=ub
+
+
+ .declare fSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+ .declare udSCALING_0X_34X_TEMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+ .declare ub4SCALING_0X_34X_TEMP Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<1> Type=ub
+ .declare uwSCALING_0X_34X_TEMP Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+ // Sampler ramp is used for Scaling 0X_0.34X
+ .declare fSAMPLER_RAMP Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> Type=f // 1 GRFs, 8 elements
+
+
+ //#define rMSGDSC_UV r23.0
+
+
+//End of _SCALING_
+
+
+ // Loads the Layer 0 PL3 8x8 AVS block (Y, then U, then V) into Buffer 3; first check if layer is to be skipped
+
+
+ // f0.1 pre-computed in Set_Layer_0; when it is clear, jump past all three sampler sends
+ (-f0.1) jmpi (1) SKIP_AVS_LOAD_L0_3_
+
+
+ //AVS_PAYLOAD already has all the data loaded at this point
+ add (1) a0.0:ud r23.5<0;1,0>:ud 0x44EB400:ud //msg desc for Y: r23.5 + (msg len 2, resp len 4, header present, sample_8x8, sampler index 4)
+
+ mov (1) r16.2:ud 0x0000D000:ud // Enable Green channel (Y) only; bits 15, 14, 12 mask off Alpha, Blue, Red
+
+
+ // set the vertical block number (dword 1 of the payload mirror in r25)
+
+
+ add (1) r25.1<1>:ud r7.12:uw 3:ud // r7.12:uw + 3; the 3 is presumably this buffer's index - verify
+
+
+ mov (8) r17.0:ud r25.0<8;8,1>:ud // Copy msg payload mirror (r25) into r17, the payload register sent after header r16
+
+ // Gen7 AVS WA Only for YUV packed surfaces, NV12 and Y-channel only for Planar surfaces
+ // if (((int)(u_left*width + 5.0/256) > (int)(u_left*width))
+ // {
+ // modified_u_coord = u_coord - 5.0/(256*width); //floating point
+ // }
+ // else if(((int)(u_left*width + 255.0/256) == (int)(u_left*width))
+ // {
+ // modified_u_coord = u_coord + 1.0/(256*width); //floating point
+ // }
+ // else{
+ // modified_u_coord = u_coord;
+ // }
+ // Where u_left = u - 2*du + 3*ddu for IEF On
+ // And u_left = u for IEF Off case
+ //
+
+ // check whether Gen7 AVS WA is enabled (bit 1 of r2.3:uw); f0.0 and pixel 0 U are saved first because both get overwritten below
+ mov (1) r14.8:uw f0.0:uw // save f0.0
+ mov (1) r14.5:f r17.2<0;1,0>:f // save pixel 0 U for chroma
+
+ and.nz.f0.0 (1) null<1>:uw r2.3:uw 0x2:uw
+ (-f0.0)jmpi (1) GEN7_PL3_AVS_WA_DONE_L0_3_
+
+ // Gen7 AVS WA, check if IEF is ON (bit 2 of r2.3:uw) for choosing Gen7 AVS WA formula
+
+ and.nz.f0.0 (8) null<1>:uw r2.3<0;1,0>:uw 0x4:uw
+ (f0.0)mov (8) acc0.0:f r17.2<0;1,0>:f // acc0 = u (payload dword 2, Pixel 0 U Address)
+ (f0.0)mac (8) acc0.0:f r17.4<0;1,0>:f -2.0:f // acc0 += du * -2.0 (payload dword 4, Delta U)
+ (f0.0)mac (8) acc0.0:f r17.6<0;1,0>:f 3.0:f // acc0 += ddu * 3.0 (payload dword 6, U 2nd Derivative)
+ (f0.0)mov (1) r14.2:f acc0:f // IEF ON, rTEMP3.2 = u_left
+ (-f0.0)mov (1) r14.2:f r17.2<0;1,0>:f // IEF OFF, rTEMP3.2 = u_left
+
+ and (1) r14.1:ud r2.3:uw 0xFFF8:uw // drop the three low flag bits of r2.3:uw
+ asr (1) r14.1:ud r14.1:ud 3:d // remaining bits 15:3 shifted down = width used by the WA formula
+ mov (1) r14.1:f r14.1:ud // rTEMP3.1 = (float)width
+
+ // Gen7 AVS WA, if (int)(u_left*width + 5.0/256) > (int)(u_left*width)
+ mul (1) r14.0:f r14.2:f r14.1:f // rTEMP3.0 = u_left*width
+ add (1) r14.2:f r14.0:f 0.01953125:f // rTEMP3.2 = u_left*width + 5.0/256
+ add (1) r14.3:f r14.0:f 0.99609375:f // rTEMP3.3 = u_left*width + 255.0/256
+
+ //Check if the values are < 0 and account for (int) cast of negative numbers
+
+ //(int)(u_left*width); the sign is tested before the in-place float-to-int mov, then 1 is subtracted if it was negative
+ cmp.l.f0.0 (1) null<1>:f r14.0:f 0.00000000:f
+ mov (1) r14.0:d r14.0:f
+ (f0.0)add (1) r14.0:d r14.0<0;1,0>:d -1:d
+
+ //(int)(u_left*width + 5.0/256)
+ cmp.l.f0.0 (1) null<1>:f r14.2:f 0.00000000:f
+ mov (1) r14.2:d r14.2:f
+ (f0.0)add (1) r14.2:d r14.2<0;1,0>:d -1:d
+
+ //(int)(u_left*width + 255.0/256)
+ cmp.l.f0.0 (1) null<1>:f r14.3:f 0.00000000:f
+ mov (1) r14.3:d r14.3:f
+ (f0.0)add (1) r14.3:d r14.3<0;1,0>:d -1:d
+
+ mov (1) f0.0:uw 0:uw // clear f0.0 so the final add stays off when the predicated cmp.e below is skipped
+ //if (((int)(u_left*width + 5.0/256) > (int)(u_left*width))
+ cmp.g.f1.0 (1) null<1>:d r14.2:d r14.0:d
+ // modified_u_coord = u_coord - 5.0/(256*width); //floating point (r2.3:f presumably holds 5.0/(256*width) - verify)
+ (f1.0) add (1) r17.2:f r17.2<0;1,0>:f -r2.3:f
+ //else if(((int)(u_left*width + 255.0/256) == (int)(u_left*width))
+ (-f1.0) cmp.e.f0.0 (1) null<1>:d r14.3:d r14.0:d
+ // modified_u_coord = u_coord + 1.0/(256*width); //floating point (r2.2:f presumably holds 1.0/(256*width) - verify)
+ (f0.0) add (1) r17.2:f r17.2<0;1,0>:f r2.2:f
+
+GEN7_PL3_AVS_WA_DONE_L0_3_:
+ mov (1) f0.0:uw r14.8:uw // restore f0.0
+
+
+ send (1) uwBUFFER_3(0)<1> r16 0x2 a0.0:ud
+ // Returns Y data in 4 GRFs in scrambled order
+
+ mov (1) r17.2:f r14.5:f // restore pixel 0 U for chroma, No AVS WA for chroma
+
+ add (1) a0.0:ud r23.5<0;1,0>:ud 0x44EB801:ud // msg desc; sampler index 8, and 1 is added to change BI to U
+ mov (1) r16.2:ud 0x0000E000:ud // Enable Red channel only (bit 12 clear)
+
+ send (1) uwBUFFER_3(4)<1> r16 0x2 a0.0:ud
+ // Returns U data in 4 GRFs in scrambled order
+
+ add (1) a0.0:ud r23.5<0;1,0>:ud 0x44EBC02:ud // msg desc; sampler index 0xC, and 2 is added to change BI to V
+ mov (1) r16.2:ud 0x0000E000:ud // Enable Red channel only (bit 12 clear)
+
+ send (1) uwBUFFER_3(8)<1> r16 0x2 a0.0:ud
+ // Returns V data in 4 GRFs in scrambled order
+
+SKIP_AVS_LOAD_L0_3_:
+ nop
+
+
diff --git a/src/shaders/post_processing/gen7/PL3_DNDI_422CP.g4a b/src/shaders/post_processing/gen7/PL3_DNDI_422CP.g4a
new file mode 100644
index 0000000..16a0fc9
--- /dev/null
+++ b/src/shaders/post_processing/gen7/PL3_DNDI_422CP.g4a
@@ -0,0 +1,562 @@
+/*
+ * Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// 120 // Total instruction count
+// 1 // Total kernel count
+
+.kernel PL3_DNDI_422CP
+.code
+// Kernel stages, in the order they appear below: DNDI command, STMM save, DN history save,
+// encoder statistics save, U/V load, DN luma save, DI 422 save to the color pipe, U/V save, end of thread.
+// The "FileName:" comments mark where each source fragment was expanded into this listing.
+// FileName: DNDI_PL_Core.asm
+// Author: Tatiya, Rupesh
+
+
+
+// FileName: DNDI_Core.asm
+// Author: Vivek Kumar
+// Description: Tasks for DN+DI case (16x4 block)
+
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+// Assembler defaults, going by the directive names: execution size 16 and :ub operand type - TODO confirm against the assembler documentation
+.default_execution_size (16)
+.default_register_type :ub
+// Register budget, going by the directive names: 128 registers in total, 7 of them payload - TODO confirm
+.reg_count_total 128
+.reg_count_payload 7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later)
+// 0000,0000,0000
+// 0001(Spawn a root thread),0001 (Root thread spawn thread)
+// = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later)
+// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+// 0000,0000 (Binding table index, added later)
+// = 0x0C098700
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+ // Message Descriptors
+ // = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+ // 1 (header present 1) 0 1010 (media block write) 000000
+ // 00000000 (binding table index - set later)
+ // = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+ // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT
+
+// Four typed views (ub/uw/ud/f) of the same storage starting at r30; none of them is referenced by an instruction in this kernel
+.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+// Five typed views of the same storage starting at r9.0; none of them is referenced by an instruction in this kernel
+.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+
+.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+// Byte view with a stride of 4 in both the source region and the destination region
+.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+
+
+// End of common.inc
+
+
+// FileName: DNDI.inc
+// Author: Vivek Kumar
+// Description: Include file for DN, DI and DNDI
+// Inputs: DI_ENABLE, DN_ENABLE, DN_PLANAR, DN_PACKED
+
+
+
+
+// End of common.inc
+
+
+//Interface:
+//Static Parameters:
+//r1
+
+
+//====================== Binding table (Explicit To DNDI)=========================================
+// Typed register aliases for the message headers and responses used by the send sequences of this kernel.
+// DNDI request header at r18, viewed as ud/d/w
+.declare mudMSGHDR_DNDI Base=r18 ElementSize=4 Type=ud
+.declare mdMSGHDR_DNDI Base=r18 ElementSize=4 Type=d
+.declare mwMSGHDR_DNDI Base=r18 ElementSize=2 Type=w
+
+// STMM write header at r20
+.declare mudMSGHDR_STMM Base=r20 ElementSize=4 Type=ud
+
+// Denoise history write header at r22
+.declare mudMSGHDR_HIST Base=r22 ElementSize=4 Type=ud
+
+// Encoder statistics write header at r24, viewed as ud/uw/ub
+.declare mudMSGHDR_ENC_STATS Base=r24 ElementSize=4 Type=ud
+.declare muwMSGHDR_ENC_STATS Base=r24 ElementSize=2 Type=uw
+.declare mubMSGHDR_ENC_STATS Base=r24 ElementSize=1 Type=ub
+
+// Denoised luma write header at r31, viewed as ud/d/ub
+.declare mudMSGHDR_DN_OUT Base=r31.0 ElementSize=4 Type=ud
+.declare mdMSGHDR_DN_OUT Base=r31.0 ElementSize=4 Type=d
+.declare mubMSGHDR_DN_OUT Base=r31.0 ElementSize=1 Type=ub
+
+// U/V copy headers: UVCOPY and UCOPY both name r36, VCOPY names r38
+.declare mudMSGHDR_UVCOPY Base=r36 ElementSize=4 Type=ud
+.declare mdMSGHDR_UVCOPY Base=r36 ElementSize=4 Type=d
+.declare mudMSGHDR_UCOPY Base=r36 ElementSize=4 Type=ud
+.declare mudMSGHDR_VCOPY Base=r38 ElementSize=4 Type=ud
+
+// DI output header 1 shares r18 with the DNDI request header declared above
+.declare mudMSGHDR_DI_OUT1 Base=r18.0 ElementSize=4 Type=ud
+.declare mubMSGHDR_DI_OUT1 Base=r18.0 ElementSize=1 Type=ub
+
+// DI output header 2 at r23; the 422CP save stage of this kernel declares and uses its own r18/r21/r24/r27 aliases instead
+.declare mudMSGHDR_DI_OUT2 Base=r23.0 ElementSize=4 Type=ud
+.declare mubMSGHDR_DI_OUT2 Base=r23.0 ElementSize=1 Type=ub
+
+//r45
+//Use r45 as message header, so no need to "mov" the data.
+
+.declare mudDN_Y_OUT Base=r45.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+// Message response (Denoised & DI-ed pixels & statistics); Use buffer 5. Starts at r46, viewed as ud/uw/ub
+.declare udDNDI_RESP Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare uwDNDI_RESP Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare ubDNDI_RESP Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+// Message response (UV Copy); Use buffer 5. Starts at r58, i.e. DNDI_RESP register index 12
+.declare udDNDI_UV_RESP Base=r58.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare ubDNDI_UV_RESP Base=r58.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+//Temp GRFs: For 42X to 422 Conversion (not referenced by an instruction in this kernel)
+.declare uwDNDI_UVCOPY_TEMP Base=r10.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw //8 GRFs
+.declare ubDNDI_UVCOPY_TEMP Base=r10.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub //8 GRFs
+//---------------------------------------------------------------------------
+// Message descriptors
+//---------------------------------------------------------------------------
+// Extended message descriptor
+ // Message descriptor for sampler read
+ // = 000 0010 (message len 2) 00000 (resp len - set later, 12 or 5 or 11)
+ // 1 (header present 1) 0 11 (SIMD32/64 mode)
+ // 1000 (message type) 0000 (DI state index)
+ // 00000000 (binding table index - set later)
+ // = 0x040b8000
+
+
+// Attention: The Message Length is The Number of GRFs with Data Only, without the Header
+
+
+//---------------------------------------------------------------------------
+// VDI Return Data format
+//---------------------------------------------------------------------------
+// Defines for DI enabled
+
+
+// Defines for DI disabled
+
+
+
+// FileName: DNDI_Command.asm
+// Author: Vivek Kumar
+// Description: Sends a message to the VDI to process one DN (16x8) or DNDI (16x4) block
+// Inputs: r0 (copied as the header base) and r7.0/r7.1:w (block X/Y origin). Output: udDNDI_RESP, declared at r46.
+// Prepare the DNDI send command in mudMSGHDR_DNDI (declared at r18)
+mov (8) mudMSGHDR_DNDI(0)<1> r0.0<8;8,1>:ud // message header: start from a copy of r0
+mov (1) mwMSGHDR_DNDI(1,4)<1> r7.0<0;1,0>:w { NoDDClr } // horizontal origin -> word 4 of the second message register // Do we need to add offset here? -vK
+mov (1) mwMSGHDR_DNDI(1,12)<1> r7.1<0;1,0>:w { NoDDChk } // vertical origin -> word 12 of the second message register // Can these 2 be combined? - vK
+// Issue the request from r18; the response is written starting at udDNDI_RESP(0)
+send (8) udDNDI_RESP(0)<1> r18 0x2 0x4BE8003:ud
+// r7.0/r7.1 are overwritten below, so every later stage of this kernel works from the returned origin
+// On Gen6, with VDI walker, use the XY pair returned rather than programmed above
+// VDI_RETURNED_XY is ordered XY in case of walker enabled and the same as programmed in case of walker disabled
+mov (2) r7.0<1>:w uwDNDI_RESP(9,14)<2;2,1> // horizontal/vertical origin in W.14 and W.15 of response register 9
+
+
+
+// FileName: DI_STMM_Save.asm
+// Author: Vivek Kumar
+// Description: Saves DI STMM Data to statistics surface in case of DI enabled (for 16x4 block)
+// Message layout: header in mudMSGHDR_STMM(0) (declared at r20), one data register in mudMSGHDR_STMM(1).
+// Write STMM to memory
+mov (8) mudMSGHDR_STMM(0)<1> r0.0<8;8,1>:ud // message header: start from a copy of r0
+mov (8) mudMSGHDR_STMM(1)<1> udDNDI_RESP(8,0) // Move STMM (response register 8) into the data register ("MRF" in the original wording)
+// Header dwords 0..2 = X/2, Y, block size. NOTE(review): NoDDClr/NoDDChk are presumably dependency hints for these partial writes to r20 - confirm in the ISA reference
+shr (1) mudMSGHDR_STMM(0,0)<1> r7.0<0;1,0>:w 1:w { NoDDClr } // X origin / 2
+mov (1) mudMSGHDR_STMM(0,1)<1> r7.1<0;1,0>:w { NoDDClr, NoDDChk } // Y origin
+mov (1) mudMSGHDR_STMM(0,2)<1> 0x30007:ud { NoDDChk } // block width and height (8x4), written as (height-1)<<16 | (width-1)
+// 0x40A8021 = the media block write template 0x020A8000 described in the descriptor notes, with message length 2 and low byte 0x21
+send (8) null<1>:d r20 0x5 0x40A8021:ud
+
+
+
+// FileName: DN_Hist_Save.asm
+// Author: Vivek Kumar
+// Description: Saves DN history data to statistics surface
+// The header is assembled in scratch register r27 and copied into mudMSGHDR_HIST(0) (declared at r22) just before the send.
+// Write denoise history to memory
+mov (8) r27<1>:ud r0.0<8;8,1>:ud // message header: start from a copy of r0
+// Data: a single dword, read from dword 0 of response register 9
+ mov (1) mudMSGHDR_HIST(1)<1> udDNDI_RESP(9,0)<0;1,0> // Move denoise history to the data register ("MRF" in the original wording) (4x1)
+
+// Header dwords 0..2 = X, Y, block size
+shr (2) r27.0<1>:ud r7.0<2;2,1>:w 2:w // X,Y origin / 4
+add (1) r27.0<1>:ud r27.0<0;1,0>:ud r1.12<0;1,0>:uw { NoDDClr } // Add pitch to X origin
+mov (1) r27.2<1>:ud 0x3:ud { NoDDChk } // block width and height: width-1 = 3, height-1 = 0, matching the 4x1 noted on the data move
+// r27 still holds this header after the copy; its dwords 3..7 remain the r0 copy made at the top of this stage
+mov (8) mudMSGHDR_HIST(0)<1> r27.0<8;8,1>:ud
+send (8) null<1>:d r22 0x5 0x40A8021:ud
+
+
+
+// FileName: DNDI_Enc_Stats_Save.asm
+// Author: Vivek Kumar
+// Description: Saves Encoder Statistics data to statistics surface in case of DI enabled (for 16x4 block)
+// Message layout: header in mudMSGHDR_ENC_STATS(0) (declared at r24), one data register in mudMSGHDR_ENC_STATS(1).
+// Write encoder statistics to memory
+//Currently enable this only on Gen6 validation
+mov (8) mudMSGHDR_ENC_STATS(1)<1> 0x0:ud // Zero the data register; only dwords 0 and 2..5 are filled in below
+mov (8) mudMSGHDR_ENC_STATS(0)<1> r0.0<8;8,1>:ud // message header: start from a copy of r0
+// Header dwords 0..2 = X/2, Y*3/4, block size; dwords 0 and 1 are then offset by r1.12 and r1.13
+shr (1) mudMSGHDR_ENC_STATS(0,0)<1> r7.0<0;1,0>:w 1:w { NoDDClr } // X origin / 2 (original note: enable the flag after testing on si)
+mul (1) acc0.1<1>:ud r7.1<0;1,0>:w 3:w // Y origin * 3
+shr (1) mudMSGHDR_ENC_STATS(0,1)<1> acc0.1<0;1,0>:ud 2:w { NoDDClr, NoDDChk } // Y origin * 3/4 (original note: enable the flag after testing on si)
+mov (1) mudMSGHDR_ENC_STATS(0,2)<1> 0x20007:ud { NoDDChk } // block width and height (8x3) (original note: enable the flag after testing on si)
+add (2) mudMSGHDR_ENC_STATS(0,0)<1> mudMSGHDR_ENC_STATS(0,0)<2;2,1> r1.12<2;2,1>:uw // Add pitch to X,Y origin
+
+// The three moves after the table copy response register 9 dwords 1, 3, 4, 5, 6 into data dwords 0, 3, 5, 2, 4.
+ //Data block for Encoder Statistics
+ //----------------------------------------------------
+ //| 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | Bytes
+ //----------------------------------------------------
+ //| BNE | MCNT | FCNT | TCNT | X | X | X | X |
+ //----------------------------------------------------
+ //| DcTpT | SVCM | DcBpT | DcTpB |
+ //----------------------------------------------------
+ //| SHCM | STAD | DcTcB | DcBpB |
+ //----------------------------------------------------
+ mov (1) mudMSGHDR_ENC_STATS(1,0)<1> udDNDI_RESP(9,1)<0;1,0> { NoDDClr } // response dword 1 -> data dword 0
+ mov (2) mudMSGHDR_ENC_STATS(1,3)<2> udDNDI_RESP(9,3)<2;2,1> { NoDDClr, NoDDChk } // response dwords 3,4 -> data dwords 3,5 (destination stride 2)
+ mov (2) mudMSGHDR_ENC_STATS(1,2)<2> udDNDI_RESP(9,5)<2;2,1> { NoDDChk } // response dwords 5,6 -> data dwords 2,4 (destination stride 2)
+
+// 0x40A8021 = the media block write template 0x020A8000 described in the descriptor notes, with message length 2 and low byte 0x21
+send (8) null<1>:d r24 0x5 0x40A8021:ud
+
+
+
+// FileName: DN_Load_UV_IMC3_16x4.asm
+// Author: Vivek Kumar
+// Description: Read UV for 16x4 block through DATAPORT
+
+
+
+// FileName: UVCopy_Load_16x4.asm
+// Author: Vivek Kumar
+// Description: Read UV for 16x4 block through DATAPORT
+// One header is built in r27 and duplicated into r36 and r38; the two reads differ only in the low byte of the descriptor (1 vs 2).
+// r27 is not reloaded from r0 here: its dwords 3..7 are whatever the DN history stage left there (a copy of r0).
+ add (2) r27.0<1>:d r7.0<2;2,1>:w r4.4<2;2,1>:w // Source Y Block origin
+ asr (2) r27.0<1>:d r27.0<2;2,1>:d 1:w { NoDDClr } // U/V block origin should be half of Y's
+ mov (1) r27.2<1>:ud 0x10007:ud { NoDDChk } // U/V block width and height (8x2)
+ mov (8) r36<1>:ud r27.0<8;8,1>:ud
+ mov (8) r38<1>:ud r27.0<8;8,1>:ud
+ send (8) udDNDI_UV_RESP(0)<1> r36 0x4 0x2190001:ud
+ send (8) udDNDI_UV_RESP(1)<1> r38 0x4 0x2190002:ud
+
+
+
+// FileName: DN_Save_Y_16x4.asm
+// Author: Vivek Kumar
+// Description: Save one 16x4 blocks of Y channel of DN output for reference
+// Interleaves two fields into four 16-byte rows held in mudMSGHDR_DN_OUT(1) and (2) (declared at r31), then writes them out.
+// The row order depends on r1.28: equal to 1 takes the TOP_FIELD_FIRST path, anything else falls through to BOTTOM_FIELD_FIRST.
+ // check top/bottom field first
+cmp.e.f0.0 (1) null<1>:w r1.28<0;1,0>:ub 1:w
+// The three header moves below carry no flag modifier, so f0.0 from the compare is what the jmpi tests
+mov (8) mudMSGHDR_DN_OUT(0)<1> r0<8;8,1>:ud // message header: start from a copy of r0
+mov (2) mdMSGHDR_DN_OUT(0,0)<1> r7.0<2;2,1>:w { NoDDClr } // X,Y origin copied unscaled. NOTE(review): the original comment said "X origin * 2 (422 output)", but no scaling happens here
+mov (1) mudMSGHDR_DN_OUT(0,2)<1> 0x3000F:ud { NoDDChk } // block width and height: width-1 = 0xF, height-1 = 3, i.e. 16x4. NOTE(review): the original comment said 32x8
+
+(f0.0) jmpi (1) TOP_FIELD_FIRST
+
+BOTTOM_FIELD_FIRST:
+// Rows 0,2 come from response register 10; rows 1,3 from the upper halves of response registers 4 and 5
+ mov (4) mudMSGHDR_DN_OUT(1,0)<1> udDNDI_RESP(10,0)<4;4,1> { NoDDClr } // 2nd field luma from current frame (line 0)
+ mov (4) mudMSGHDR_DN_OUT(1,4)<1> udDNDI_RESP(4,4)<4;4,1> { NoDDChk } // 1st field luma from current frame (line 1)
+ mov (4) mudMSGHDR_DN_OUT(2,0)<1> udDNDI_RESP(10,4)<4;4,1> { NoDDClr } // 2nd field luma from current frame (line 2)
+ mov (4) mudMSGHDR_DN_OUT(2,4)<1> udDNDI_RESP(5,4)<4;4,1> { NoDDChk } // 1st field luma from current frame (line 3)
+
+ jmpi (1) SAVE_DN_CURR
+
+TOP_FIELD_FIRST:
+ mov (4) mudMSGHDR_DN_OUT(1,0)<1> udDNDI_RESP(4,0)<4;4,1> { NoDDClr } // line 0 from the lower half of response register 4. NOTE(review): original comment said "2nd field", copied from the bottom-field path; by that path's labels registers 4/5 are the 1st field
+ mov (4) mudMSGHDR_DN_OUT(1,4)<1> udDNDI_RESP(10,0)<4;4,1> { NoDDChk } // line 1 from response register 10 (labelled 2nd field in the bottom-field path; original comment here said "1st field")
+ mov (4) mudMSGHDR_DN_OUT(2,0)<1> udDNDI_RESP(5,0)<4;4,1> { NoDDClr } // line 2 from the lower half of response register 5 (original comment said "2nd field")
+ mov (4) mudMSGHDR_DN_OUT(2,4)<1> udDNDI_RESP(10,4)<4;4,1> { NoDDChk } // line 3 from response register 10 (original comment said "1st field")
+
+SAVE_DN_CURR:
+//send out data through data port: 0x60A8018 = write template 0x020A8000 with message length 3 (header + 2 data registers) and low byte 0x18
+send (8) null<1>:d r31.0 0x5 0x60A8018:ud
+
+
+
+// FileName: DI_Save_422CP_16x4.asm
+// Author: Vivek Kumar
+// Description: Save two 16x4 blocks of DI output in 422 format to Color Pipe (IECP)
+// Four messages are built, each a header plus two data registers: r18 and r21 for one field, r24 and r27 for the other.
+// Byte views of the four message bases follow.
+.declare mubMSGHDR_DI_OUT1_1 Base=r18.0 ElementSize=1 Type=ub
+
+
+.declare mubMSGHDR_DI_OUT1_2 Base=r21.0 ElementSize=1 Type=ub
+
+
+.declare mubMSGHDR_DI_OUT2_1 Base=r24.0 ElementSize=1 Type=ub
+
+// r27 serves first as the scratch register for the common header and later as the base of the fourth message
+.declare mubMSGHDR_DI_OUT2_2 Base=r27.0 ElementSize=1 Type=ub
+
+// Common header: copy of r0 with dwords 0..3 replaced by X*2, Y, block size, and the color pipe control dword
+mov (8) r27.0<1>:ud r0.0<8;8,1>:ud
+shl (1) r27.0<1>:ud r7.0<0;1,0>:w 1:w { NoDDClr } // H. block origin need to be doubled
+mov (1) r27.1<1>:ud r7.1<0;1,0>:w { NoDDClr, NoDDChk } // Block origin
+mov (1) r27.2<1>:ud 0x3000F:ud { NoDDClr, NoDDChk } // Block width and height: width-1 = 0xF, height-1 = 3, i.e. 16 bytes x 4 rows. NOTE(review): the original comment said 16x8
+
+//M0.3 - 0 - CP Enable, 1 - Area of Interest, 3:2 Message Format(TBD), 4:3 - Ignored, 31:5 CP state pointer
+//Compose area-of-interest bit + color pipe state pointer
+or (1) r27.3<1>:ud r2.4<0;1,0>:ud r7.26<0;1,0>:b { NoDDChk }
+
+//prepare the message headers: r18 and r24 get the header as built; r21 and r27 are derived from them later with X + 0x10
+mov (8) r18.0<1>:ud r27<8;8,1>:ud
+mov (8) r24.0<1>:ud r27<8;8,1>:ud
+
+
+// Pack 2nd field Y; First 8x4 block. Y bytes land on every 2nd byte of the data registers, two 16-byte rows per register.
+ mov (8) mubMSGHDR_DI_OUT1_1(1)<2> ubDNDI_RESP(0,0)<8;8,1> { NoDDClr } // Y row 0: response bytes 0..7 -> data register 1, even bytes 0..14
+ mov (8) mubMSGHDR_DI_OUT1_1(1,16)<2> ubDNDI_RESP(0,16)<8;8,1> { NoDDClr, NoDDChk } // Y row 1: response bytes 16..23 -> data register 1, even bytes 16..30
+ mov (8) mubMSGHDR_DI_OUT1_1(2)<2> ubDNDI_RESP(0,32)<8;8,1> { NoDDClr } // Y row 2: response bytes 32..39 -> data register 2, even bytes 0..14
+ mov (8) mubMSGHDR_DI_OUT1_1(2,16)<2> ubDNDI_RESP(0,48)<8;8,1> { NoDDClr, NoDDChk } // Y row 3: response bytes 48..55 -> data register 2, even bytes 16..30
+
+// Pack 2nd field U, V; First 8x4 block. Odd source bytes go to destination bytes 1,5,9,13 (U); even source bytes go to 3,7,11,15 (V).
+ mov (4) mubMSGHDR_DI_OUT1_1(1,1)<4> ubDNDI_RESP(2,1)<8;4,2> { NoDDClr, NoDDChk } // U row 0 from response register 2, odd bytes 1..7
+ mov (4) mubMSGHDR_DI_OUT1_1(1,17)<4> ubDNDI_RESP(2,17)<8;4,2> { NoDDClr, NoDDChk } // U row 1 from response register 2, odd bytes 17..23
+
+ mov (4) mubMSGHDR_DI_OUT1_1(1,3)<4> ubDNDI_RESP(2,0)<8;4,2> { NoDDChk } // V row 0 from response register 2, even bytes 0..6
+ mov (4) mubMSGHDR_DI_OUT1_1(1,19)<4> ubDNDI_RESP(2,16)<8;4,2> { NoDDChk } // V row 1 from response register 2, even bytes 16..22
+ mov (4) mubMSGHDR_DI_OUT1_1(2,1)<4> ubDNDI_RESP(2,33)<8;4,2> { NoDDClr, NoDDChk } // U row 2 from offset 33 onward (odd bytes)
+ mov (4) mubMSGHDR_DI_OUT1_1(2,17)<4> ubDNDI_RESP(2,49)<8;4,2> { NoDDClr, NoDDChk } // U row 3 from offset 49 onward (odd bytes)
+
+ mov (4) mubMSGHDR_DI_OUT1_1(2,3)<4> ubDNDI_RESP(2,32)<8;4,2> { NoDDChk } // V row 2 from offset 32 onward (even bytes)
+ mov (4) mubMSGHDR_DI_OUT1_1(2,19)<4> ubDNDI_RESP(2,48)<8;4,2> { NoDDChk } // V row 3 from offset 48 onward (even bytes)
+
+
+// Pack 2nd field Y; Second 8x4 block. Its header is the first block's header with 0x10 added to dword 0 (the doubled X origin).
+mov (8) r21.0<1>:ud r18.0<8;8,1>:ud
+add (1) r21.0<1>:ud r21.0<0;1,0>:w 0x10:w
+// Same packing as the first block, with every source offset advanced by 8 bytes
+ mov (8) mubMSGHDR_DI_OUT1_2(1)<2> ubDNDI_RESP(0,8)<8;8,1> { NoDDClr } // Y row 0: response bytes 8..15
+ mov (8) mubMSGHDR_DI_OUT1_2(1,16)<2> ubDNDI_RESP(0,24)<8;8,1> { NoDDClr, NoDDChk } // Y row 1: response bytes 24..31
+ mov (8) mubMSGHDR_DI_OUT1_2(2)<2> ubDNDI_RESP(0,40)<8;8,1> { NoDDClr } // Y row 2: response bytes 40..47
+ mov (8) mubMSGHDR_DI_OUT1_2(2,16)<2> ubDNDI_RESP(0,56)<8;8,1> { NoDDClr, NoDDChk } // Y row 3: response bytes 56..63
+
+// Pack 2nd field U, V; Second 8x4 block
+ mov (4) mubMSGHDR_DI_OUT1_2(1,1)<4> ubDNDI_RESP(2,9)<8;4,2> { NoDDClr, NoDDChk } // U row 0 from offset 9 onward (odd bytes)
+ mov (4) mubMSGHDR_DI_OUT1_2(1,17)<4> ubDNDI_RESP(2,25)<8;4,2> { NoDDClr, NoDDChk } // U row 1 from offset 25 onward (odd bytes)
+
+ mov (4) mubMSGHDR_DI_OUT1_2(1,3)<4> ubDNDI_RESP(2,8)<8;4,2> { NoDDChk } // V row 0 from offset 8 onward (even bytes)
+ mov (4) mubMSGHDR_DI_OUT1_2(1,19)<4> ubDNDI_RESP(2,24)<8;4,2> { NoDDChk } // V row 1 from offset 24 onward (even bytes)
+ mov (4) mubMSGHDR_DI_OUT1_2(2,1)<4> ubDNDI_RESP(2,41)<8;4,2> { NoDDClr, NoDDChk } // U row 2 from offset 41 onward (odd bytes)
+ mov (4) mubMSGHDR_DI_OUT1_2(2,17)<4> ubDNDI_RESP(2,57)<8;4,2> { NoDDClr, NoDDChk } // U row 3 from offset 57 onward (odd bytes)
+
+ mov (4) mubMSGHDR_DI_OUT1_2(2,3)<4> ubDNDI_RESP(2,40)<8;4,2> { NoDDChk } // V row 2 from offset 40 onward (even bytes)
+ mov (4) mubMSGHDR_DI_OUT1_2(2,19)<4> ubDNDI_RESP(2,56)<8;4,2> { NoDDChk } // V row 3 from offset 56 onward (even bytes)
+// Both halves go out with the same descriptor: write template 0x020A8000 with message length 3 and low byte 0x1B
+send (8) null<1>:d r18.0 0x5 0x60A801B:ud
+send (8) null<1>:d r21.0 0x5 0x60A801B:ud
+
+// Pack 1st field Y; 1st 8x4 block. Same byte interleave as the 2nd field, but reading response registers 4..7 and writing the message at r24.
+ mov (8) mubMSGHDR_DI_OUT2_1(1)<2> ubDNDI_RESP(4,0)<8;8,1> { NoDDClr } // Y row 0: response bytes 0..7 -> data register 1, even bytes 0..14
+ mov (8) mubMSGHDR_DI_OUT2_1(1,16)<2> ubDNDI_RESP(4,16)<8;8,1> { NoDDClr, NoDDChk } // Y row 1: response bytes 16..23 -> data register 1, even bytes 16..30
+ mov (8) mubMSGHDR_DI_OUT2_1(2)<2> ubDNDI_RESP(4,32)<8;8,1> { NoDDClr } // Y row 2: response bytes 32..39 -> data register 2, even bytes 0..14
+ mov (8) mubMSGHDR_DI_OUT2_1(2,16)<2> ubDNDI_RESP(4,48)<8;8,1> { NoDDClr, NoDDChk } // Y row 3: response bytes 48..55 -> data register 2, even bytes 16..30
+
+// Pack 1st field U,V; 1st 8x4 block. Odd source bytes go to destination bytes 1,5,9,13 (U); even source bytes go to 3,7,11,15 (V).
+ mov (4) mubMSGHDR_DI_OUT2_1(1,1)<4> ubDNDI_RESP(6,1)<8;4,2> { NoDDClr, NoDDChk } // U row 0 from response register 6, odd bytes 1..7
+ mov (4) mubMSGHDR_DI_OUT2_1(1,17)<4> ubDNDI_RESP(6,17)<8;4,2> { NoDDClr, NoDDChk } // U row 1 from response register 6, odd bytes 17..23
+
+ mov (4) mubMSGHDR_DI_OUT2_1(1,3)<4> ubDNDI_RESP(6,0)<8;4,2> { NoDDChk } // V row 0 from response register 6, even bytes 0..6
+ mov (4) mubMSGHDR_DI_OUT2_1(1,19)<4> ubDNDI_RESP(6,16)<8;4,2> { NoDDChk } // V row 1 from response register 6, even bytes 16..22
+ mov (4) mubMSGHDR_DI_OUT2_1(2,1)<4> ubDNDI_RESP(6,33)<8;4,2> { NoDDClr, NoDDChk } // U row 2 from offset 33 onward (odd bytes)
+ mov (4) mubMSGHDR_DI_OUT2_1(2,17)<4> ubDNDI_RESP(6,49)<8;4,2> { NoDDClr, NoDDChk } // U row 3 from offset 49 onward (odd bytes)
+
+ mov (4) mubMSGHDR_DI_OUT2_1(2,3)<4> ubDNDI_RESP(6,32)<8;4,2> { NoDDChk } // V row 2 from offset 32 onward (even bytes)
+ mov (4) mubMSGHDR_DI_OUT2_1(2,19)<4> ubDNDI_RESP(6,48)<8;4,2> { NoDDChk } // V row 3 from offset 48 onward (even bytes)
+
+// Pack 1st field Y; 2nd 8x4 block. r27 is overwritten here with the r24 header plus 0x10 on dword 0 (the doubled X origin).
+mov (8) r27.0<1>:ud r24.0<8;8,1>:ud
+add (1) r27.0<1>:ud r27.0<0;1,0>:w 0x10:w
+// Same packing as the 1st block, with every source offset advanced by 8 bytes
+ mov (8) mubMSGHDR_DI_OUT2_2(1)<2> ubDNDI_RESP(4,8)<8;8,1> { NoDDClr } // Y row 0: response bytes 8..15
+ mov (8) mubMSGHDR_DI_OUT2_2(1,16)<2> ubDNDI_RESP(4,24)<8;8,1> { NoDDClr, NoDDChk } // Y row 1: response bytes 24..31
+ mov (8) mubMSGHDR_DI_OUT2_2(2)<2> ubDNDI_RESP(4,40)<8;8,1> { NoDDClr } // Y row 2: response bytes 40..47
+ mov (8) mubMSGHDR_DI_OUT2_2(2,16)<2> ubDNDI_RESP(4,56)<8;8,1> { NoDDClr, NoDDChk } // Y row 3: response bytes 56..63
+
+// Pack 1st field U, V; 2nd 8x4 block
+ mov (4) mubMSGHDR_DI_OUT2_2(1,1)<4> ubDNDI_RESP(6,9)<8;4,2> { NoDDClr, NoDDChk } // U row 0 from offset 9 onward (odd bytes)
+ mov (4) mubMSGHDR_DI_OUT2_2(1,17)<4> ubDNDI_RESP(6,25)<8;4,2> { NoDDClr, NoDDChk } // U row 1 from offset 25 onward (odd bytes)
+
+ mov (4) mubMSGHDR_DI_OUT2_2(1,3)<4> ubDNDI_RESP(6,8)<8;4,2> { NoDDChk } // V row 0 from offset 8 onward (even bytes)
+ mov (4) mubMSGHDR_DI_OUT2_2(1,19)<4> ubDNDI_RESP(6,24)<8;4,2> { NoDDChk } // V row 1 from offset 24 onward (even bytes)
+ mov (4) mubMSGHDR_DI_OUT2_2(2,1)<4> ubDNDI_RESP(6,41)<8;4,2> { NoDDClr, NoDDChk } // U row 2 from offset 41 onward (odd bytes)
+ mov (4) mubMSGHDR_DI_OUT2_2(2,17)<4> ubDNDI_RESP(6,57)<8;4,2> { NoDDClr, NoDDChk } // U row 3 from offset 57 onward (odd bytes)
+
+ mov (4) mubMSGHDR_DI_OUT2_2(2,3)<4> ubDNDI_RESP(6,40)<8;4,2> { NoDDChk } // V row 2 from offset 40 onward (even bytes)
+ mov (4) mubMSGHDR_DI_OUT2_2(2,19)<4> ubDNDI_RESP(6,56)<8;4,2> { NoDDChk } // V row 3 from offset 56 onward (even bytes)
+// Both halves go out with the same descriptor: write template 0x020A8000 with message length 3 and low byte 0x1E
+send (8) null<1>:d r24.0 0x5 0x60A801E:ud
+send (8) null<1>:d r27.0 0x5 0x60A801E:ud
+
+
+
+// FileName: DN_Save_UV_IMC3_16x4.asm
+// Author: Vivek Kumar
+// Description: Save UV for 16x4 block through DATAPORT
+
+
+
+// FileName: UVCopy_Save_16x4.asm
+// Author: Vivek Kumar
+// Description: Save UV for 16x4 block through DATAPORT
+// Writes back the U and V blocks that the U/V load stage read into udDNDI_UV_RESP(0) and (1).
+// No instruction between that load and this point writes r36 or r38, so their headers are still the ones built for the reads.
+//Reuse the header from Load component
+
+// Each data register gets 4 dwords (16 bytes), the size of the 8x2 block set in the reused header
+ mov (4) mudMSGHDR_UCOPY(1)<1> udDNDI_UV_RESP(0)<4;4,1>
+ mov (4) mudMSGHDR_VCOPY(1)<1> udDNDI_UV_RESP(1)<4;4,1>
+ send (4) null<1>:d r36 0x5 0x40A8019:ud
+ send (4) null<1>:d r38 0x5 0x40A801A:ud
+
+
+
+//End of Thread message
+// r127 receives a copy of r0 and is the source of the final send; nothing is returned (null destination)
+mov (8) r127<1>:ud r0.0<8;8,1>:ud
+ send (1) null<1>:d r127 0x27 0x02000010
+// NOTE(review): unlike every other send in this kernel, the descriptor above has no :ud suffix - confirm the assembler treats it the same
+// Close the code section and the kernel opened by .code / .kernel PL3_DNDI_422CP
+.end_code
+.end_kernel
diff --git a/src/shaders/post_processing/gen7/PL3_DNDI_PA.g4a b/src/shaders/post_processing/gen7/PL3_DNDI_PA.g4a
new file mode 100644
index 0000000..65bceeb
--- /dev/null
+++ b/src/shaders/post_processing/gen7/PL3_DNDI_PA.g4a
@@ -0,0 +1,500 @@
+/*
+ * Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// 90 // Total instruction count
+// 1 // Total kernel count
+
+.kernel PL3_DNDI_PA
+.code
+
+
+
+// FileName: DNDI_PL_Core.asm
+// Author: Tatiya, Rupesh
+
+
+
+// FileName: DNDI_Core.asm
+// Author: Vivek Kumar
+// Description: Tasks for DN+DI case (16x4 block)
+
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16)
+.default_register_type :ub
+
+.reg_count_total 128
+.reg_count_payload 7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later)
+// 0000,0000,0000
+// 0001(Spawn a root thread),0001 (Root thread spawn thread)
+// = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later)
+// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+// 0000,0000 (Binding table index, added later)
+// = 0x0C098700
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+ // Message Descriptors
+ // = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+ // 1 (header present 1) 0 1010 (media block write) 000000
+ // 00000000 (binding table index - set later)
+ // = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+ // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT
+
+
+.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+
+.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+
+.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+
+
+// End of common.inc
+
+
+// FileName: DNDI.inc
+// Author: Vivek Kumar
+// Description: Include file for DN, DI and DNDI
+// Inputs: DI_ENABLE, DN_ENABLE, DN_PLANAR, DN_PACKED
+
+
+
+
+// End of common.inc
+
+
+//Interface:
+//Static Parameters:
+//r1
+
+
+//====================== Binding table (Explicit To DNDI)=========================================
+
+
+.declare mudMSGHDR_DNDI Base=r18 ElementSize=4 Type=ud
+.declare mdMSGHDR_DNDI Base=r18 ElementSize=4 Type=d
+.declare mwMSGHDR_DNDI Base=r18 ElementSize=2 Type=w
+
+
+.declare mudMSGHDR_STMM Base=r20 ElementSize=4 Type=ud
+
+
+.declare mudMSGHDR_HIST Base=r22 ElementSize=4 Type=ud
+
+
+.declare mudMSGHDR_ENC_STATS Base=r24 ElementSize=4 Type=ud
+.declare muwMSGHDR_ENC_STATS Base=r24 ElementSize=2 Type=uw
+.declare mubMSGHDR_ENC_STATS Base=r24 ElementSize=1 Type=ub
+
+
+.declare mudMSGHDR_DN_OUT Base=r31.0 ElementSize=4 Type=ud
+.declare mdMSGHDR_DN_OUT Base=r31.0 ElementSize=4 Type=d
+.declare mubMSGHDR_DN_OUT Base=r31.0 ElementSize=1 Type=ub
+
+
+.declare mudMSGHDR_UVCOPY Base=r36 ElementSize=4 Type=ud
+.declare mdMSGHDR_UVCOPY Base=r36 ElementSize=4 Type=d
+.declare mudMSGHDR_UCOPY Base=r36 ElementSize=4 Type=ud
+.declare mudMSGHDR_VCOPY Base=r38 ElementSize=4 Type=ud
+
+
+.declare mudMSGHDR_DI_OUT1 Base=r18.0 ElementSize=4 Type=ud
+.declare mubMSGHDR_DI_OUT1 Base=r18.0 ElementSize=1 Type=ub
+
+
+.declare mudMSGHDR_DI_OUT2 Base=r23.0 ElementSize=4 Type=ud
+.declare mubMSGHDR_DI_OUT2 Base=r23.0 ElementSize=1 Type=ub
+
+//r45
+//Use r45 as message header, so no need to "mov" the data.
+
+.declare mudDN_Y_OUT Base=r45.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+// Message response (Denoised & DI-ed pixels & statistics); Use buffer 5
+.declare udDNDI_RESP Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare uwDNDI_RESP Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare ubDNDI_RESP Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+// Message response (UV Copy); Use buffer 5
+.declare udDNDI_UV_RESP Base=r58.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare ubDNDI_UV_RESP Base=r58.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+//Temp GRFs: For 42X to 422 Conversion
+.declare uwDNDI_UVCOPY_TEMP Base=r10.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw //8 GRFs
+.declare ubDNDI_UVCOPY_TEMP Base=r10.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub //8 GRFs
+//---------------------------------------------------------------------------
+// Message descriptors
+//---------------------------------------------------------------------------
+// Extended message descriptor
+ // Message descriptor for sampler read
+ // = 000 0010 (message len 2) 00000 (resp len - set later, 12 or 5 or 11)
+ // 1 (header present 1) 0 11 (SIMD32/64 mode)
+ // 1000 (message type) 0000 (DI state index)
+ // 00000000 (binding table index - set later)
+ // = 0x040b8000
+
+
+// Attention: The Message Length is The Number of GRFs with Data Only, without the Header
+
+
+//---------------------------------------------------------------------------
+// VDI Return Data format
+//---------------------------------------------------------------------------
+// Defines for DI enabled
+
+
+// Defines for DI disabled
+
+
+
+// FileName: DNDI_Command.asm
+// Author: Vivek Kumar
+// Description: Sends a message to the VDI to process one DN (16x8) or DNDI (16x4) block
+
+// Prepare the DNDI send command
+mov (8) mudMSGHDR_DNDI(0)<1> r0.0<8;8,1>:ud // message header
+mov (1) mwMSGHDR_DNDI(1,4)<1> r7.0<0;1,0>:w { NoDDClr } // horizontal origin // Do we need to add offset here? -vK
+mov (1) mwMSGHDR_DNDI(1,12)<1> r7.1<0;1,0>:w { NoDDChk } // vertical origin // Can these 2 be combined? - vK
+
+send (8) udDNDI_RESP(0)<1> r18 0x2 0x4BE8003:ud
+
+// On Gen6, with VDI walker, use the XY pair returned rather than programmed above
+// VDI_RETURNED_XY is ordered XY in case of walker enables and the same as programmed in case of walker disabled
+mov (2) r7.0<1>:w uwDNDI_RESP(9,14)<2;2,1> // horizontal/Vertial origin in W.14 and W.15
+
+
+
+// FileName: DI_STMM_Save.asm
+// Author: Vivek Kumar
+// Description: Saves DI STMM Data to statistics surface in case of DI enabled (for 16x4 block)
+
+// Write STMM to memory
+mov (8) mudMSGHDR_STMM(0)<1> r0.0<8;8,1>:ud // message header
+mov (8) mudMSGHDR_STMM(1)<1> udDNDI_RESP(8,0) // Move STMM to MRF
+
+shr (1) mudMSGHDR_STMM(0,0)<1> r7.0<0;1,0>:w 1:w { NoDDClr } // X origin / 2
+mov (1) mudMSGHDR_STMM(0,1)<1> r7.1<0;1,0>:w { NoDDClr, NoDDChk } // Y origin
+mov (1) mudMSGHDR_STMM(0,2)<1> 0x30007:ud { NoDDChk } // block width and height (8x4)
+
+send (8) null<1>:d r20 0x5 0x40A8021:ud
+
+
+
+// FileName: DN_Hist_Save.asm
+// Author: Vivek Kumar
+// Description: Saves DN history data to statistics surface
+
+// Write denoise history to memory
+mov (8) r27<1>:ud r0.0<8;8,1>:ud // message header, staged in r27 and copied to the message register below
+
+ mov (1) mudMSGHDR_HIST(1)<1> udDNDI_RESP(9,0)<0;1,0> // Move denoise history (one dword) to the payload (4x1)
+
+
+shr (2) r27.0<1>:ud r7.0<2;2,1>:w 2:w // X,Y origin / 4
+add (1) r27.0<1>:ud r27.0<0;1,0>:ud r1.12<0;1,0>:uw { NoDDClr } // Add pitch to X origin
+mov (1) r27.2<1>:ud 0x3:ud { NoDDChk } // block width and height (4x1): width-1 = 3, height-1 = 0
+
+mov (8) mudMSGHDR_HIST(0)<1> r27.0<8;8,1>:ud // commit the staged header
+send (8) null<1>:d r22 0x5 0x40A8021:ud // write-only (null destination); message sourced from r22
+
+
+
+// FileName: DNDI_Enc_Stats_Save.asm
+// Author: Vivek Kumar
+// Description: Saves Encoder Statistics data to statistics surface in case of DI enabled (for 16x4 block)
+
+// Write encoder statistics to memory
+//Currently enable this only on Gen6 validation
+mov (8) mudMSGHDR_ENC_STATS(1)<1> 0x0:ud // Zero the payload register before the partial fills below
+mov (8) mudMSGHDR_ENC_STATS(0)<1> r0.0<8;8,1>:ud // message header (copy of r0)
+
+shr (1) mudMSGHDR_ENC_STATS(0,0)<1> r7.0<0;1,0>:w 1:w { NoDDClr } // X origin / 2 (original note: enable the flag after testing on si)
+mul (1) acc0.1<1>:ud r7.1<0;1,0>:w 3:w // Y origin * 3
+shr (1) mudMSGHDR_ENC_STATS(0,1)<1> acc0.1<0;1,0>:ud 2:w { NoDDClr, NoDDChk } // Y origin * 3/4 (original note: enable the flag after testing on si)
+mov (1) mudMSGHDR_ENC_STATS(0,2)<1> 0x20007:ud { NoDDChk } // block width and height (8x3) (original note: enable the flag after testing on si)
+add (2) mudMSGHDR_ENC_STATS(0,0)<1> mudMSGHDR_ENC_STATS(0,0)<2;2,1> r1.12<2;2,1>:uw // Add pitch to X,Y origin
+
+
+ //Data block for Encoder Statistics
+ //----------------------------------------------------
+ //| 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | Bytes
+ //----------------------------------------------------
+ //| BNE | MCNT | FCNT | TCNT | X | X | X | X |
+ //----------------------------------------------------
+ //| DcTpT | SVCM | DcBpT | DcTpB |
+ //----------------------------------------------------
+ //| SHCM | STAD | DcTcB | DcBpB |
+ //----------------------------------------------------
+ mov (1) mudMSGHDR_ENC_STATS(1,0)<1> udDNDI_RESP(9,1)<0;1,0> { NoDDClr } // payload dword 0 <- response register 9, dword 1
+ mov (2) mudMSGHDR_ENC_STATS(1,3)<2> udDNDI_RESP(9,3)<2;2,1> { NoDDClr, NoDDChk } // payload dwords 3 and 5 <- response dwords 3 and 4
+ mov (2) mudMSGHDR_ENC_STATS(1,2)<2> udDNDI_RESP(9,5)<2;2,1> { NoDDChk } // payload dwords 2 and 4 <- response dwords 5 and 6
+
+
+send (8) null<1>:d r24 0x5 0x40A8021:ud // write-only (null destination); message sourced from r24
+
+
+
+// FileName: DN_Load_UV_IMC3_16x4.asm
+// Author: Vivek Kumar
+// Description: Read UV for 16x4 block through DATAPORT
+
+
+
+// FileName: UVCopy_Load_16x4.asm
+// Author: Vivek Kumar
+// Description: Read UV for 16x4 block through DATAPORT
+
+
+ add (2) r27.0<1>:d r7.0<2;2,1>:w r4.4<2;2,1>:w // Source Y Block origin
+ asr (2) r27.0<1>:d r27.0<2;2,1>:d 1:w { NoDDClr } // U/V block origin should be half of Y's
+ mov (1) r27.2<1>:ud 0x10007:ud { NoDDChk } // U/V block width and height (8x2)
+ mov (8) r36<1>:ud r27.0<8;8,1>:ud // first copy of the header (r36), used by the first read below
+ mov (8) r38<1>:ud r27.0<8;8,1>:ud // second copy of the header (r38), used by the second read below
+ send (8) udDNDI_UV_RESP(0)<1> r36 0x4 0x2190001:ud // first plane -> udDNDI_UV_RESP(0); presumably U - confirm against the binding table
+ send (8) udDNDI_UV_RESP(1)<1> r38 0x4 0x2190002:ud // second plane -> udDNDI_UV_RESP(1); presumably V - confirm against the binding table
+
+
+
+// FileName: DN_Save_Y_16x4.asm
+// Author: Vivek Kumar
+// Description: Save one 16x4 block of Y channel of DN output for reference
+
+
+ // check top/bottom field first
+cmp.e.f0.0 (1) null<1>:w r1.28<0;1,0>:ub 1:w // f0.0 set when byte r1.28 == 1; selects the TOP_FIELD_FIRST path below
+
+mov (8) mudMSGHDR_DN_OUT(0)<1> r0<8;8,1>:ud // message header (copy of r0)
+mov (2) mdMSGHDR_DN_OUT(0,0)<1> r7.0<2;2,1>:w { NoDDClr } // X,Y origin copied unscaled (the original "X origin * 2 (422 output)" note does not match this mov)
+mov (1) mudMSGHDR_DN_OUT(0,2)<1> 0x3000F:ud { NoDDChk } // block width and height (16x4): width-1 = 0xF, height-1 = 3
+
+(f0.0) jmpi (1) TOP_FIELD_FIRST
+
+BOTTOM_FIELD_FIRST:
+
+ mov (4) mudMSGHDR_DN_OUT(1,0)<1> udDNDI_RESP(10,0)<4;4,1> { NoDDClr } // 2nd field luma from current frame (line 0,2)
+ mov (4) mudMSGHDR_DN_OUT(1,4)<1> udDNDI_RESP(4,4)<4;4,1> { NoDDChk } // 1st field luma from current frame (line 1,3)
+ mov (4) mudMSGHDR_DN_OUT(2,0)<1> udDNDI_RESP(10,4)<4;4,1> { NoDDClr } // 2nd field luma from current frame (line 0,2)
+ mov (4) mudMSGHDR_DN_OUT(2,4)<1> udDNDI_RESP(5,4)<4;4,1> { NoDDChk } // 1st field luma from current frame (line 1,3)
+
+ jmpi (1) SAVE_DN_CURR
+
+TOP_FIELD_FIRST:
+ mov (4) mudMSGHDR_DN_OUT(1,0)<1> udDNDI_RESP(4,0)<4;4,1> { NoDDClr } // 2nd field luma from current frame (line 0,2) // NOTE(review): field labels in this path look copied from the bottom-field-first path although the sources are swapped - confirm
+ mov (4) mudMSGHDR_DN_OUT(1,4)<1> udDNDI_RESP(10,0)<4;4,1> { NoDDChk } // 1st field luma from current frame (line 1,3)
+ mov (4) mudMSGHDR_DN_OUT(2,0)<1> udDNDI_RESP(5,0)<4;4,1> { NoDDClr } // 2nd field luma from current frame (line 0,2)
+ mov (4) mudMSGHDR_DN_OUT(2,4)<1> udDNDI_RESP(10,4)<4;4,1> { NoDDChk } // 1st field luma from current frame (line 1,3)
+
+SAVE_DN_CURR:
+//send out data through data port
+send (8) null<1>:d r31.0 0x5 0x60A8018:ud // write-only (null destination); message sourced from r31
+
+
+
+// FileName: DI_Save_PA_16x4.asm
+// Author: Vivek Kumar
+// Description: Save two 16x4 blocks of DI output in Packed format
+
+
+add (4) a0.4<1>:uw r2.28<4;4,1>:ub 608:w // Initial Y,U,V offset in YUV422 block; it starts at m20 (608 = 19 * 32 bytes, i.e. the register right after the r18 header)
+
+mov (8) r27.0<1>:ud r0.0<8;8,1>:ud // stage the message header in r27
+shl (1) r27.0<1>:d r7.0<0;1,0>:w 1:w { NoDDClr } // H. block origin needs to be doubled (2 bytes per pixel in the packed output)
+mov (1) r27.1<1>:d r7.1<0;1,0>:w { NoDDClr, NoDDChk } // V. block origin
+mov (1) r27.2<1>:ud 0x3001F:ud { NoDDChk } // Block width and height (32x4): width-1 = 0x1F, height-1 = 3
+
+//prepare the message headers
+mov (8) r18.0<1>:ud r27<8;8,1>:ud // header for the first write below
+mov (8) r23.0<1>:ud r27<8;8,1>:ud // header for the second write below
+
+// Pack 2nd field Y (destination stride 2: one Y every other byte)
+ mov (16) r[a0.4, 0]<2> ubDNDI_RESP(0,0) { NoDDClr }
+ mov (16) r[a0.4, 32]<2> ubDNDI_RESP(0,16) { NoDDClr }
+ mov (16) r[a0.4, 64]<2> ubDNDI_RESP(0,32) { NoDDClr }
+ mov (16) r[a0.4, 96]<2> ubDNDI_RESP(0,48) { NoDDClr }
+// Pack 2nd field U (destination stride 4: one U every fourth byte)
+ mov (8) r[a0.5, 0]<4> ubDNDI_RESP(2,1)<16;8,2> { NoDDClr, NoDDChk } //U pixels
+ mov (8) r[a0.5, 32]<4> ubDNDI_RESP(2,17)<16;8,2> { NoDDClr, NoDDChk } //U pixels
+ mov (8) r[a0.5, 64]<4> ubDNDI_RESP(2,33)<16;8,2> { NoDDClr, NoDDChk } //U pixels
+ mov (8) r[a0.5, 96]<4> ubDNDI_RESP(2,49)<16;8,2> { NoDDClr, NoDDChk } //U pixels
+// Pack 2nd field V (destination stride 4: one V every fourth byte)
+ mov (8) r[a0.6, 0]<4> ubDNDI_RESP(2,0)<16;8,2> { NoDDChk } //Vpixels
+ mov (8) r[a0.6, 32]<4> ubDNDI_RESP(2,16)<16;8,2> { NoDDChk } //Vpixels
+ mov (8) r[a0.6, 64]<4> ubDNDI_RESP(2,32)<16;8,2> { NoDDChk } //Vpixels
+ mov (8) r[a0.6, 96]<4> ubDNDI_RESP(2,48)<16;8,2> { NoDDChk } //Vpixels
+
+// Pack 1st field Y (offsets 160..256 skip past the second header at r23)
+ mov (16) r[a0.4, 160]<2> ubDNDI_RESP(4,0) { NoDDClr }
+ mov (16) r[a0.4, 192]<2> ubDNDI_RESP(4,16) { NoDDClr }
+ mov (16) r[a0.4, 224]<2> ubDNDI_RESP(4,32) { NoDDClr }
+ mov (16) r[a0.4, 256]<2> ubDNDI_RESP(4,48) { NoDDClr }
+// Pack 1st field U
+ mov (8) r[a0.5, 160]<4> ubDNDI_RESP(6,1)<16;8,2> { NoDDClr, NoDDChk } //U pixels
+ mov (8) r[a0.5, 192]<4> ubDNDI_RESP(6,17)<16;8,2> { NoDDClr, NoDDChk } //U pixels
+ mov (8) r[a0.5, 224]<4> ubDNDI_RESP(6,33)<16;8,2> { NoDDClr, NoDDChk } //U pixels
+ mov (8) r[a0.5, 256]<4> ubDNDI_RESP(6,49)<16;8,2> { NoDDClr, NoDDChk } //U pixels
+// Pack 1st field V
+ mov (8) r[a0.6, 160]<4> ubDNDI_RESP(6,0)<16;8,2> { NoDDChk } //Vpixels
+ mov (8) r[a0.6, 192]<4> ubDNDI_RESP(6,16)<16;8,2> { NoDDChk } //Vpixels
+ mov (8) r[a0.6, 224]<4> ubDNDI_RESP(6,32)<16;8,2> { NoDDChk } //Vpixels
+ mov (8) r[a0.6, 256]<4> ubDNDI_RESP(6,48)<16;8,2> { NoDDChk } //Vpixels
+
+//save the previous frame
+send (8) null<1>:d r18.0 0x5 0xA0A801B:ud // header in r18 followed by the first packed block
+
+//save the current frame
+send (8) null<1>:d r23.0 0x5 0xA0A801E:ud // header in r23 followed by the second packed block
+
+
+
+// FileName: DN_Save_UV_IMC3_16x4.asm
+// Author: Vivek Kumar
+// Description: Save UV for 16x4 block through DATAPORT
+
+
+
+// FileName: UVCopy_Save_16x4.asm
+// Author: Vivek Kumar
+// Description: Save UV for 16x4 block through DATAPORT
+
+
+//Reuse the headers that the Load component left in r36 and r38
+
+
+ mov (4) mudMSGHDR_UCOPY(1)<1> udDNDI_UV_RESP(0)<4;4,1> // 4 dwords = 16 bytes of loaded U data into the U payload
+ mov (4) mudMSGHDR_VCOPY(1)<1> udDNDI_UV_RESP(1)<4;4,1> // 4 dwords = 16 bytes of loaded V data into the V payload
+ send (4) null<1>:d r36 0x5 0x40A8019:ud // write-only (null destination); message sourced from r36
+ send (4) null<1>:d r38 0x5 0x40A801A:ud // write-only (null destination); message sourced from r38
+
+
+
+//End of Thread message
+
+mov (8) r127<1>:ud r0.0<8;8,1>:ud // r127 = copy of r0, used as the message payload
+ send (1) null<1>:d r127 0x27 0x02000010 // NOTE(review): 0x27 presumably carries the end-of-thread bit plus the target unit - confirm against the PRM
+
+
+.end_code
+.end_kernel
diff --git a/src/shaders/post_processing/gen7/PL3_DNUV_PL3.g4a b/src/shaders/post_processing/gen7/PL3_DNUV_PL3.g4a
new file mode 100644
index 0000000..2e1ad5f
--- /dev/null
+++ b/src/shaders/post_processing/gen7/PL3_DNUV_PL3.g4a
@@ -0,0 +1,2684 @@
+/*
+ * Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// 1295 // Total instruction count
+// 1 // Total kernel count
+
+
+.kernel PL3_DNUV_PL3
+.code
+
+
+
+//Module : DN_UV_Setup
+//Author : Tatiya, Rupesh
+//Description : Initial Set-up for DN_UV
+
+
+
+
+// Module name : ChromaDenoise.inc
+// Author : Tatiya, Rupesh
+
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16)
+.default_register_type :ub
+
+.reg_count_total 128
+.reg_count_payload 7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later)
+// 0000,0000,0000
+// 0001(Spawn a root thread),0001 (Root thread spawn thread)
+// = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later)
+// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+// 0000,0000 (Binding table index, added later)
+// = 0x0C098700
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+ // Message Descriptors
+ // = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+ // 1 (header present 1) 0 1010 (media block write) 000000
+ // 00000000 (binding table index - set later)
+ // = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+ // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT
+
+
+.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+
+.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+
+.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+
+
+// End of common.inc
+
+
+//Interface:
+//Static Parameters:
+//r1
+
+
+//======================================================
+//Interface for serpent mode Chroma Denoise, added by Le
+//======================================================
+//r1
+
+
+//noise history thresholds (low and high)
+
+
+//temporal difference thresholds (high and low)
+
+
+//noise history thresholds (low and high)
+//#define ubNoiseHistMaxHigh r1.22
+//#define ubNoiseHistMaxLow r1.23
+//#define ubNoiseHistDeltaHigh r1.24
+//#define ubNoiseHistDeltaLow r1.25
+
+//Gaussian thresholds
+
+
+//temporal difference thresholds (default)
+
+
+//r2
+//history thresholds (default)
+
+
+//denoise factor (0-63)
+
+
+//====================== Binding table (Explicit To DNUV)=========================================
+//Used by DN_UV kernels
+
+
+ //Pointer to Current Frame UV
+
+
+//r1-r6
+ //CURBE GRFs used as TEMP : Used for max computation and storing max temporarily. : r1-r6
+
+
+ .declare ubCURBE_TEMP Base=r1.0 ElementSize=1 Type=ub
+ .declare uwCURBE_TEMP Base=r1.0 ElementSize=2 Type=uw
+ .declare wCURBE_TEMP Base=r1.0 ElementSize=2 Type=w
+ .declare fCURBE_TEMP Base=r1.0 ElementSize=4 Type=f
+ .declare udCURBE_TEMP Base=r1.0 ElementSize=4 Type=ud
+ .declare uwMAX_ABS_DIFF Base=r5.0 ElementSize=2 Type=uw
+
+ //r1
+
+
+ //r3
+
+
+ //r4
+
+//r7
+ //All of the following has to defined in Same GRF for optimal performance.
+
+
+//r8-24
+ //Previous Frame UV
+
+ .declare udPREV_UV Base=r8.0 ElementSize=4 Type=ud
+ .declare ubPREV_UV Base=r8.0 ElementSize=1 Type=ub
+
+
+//r25-48
+ //TEMP Space for any Usage.
+
+
+//=========================================================================
+//Definitions and declarations for serpent mode Chroma Denoise, added by Le
+//=========================================================================
+
+
+ .declare udGNE_UV Base=r24.0 ElementSize=4 Type=ud
+ .declare fGNE_UV Base=r24.0 ElementSize=4 Type=f
+ .declare ubGNE_UV Base=r24.0 ElementSize=1 Type=ub
+
+ .declare udMSGHDR_BNE_SERP Base=r25.0 ElementSize=4 Type=ud
+ .declare udMSGSRC_BNE_SERP Base=r26.0 ElementSize=4 Type=ud
+
+
+ .declare ubDN_UV_Thresholds Base=r26.0 ElementSize=1 Type=ub
+ .declare ubDN_UV_Thresholds_Temp Base=r27.0 ElementSize=1 Type=ub
+ .declare udDN_UV_Thresholds Base=r26.0 ElementSize=4 Type=ud
+ .declare udDN_UV_Thresholds_Temp Base=r27.0 ElementSize=4 Type=ud
+ .declare fDN_UV_Thresholds Base=r26.0 ElementSize=4 Type=f
+ .declare fDN_UV_Thresholds_Temp Base=r27.0 ElementSize=4 Type=f
+
+
+//====================================================================================
+
+
+ //TEMP23: To hold V data for PL3 surfaces
+ .declare udCURR_V_TEMP Base=r25.0 ElementSize=4 Type=ud
+ .declare ubCURR_V_TEMP Base=r25.0 ElementSize=1 Type=ub
+
+ //GRFs to calculate Median: r25-r42
+ .declare ubMEDIAN_TEMP Base=r25.0 ElementSize=1 Type=ub
+
+ //18 GRFs to hold difference : r25-r42
+ .declare wDIFF Base=r25.0 ElementSize=2 Type=w
+ .declare uwDIFF Base=r25.0 ElementSize=2 Type=uw
+
+ //Temporal Diff
+ .declare wDIFF_TEMPORAL Base=r25.0 ElementSize=2 Type=w
+ .declare ubDIFF_TEMPORAL Base=r25.0 ElementSize=1 Type=ub
+
+ //4 GRFs to hold Sobel Value : r43-46
+ .declare wSOBEL_X Base=r43.0 ElementSize=2 Type=w
+ .declare uwSOBEL Base=r43.0 ElementSize=2 Type=uw
+
+
+ //2 GRFs to hold SOAD temporarily: r47-48
+ .declare uwSOAD Base=r47.0 ElementSize=2 Type=uw
+
+ //Temp GRFs to hold extra YUYV pixels: r43-r48
+ .declare ubTEMP5 Base=r43.0 ElementSize=1 Type=ub
+
+ //Temp GRFs in Median Calculation: r47-r48
+ .declare ubTEMP1 Base=r47.0 ElementSize=1 Type=ub
+
+ .declare uwTEMP0 Base=r48.0 ElementSize=2 Type=uw
+ .declare ubTEMP0 Base=r48.0 ElementSize=1 Type=ub
+
+ //Temp Space to store Median : r49-50
+
+ .declare ubMEDIAN Base=r49.0 ElementSize=1 Type=ub
+
+//r49
+
+
+//r50
+ //Message Source
+
+
+//r51
+ //DN_UV History Surface
+
+ .declare udHIST_UV Base=r51.0 ElementSize=4 Type=ud
+ .declare ubHIST_UV Base=r51.0 ElementSize=1 Type=ub
+
+//r52 - r91
+ //r52
+ //Current Frame UV
+
+
+ .declare udCURR_UV Base=r52.0 ElementSize=4 Type=ud
+ .declare ubCURR_UV Base=r52.0 ElementSize=1 Type=ub
+
+ //r54
+ //CURBE COPY
+
+
+ //r55
+
+
+ .declare uwSOAD_MIN_8x4 Base=r56.0 ElementSize=2 Type=uw
+
+ //r61
+
+
+ //r62
+
+
+ //History Surface Temp Origin
+
+
+ //r63
+ //Current Frame Y Temp Origin
+
+
+ //BNE Surface Origin
+
+
+ //r70
+
+ .declare uwDIFF_TEMPORAL_SUM4x4 Base=r70.0 ElementSize=2 Type=uw //4 GRFs
+
+ //r74-91 : For Saving Dest UV (PL2/PL3)
+
+
+ .declare ubMSGPAYLOAD_UV0 Base=r75.0 ElementSize=1 Type=ub
+
+
+ .declare ubMSGPAYLOAD_U Base=r75.0 ElementSize=1 Type=ub
+
+
+ .declare ubMSGPAYLOAD_UV1 Base=r84.0 ElementSize=1 Type=ub
+
+
+ .declare ubMSGPAYLOAD_V Base=r84.0 ElementSize=1 Type=ub
+
+ //r90
+
+ .declare uwDIFF_TEMPORAL_SUM4x4_FINAL Base=r90.0 ElementSize=2 Type=uw //2 GRFs
+
+//r92-127
+ //Current Frame Y
+
+
+ //r92
+ .declare uwDIFF_TEMPORAL_SUM4x4_TEMP_0 Base=r92 ElementSize=2 Type=uw
+ //r101
+ .declare uwDIFF_TEMPORAL_SUM4x4_TEMP_1 Base=r101 ElementSize=2 Type=uw
+ //r110
+ .declare uwDIFF_TEMPORAL_SUM4x4_TEMP_2 Base=r110 ElementSize=2 Type=uw
+ //r119
+ .declare uwDIFF_TEMPORAL_SUM4x4_TEMP_3 Base=r119 ElementSize=2 Type=uw
+
+ .declare udCURR_Y0 Base=r93.0 ElementSize=4 Type=ud
+ .declare ubCURR_Y0 Base=r93.0 ElementSize=1 Type=ub
+ .declare udCURR_Y1 Base=r102.0 ElementSize=4 Type=ud
+ .declare ubCURR_Y1 Base=r102.0 ElementSize=1 Type=ub
+ .declare udCURR_Y2 Base=r111.0 ElementSize=4 Type=ud
+ .declare ubCURR_Y2 Base=r111.0 ElementSize=1 Type=ub
+ .declare udCURR_Y3 Base=r120.0 ElementSize=4 Type=ud
+ .declare ubCURR_Y3 Base=r120.0 ElementSize=1 Type=ub
+
+ //r92: To hold U data for PL3 surfaces
+ .declare udCURR_U_TEMP Base=r92.0 ElementSize=4 Type=ud
+ .declare ubCURR_U_TEMP Base=r92.0 ElementSize=1 Type=ub
+
+ //r112: To hold U data for PL3 surfaces
+ .declare udPREV_U_TEMP Base=r112.0 ElementSize=4 Type=ud
+ .declare ubPREV_U_TEMP Base=r112.0 ElementSize=1 Type=ub
+
+ //r120: To hold previous-frame V data for PL3 surfaces
+ .declare udPREV_V_TEMP Base=r120.0 ElementSize=4 Type=ud
+ .declare ubPREV_V_TEMP Base=r120.0 ElementSize=1 Type=ub
+
+
+ // Initialize message sources (headers) with r0.
+ mov (8) r50.0<1>:ud r0.0<8;8,1>:ud // r50: header used for the U/V and history block reads
+ mov (8) r92.0<1>:ud r0.0<8;8,1>:ud // r92: header slot ahead of CURR_Y0 (r93)
+ mov (8) r101.0<1>:ud r0.0<8;8,1>:ud // r101: header slot ahead of CURR_Y1 (r102)
+ mov (8) r110.0<1>:ud r0.0<8;8,1>:ud // r110: header slot ahead of CURR_Y2 (r111)
+ mov (8) r119.0<1>:ud r0.0<8;8,1>:ud // r119: header slot ahead of CURR_Y3 (r120)
+
+
+
+//Module Name : DN_UV_PL3_Load_Curr_Frame_UV
+//Author : Tatiya, Rupesh
+//Description : Loads Current Frame U/V data for PL3 input.
+
+
+
+//Module name : DN_UV_Load_Curr_Frame_UV
+//Author : Tatiya, Rupesh
+//Description : Loads Current Frame (UV only).
+// We need 4 extra rows (2 per field) and 2 extra pixel (1 each side) for both U and V each.
+// The processing size is 16x16 U and V each. So we need : U size - 18x20, V size - 18x20, UV size - 36x20, YUYV size - 72x20.
+
+
+
+
+//18x20 U/V block is partitioned as follows:
+// <------ 18 ------>
+// ------------------
+// | 18x8 A1 |
+// | |
+// |----------------|
+// | 18x8 A2 |
+// | |
+// |----------------|
+// | 18x4 A3 |
+// |----------------|
+//
+// Coordinates: (x-1, y-2), (x-1, y+6), (x-1, y+14)
+
+//1. Load U data into CURR_U_TEMP (r92-r111)
+//2. Load V data into TEMP space (r25-r44)
+
+ //U/V surface origin: (ORIX/2, ORIY/2)
+ add (2) r7.4<1>:w r7.0<2;2,1>:w r4.4<2;2,1>:w { AccWrEn } // Source Block origin (sum also written to the accumulator)
+ shr (2) r7.4<1>:w acc0.4<2;2,1>:w 1:w // halve the origin for the U/V planes
+ mov (2) acc0.0<1>:d r7.4<2;2,1>:w // keep the U/V origin (x, y) in acc0.0/acc0.1 as dwords
+
+ //U Data
+ //A1
+ add (1) r50.0<1>:d acc0.0<0;1,0>:d -1:d // x - 1
+ add (1) r50.1<1>:d acc0.1<0;1,0>:d -2:d // y - 2
+ mov (1) r50.2<1>:ud 0x70011:ud // block 18x8: width-1 = 0x11, height-1 = 7
+ send (8) udCURR_U_TEMP(0)<1> r50 0x4 0x2890004:ud // U rows 0-7
+
+ //A2
+ add (1) r50.1<1>:d acc0.1<0;1,0>:d 6:d // y + 6
+ send (8) udCURR_U_TEMP(8)<1> r50 0x4 0x2890004:ud // U rows 8-15
+
+ //A3
+ add (1) r50.1<1>:d acc0.1<0;1,0>:d 14:d // y + 14
+ mov (1) r50.2<1>:ud 0x30011:ud // block 18x4: width-1 = 0x11, height-1 = 3
+ send (8) udCURR_U_TEMP(16)<1> r50 0x4 0x2490004:ud // U rows 16-19
+
+ //V Data
+ //A1
+ add (1) r50.0<1>:d acc0.0<0;1,0>:d -1:d // x - 1
+ add (1) r50.1<1>:d acc0.1<0;1,0>:d -2:d // y - 2
+ mov (1) r50.2<1>:ud 0x70011:ud // block 18x8
+ send (8) udCURR_V_TEMP(0)<1> r50 0x4 0x2890005:ud // V rows 0-7
+
+ //A2
+ add (1) r50.1<1>:d acc0.1<0;1,0>:d 6:d // y + 6
+ send (8) udCURR_V_TEMP(8)<1> r50 0x4 0x2890005:ud // V rows 8-15
+
+ //A3
+ add (1) r50.1<1>:d acc0.1<0;1,0>:d 14:d // y + 14
+ mov (1) r50.2<1>:ud 0x30011:ud // block 18x4
+ send (8) udCURR_V_TEMP(16)<1> r50 0x4 0x2490005:ud // V rows 16-19
+
+ //History Origin, Current Y origin and BNE surface origin - all are in inline GRF. Use , . -rT.
+
+ //Calculate Origin For History Surface: (ORIX/4, ORIY/8)
+ mov (16) acc0.0<1>:w r7.0<0;2,1>:w { AccWrEn } // replicate the (ORIX, ORIY) word pair across the accumulator
+ shr (1) r7.2<1>:w acc0.2<0;1,0>:w 2:w // ORIX / 4
+ shr (1) r7.3<1>:w acc0.3<0;1,0>:w 3:w // ORIY / 8
+
+ //Calculate Origin For BNE Surface: (ORIX/8, ORIY/16)
+ shr (1) r7.6<1>:w acc0.6<0;1,0>:w 3:w // ORIX / 8
+ shr (1) r7.7<1>:w acc0.7<0;1,0>:w 4:w // ORIY / 16
+
+
+
+//Module Name : DN_UV_PL3_Load_Prev_Frame_UV.asm
+//Author : Tatiya, Rupesh
+//Description : Loads Previous Frame UV data for PL3 input.
+
+
+
+//Module Name : DN_UV_Load_Prev_Frame_UV
+//Author : Tatiya, Rupesh
+//Description : Loads Prev Frame (UV only). U size - 16x16, V size - 16x16, UV size - 32x16, YUYV size - 64x16.
+
+
+
+
+//1. Load U into PREV_U_TEMP (r112-r119)
+//2. Load V into PREV_V_TEMP, the bottom quarter of Y space for curr frame (r120-r127)
+
+ mov (2) r50.0<1>:d r7.4<2;2,1>:w { AccWrEn } // Source block origin
+ mov (1) r50.2<1>:ud 0xF000F:ud // U/V block width and height (16x16)
+
+ mov (8) r49.0<1>:ud r50<8;8,1>:ud // duplicate the header so the V read has its own message register
+
+ send (8) udPREV_U_TEMP(0)<1> r50 0x4 0x2890001:ud //U data
+ send (8) udPREV_V_TEMP(0)<1> r49 0x4 0x2890002:ud //V data
+
+
+ //TODO - See if History loading can be combined with Prev Frame Load. - rT
+
+
+//Module name : DN_UV_Load_Hist_UV
+//Author : Tatiya, Rupesh
+//Description : Load DN History for UV denoise. 4x4 for each U & V.
+
+
+
+
+ mov (2) r50.0<1>:d r7.2<2;2,1>:w // history surface origin taken from words r7.2/r7.3
+ mov (1) r50.2<1>:ud 0x30007:ud // block 8x4: width-1 = 7, height-1 = 3
+ send (8) udHIST_UV(0)<1> r50 0x4 0x2190022:ud // read the history block into udHIST_UV
+
+
+
+//File Name : DN_UV_PL3_Interleave_Curr_Frame_UV.asm
+//Author : Tatiya, Rupesh
+//Description : Interleave separately loaded U and V for PL3 format.
+// This is needed because Noise Detection and Noise Reduction work on interleaved UV data.
+
+//1. U data: CURR_U_TEMP (r92-r111)
+//2. V data: TEMP space (r25-r44)
+
+//In one GRF, we need 10 U (1+8+1) bytes, but there's no SIMD10. So use SIMD16 and discard last 6 bytes.
+//For source row n: CURR_UV(n) takes source bytes 0-15 and CURR_UV(20+n) takes source bytes 8-23.
+
+//Move U data (even destination bytes: offset 0, stride 2)
+ mov (16) ubCURR_UV(0,0)<2> ubCURR_U_TEMP(0,0)<16;16,1>
+ mov (16) ubCURR_UV(20,0)<2> ubCURR_U_TEMP(0,8)<16;16,1>
+
+ mov (16) ubCURR_UV(1,0)<2> ubCURR_U_TEMP(1,0)<16;16,1>
+ mov (16) ubCURR_UV(21,0)<2> ubCURR_U_TEMP(1,8)<16;16,1>
+
+ mov (16) ubCURR_UV(2,0)<2> ubCURR_U_TEMP(2,0)<16;16,1>
+ mov (16) ubCURR_UV(22,0)<2> ubCURR_U_TEMP(2,8)<16;16,1>
+
+ mov (16) ubCURR_UV(3,0)<2> ubCURR_U_TEMP(3,0)<16;16,1>
+ mov (16) ubCURR_UV(23,0)<2> ubCURR_U_TEMP(3,8)<16;16,1>
+
+ mov (16) ubCURR_UV(4,0)<2> ubCURR_U_TEMP(4,0)<16;16,1>
+ mov (16) ubCURR_UV(24,0)<2> ubCURR_U_TEMP(4,8)<16;16,1>
+
+ mov (16) ubCURR_UV(5,0)<2> ubCURR_U_TEMP(5,0)<16;16,1>
+ mov (16) ubCURR_UV(25,0)<2> ubCURR_U_TEMP(5,8)<16;16,1>
+
+ mov (16) ubCURR_UV(6,0)<2> ubCURR_U_TEMP(6,0)<16;16,1>
+ mov (16) ubCURR_UV(26,0)<2> ubCURR_U_TEMP(6,8)<16;16,1>
+
+ mov (16) ubCURR_UV(7,0)<2> ubCURR_U_TEMP(7,0)<16;16,1>
+ mov (16) ubCURR_UV(27,0)<2> ubCURR_U_TEMP(7,8)<16;16,1>
+
+ mov (16) ubCURR_UV(8,0)<2> ubCURR_U_TEMP(8,0)<16;16,1>
+ mov (16) ubCURR_UV(28,0)<2> ubCURR_U_TEMP(8,8)<16;16,1>
+
+ mov (16) ubCURR_UV(9,0)<2> ubCURR_U_TEMP(9,0)<16;16,1>
+ mov (16) ubCURR_UV(29,0)<2> ubCURR_U_TEMP(9,8)<16;16,1>
+
+ mov (16) ubCURR_UV(10,0)<2> ubCURR_U_TEMP(10,0)<16;16,1>
+ mov (16) ubCURR_UV(30,0)<2> ubCURR_U_TEMP(10,8)<16;16,1>
+
+ mov (16) ubCURR_UV(11,0)<2> ubCURR_U_TEMP(11,0)<16;16,1>
+ mov (16) ubCURR_UV(31,0)<2> ubCURR_U_TEMP(11,8)<16;16,1>
+
+ mov (16) ubCURR_UV(12,0)<2> ubCURR_U_TEMP(12,0)<16;16,1>
+ mov (16) ubCURR_UV(32,0)<2> ubCURR_U_TEMP(12,8)<16;16,1>
+
+ mov (16) ubCURR_UV(13,0)<2> ubCURR_U_TEMP(13,0)<16;16,1>
+ mov (16) ubCURR_UV(33,0)<2> ubCURR_U_TEMP(13,8)<16;16,1>
+
+ mov (16) ubCURR_UV(14,0)<2> ubCURR_U_TEMP(14,0)<16;16,1>
+ mov (16) ubCURR_UV(34,0)<2> ubCURR_U_TEMP(14,8)<16;16,1>
+
+ mov (16) ubCURR_UV(15,0)<2> ubCURR_U_TEMP(15,0)<16;16,1>
+ mov (16) ubCURR_UV(35,0)<2> ubCURR_U_TEMP(15,8)<16;16,1>
+
+ mov (16) ubCURR_UV(16,0)<2> ubCURR_U_TEMP(16,0)<16;16,1>
+ mov (16) ubCURR_UV(36,0)<2> ubCURR_U_TEMP(16,8)<16;16,1>
+
+ mov (16) ubCURR_UV(17,0)<2> ubCURR_U_TEMP(17,0)<16;16,1>
+ mov (16) ubCURR_UV(37,0)<2> ubCURR_U_TEMP(17,8)<16;16,1>
+
+ mov (16) ubCURR_UV(18,0)<2> ubCURR_U_TEMP(18,0)<16;16,1>
+ mov (16) ubCURR_UV(38,0)<2> ubCURR_U_TEMP(18,8)<16;16,1>
+
+ mov (16) ubCURR_UV(19,0)<2> ubCURR_U_TEMP(19,0)<16;16,1>
+ mov (16) ubCURR_UV(39,0)<2> ubCURR_U_TEMP(19,8)<16;16,1>
+
+
+//Move V data (odd destination bytes: offset 1, stride 2)
+ mov (16) ubCURR_UV(0,1)<2> ubCURR_V_TEMP(0,0)<16;16,1>
+ mov (16) ubCURR_UV(20,1)<2> ubCURR_V_TEMP(0,8)<16;16,1>
+ mov (16) ubCURR_UV(1,1)<2> ubCURR_V_TEMP(1,0)<16;16,1>
+ mov (16) ubCURR_UV(21,1)<2> ubCURR_V_TEMP(1,8)<16;16,1>
+ mov (16) ubCURR_UV(2,1)<2> ubCURR_V_TEMP(2,0)<16;16,1>
+ mov (16) ubCURR_UV(22,1)<2> ubCURR_V_TEMP(2,8)<16;16,1>
+ mov (16) ubCURR_UV(3,1)<2> ubCURR_V_TEMP(3,0)<16;16,1>
+ mov (16) ubCURR_UV(23,1)<2> ubCURR_V_TEMP(3,8)<16;16,1>
+ mov (16) ubCURR_UV(4,1)<2> ubCURR_V_TEMP(4,0)<16;16,1>
+ mov (16) ubCURR_UV(24,1)<2> ubCURR_V_TEMP(4,8)<16;16,1>
+ mov (16) ubCURR_UV(5,1)<2> ubCURR_V_TEMP(5,0)<16;16,1>
+ mov (16) ubCURR_UV(25,1)<2> ubCURR_V_TEMP(5,8)<16;16,1>
+ mov (16) ubCURR_UV(6,1)<2> ubCURR_V_TEMP(6,0)<16;16,1>
+ mov (16) ubCURR_UV(26,1)<2> ubCURR_V_TEMP(6,8)<16;16,1>
+ mov (16) ubCURR_UV(7,1)<2> ubCURR_V_TEMP(7,0)<16;16,1>
+ mov (16) ubCURR_UV(27,1)<2> ubCURR_V_TEMP(7,8)<16;16,1>
+ mov (16) ubCURR_UV(8,1)<2> ubCURR_V_TEMP(8,0)<16;16,1>
+ mov (16) ubCURR_UV(28,1)<2> ubCURR_V_TEMP(8,8)<16;16,1>
+ mov (16) ubCURR_UV(9,1)<2> ubCURR_V_TEMP(9,0)<16;16,1>
+ mov (16) ubCURR_UV(29,1)<2> ubCURR_V_TEMP(9,8)<16;16,1>
+ mov (16) ubCURR_UV(10,1)<2> ubCURR_V_TEMP(10,0)<16;16,1>
+ mov (16) ubCURR_UV(30,1)<2> ubCURR_V_TEMP(10,8)<16;16,1>
+ mov (16) ubCURR_UV(11,1)<2> ubCURR_V_TEMP(11,0)<16;16,1>
+ mov (16) ubCURR_UV(31,1)<2> ubCURR_V_TEMP(11,8)<16;16,1>
+ mov (16) ubCURR_UV(12,1)<2> ubCURR_V_TEMP(12,0)<16;16,1>
+ mov (16) ubCURR_UV(32,1)<2> ubCURR_V_TEMP(12,8)<16;16,1>
+ mov (16) ubCURR_UV(13,1)<2> ubCURR_V_TEMP(13,0)<16;16,1>
+ mov (16) ubCURR_UV(33,1)<2> ubCURR_V_TEMP(13,8)<16;16,1>
+ mov (16) ubCURR_UV(14,1)<2> ubCURR_V_TEMP(14,0)<16;16,1>
+ mov (16) ubCURR_UV(34,1)<2> ubCURR_V_TEMP(14,8)<16;16,1>
+ mov (16) ubCURR_UV(15,1)<2> ubCURR_V_TEMP(15,0)<16;16,1>
+ mov (16) ubCURR_UV(35,1)<2> ubCURR_V_TEMP(15,8)<16;16,1>
+ mov (16) ubCURR_UV(16,1)<2> ubCURR_V_TEMP(16,0)<16;16,1>
+ mov (16) ubCURR_UV(36,1)<2> ubCURR_V_TEMP(16,8)<16;16,1>
+ mov (16) ubCURR_UV(17,1)<2> ubCURR_V_TEMP(17,0)<16;16,1>
+ mov (16) ubCURR_UV(37,1)<2> ubCURR_V_TEMP(17,8)<16;16,1>
+ mov (16) ubCURR_UV(18,1)<2> ubCURR_V_TEMP(18,0)<16;16,1>
+ mov (16) ubCURR_UV(38,1)<2> ubCURR_V_TEMP(18,8)<16;16,1>
+ mov (16) ubCURR_UV(19,1)<2> ubCURR_V_TEMP(19,0)<16;16,1>
+ mov (16) ubCURR_UV(39,1)<2> ubCURR_V_TEMP(19,8)<16;16,1>
+
+
+
+//File Name : DN_UV_PL3_Interleave_Prev_Frame_UV.asm
+//Author : Tatiya, Rupesh
+//Description : Interleave separately loaded U and V for PL3 format.
+// This is needed because Noise Detection and Noise Reduction work on interleaved UV data.
+
+//1.U Data: PREV_U_TEMP (r112-r119)
+//2.V Data: PREV_V_TEMP, bottom quarter of Y space for curr frame (r120-r127)
+
+ mov (16) ubPREV_UV(0,0)<2> ubPREV_U_TEMP(0,0)<16;8,1> { NoDDClr } // U goes to the even bytes; region <16;8,1> takes 8 bytes at offset 0 and 8 bytes at offset 16
+ mov (16) ubPREV_UV(8,0)<2> ubPREV_U_TEMP(0,8)<16;8,1> { NoDDClr } // same pattern starting at byte 8 (bytes 8-15 and 24-31)
+ mov (16) ubPREV_UV(1,0)<2> ubPREV_U_TEMP(1,0)<16;8,1> { NoDDClr }
+ mov (16) ubPREV_UV(9,0)<2> ubPREV_U_TEMP(1,8)<16;8,1> { NoDDClr }
+ mov (16) ubPREV_UV(2,0)<2> ubPREV_U_TEMP(2,0)<16;8,1> { NoDDClr }
+ mov (16) ubPREV_UV(10,0)<2> ubPREV_U_TEMP(2,8)<16;8,1> { NoDDClr }
+ mov (16) ubPREV_UV(3,0)<2> ubPREV_U_TEMP(3,0)<16;8,1> { NoDDClr }
+ mov (16) ubPREV_UV(11,0)<2> ubPREV_U_TEMP(3,8)<16;8,1> { NoDDClr }
+ mov (16) ubPREV_UV(4,0)<2> ubPREV_U_TEMP(4,0)<16;8,1> { NoDDClr }
+ mov (16) ubPREV_UV(12,0)<2> ubPREV_U_TEMP(4,8)<16;8,1> { NoDDClr }
+ mov (16) ubPREV_UV(5,0)<2> ubPREV_U_TEMP(5,0)<16;8,1> { NoDDClr }
+ mov (16) ubPREV_UV(13,0)<2> ubPREV_U_TEMP(5,8)<16;8,1> { NoDDClr }
+ mov (16) ubPREV_UV(6,0)<2> ubPREV_U_TEMP(6,0)<16;8,1> { NoDDClr }
+ mov (16) ubPREV_UV(14,0)<2> ubPREV_U_TEMP(6,8)<16;8,1> { NoDDClr }
+ mov (16) ubPREV_UV(7,0)<2> ubPREV_U_TEMP(7,0)<16;8,1> { NoDDClr }
+ mov (16) ubPREV_UV(15,0)<2> ubPREV_U_TEMP(7,8)<16;8,1> { NoDDClr }
+
+ mov (16) ubPREV_UV(0,1)<2> ubPREV_V_TEMP(0,0)<16;8,1> { NoDDChk } // V goes to the odd bytes of the same destination registers
+ mov (16) ubPREV_UV(8,1)<2> ubPREV_V_TEMP(0,8)<16;8,1> { NoDDChk }
+ mov (16) ubPREV_UV(1,1)<2> ubPREV_V_TEMP(1,0)<16;8,1> { NoDDChk }
+ mov (16) ubPREV_UV(9,1)<2> ubPREV_V_TEMP(1,8)<16;8,1> { NoDDChk }
+ mov (16) ubPREV_UV(2,1)<2> ubPREV_V_TEMP(2,0)<16;8,1> { NoDDChk }
+ mov (16) ubPREV_UV(10,1)<2> ubPREV_V_TEMP(2,8)<16;8,1> { NoDDChk }
+ mov (16) ubPREV_UV(3,1)<2> ubPREV_V_TEMP(3,0)<16;8,1> { NoDDChk }
+ mov (16) ubPREV_UV(11,1)<2> ubPREV_V_TEMP(3,8)<16;8,1> { NoDDChk }
+ mov (16) ubPREV_UV(4,1)<2> ubPREV_V_TEMP(4,0)<16;8,1> { NoDDChk }
+ mov (16) ubPREV_UV(12,1)<2> ubPREV_V_TEMP(4,8)<16;8,1> { NoDDChk }
+ mov (16) ubPREV_UV(5,1)<2> ubPREV_V_TEMP(5,0)<16;8,1> { NoDDChk }
+ mov (16) ubPREV_UV(13,1)<2> ubPREV_V_TEMP(5,8)<16;8,1> { NoDDChk }
+ mov (16) ubPREV_UV(6,1)<2> ubPREV_V_TEMP(6,0)<16;8,1> { NoDDChk }
+ mov (16) ubPREV_UV(14,1)<2> ubPREV_V_TEMP(6,8)<16;8,1> { NoDDChk }
+ mov (16) ubPREV_UV(7,1)<2> ubPREV_V_TEMP(7,0)<16;8,1> { NoDDChk }
+ mov (16) ubPREV_UV(15,1)<2> ubPREV_V_TEMP(7,8)<16;8,1> { NoDDChk }
+
+
+
+//Module Name : DN_UV_420_Load_Curr_Frame_Y
+//Author : Tatiya, Rupesh
+//Description : Load Curr Frame Y data for 420 Input
+
+
+
+//Module Name : DN_UV_Load_Curr_Frame_Y
+//Author : Tatiya, Rupesh
+//Description : Loads Y of Current frame.
+
+
+
+
+ //For 16x16 U and 16x16 V for 420, we need to read 32x32 Y (as four 16x16 blocks).
+
+ mov (8) acc0.0<1>:ud r0.0<8;8,1>:ud // build the header template in the accumulator, starting from r0
+ mov (1) acc0.2<1>:ud 0xF000F:ud // block 16x16: width-1 = 0xF, height-1 = 0xF
+ add (2) acc0.0<1>:ud r7.0<2;2,1>:w r4.4<2;2,1>:w // (x, y) = block origin + origin offset
+
+ mov (8) r92.0<1>:ud acc0.0<8;8,1>:ud // header for Y block 0 at (x, y)
+
+ mov (8) r101.0<1>:ud acc0.0<8;8,1>:ud // header for Y block 1
+ mov (8) r110.0<1>:ud acc0.0<8;8,1>:ud // header for Y block 2
+ mov (8) r119.0<1>:ud acc0.0<8;8,1>:ud // header for Y block 3
+
+ add (1) r101.1<1>:d acc0.1<0;1,0>:d 16:d // block 1 at (x, y+16)
+
+ add (1) r110.0<1>:d acc0.0<0;1,0>:d 16:d // block 2 at (x+16, y)
+
+ add (2) r119.0<1>:d acc0.0<2;2,1>:d 16:d // block 3 at (x+16, y+16)
+
+ send (8) udCURR_Y0(0)<1> r92 0x4 0x2890003:ud // response lands at r93, right after its header
+ send (8) udCURR_Y1(0)<1> r101 0x4 0x2890003:ud // response lands at r102
+ send (8) udCURR_Y2(0)<1> r110 0x4 0x2890003:ud // response lands at r111
+ send (8) udCURR_Y3(0)<1> r119 0x4 0x2890003:ud // response lands at r120
+
+
+
+//Module Name : DN_UV_Noise_Detection_UV
+//Author : Tatiya, Rupesh
+//Description : Performs noise detection on 16x16 U and 16x16 V each.
+
+
+
+//Module Name : DN_UV_Move_CURBE_Inline_UV.asm
+//Author : Tatiya, Rupesh
+//Description : Copies the CURBE (r1, r2, r4) and inline (r7) fields read below into r54-r63 so that r1-r7 can be reused as temp space.
+
+
+
+ //Mov CURBE data to another space - so that it can be used as Temp Space --> r1 - r6
+ mov (4) r54.28<1>:ub r2.28<4;4,1>:ub //Dest. YUY2 offset
+ mov (2) r54.5<1>:ud r4.0<4;2,2>:ud //Src YUY2 offset and Origin offset (dwords 0 and 2 of r4)
+ mov (4) r55.28<1>:ub r1.0<4;4,1>:ub // bytes r1.0-r1.3
+
+ mov (8) r61.20<1>:ub r1.4<8;8,1>:ub // bytes r1.4-r1.11
+ mov (4) r61.28<1>:ub r1.12<4;4,1>:ub // bytes r1.12-r1.15
+
+ //Move Inline Data to another space - so that it can be used as Temp Space --> r7
+ mov (4) r62.10<1>:w r7.0<4;4,1>:w // words r7.0-r7.3
+ mov (4) r63.10<1>:w r7.4<4;4,1>:w // words r7.4-r7.7
+
+
+
+
+
+//Module Name : DN_UV_Noise_Detection_Set_Top_Region_N
+//Author : Tatiya, Rupesh
+//Description : Sets sub-region N from Top region.
+
+
+ mov (1) a0.0:uw 1664:uw // byte offset 1664 = r52.0 with 32-byte registers (base of ubCURR_UV)
+ mov (1) a0.1:uw 1816:uw // byte offset 1816 = r56 + 24 bytes
+
+
+
+
+add (1) r7.7<1>:d ip:ud 32:d { NoCompact } // r7.7 = ip + 32; presumably the return address past the jmpi (two uncompacted instructions) - confirm
+ jmpi (1) DN_UV_NOISE_DETECTION_UV { NoCompact } // jump to the shared noise-detection routine
+
+
+
+
+
+//Module Name : DN_UV_Noise_Detection_Set_Top_Region_N
+//Author : Tatiya, Rupesh
+//Description : Sets sub-region N from Top region.
+
+
+ //TODO - remove one instruction here using arithmetic. -rT
+ mov (1) a0.0:uw 1792:uw // byte offset 1792 = r56.0 with 32-byte registers
+ mov (1) a0.1:uw 1820:uw // byte offset 1820 = r56 + 28 bytes
+
+
+
+
+add (1) r7.7<1>:d ip:ud 32:d { NoCompact } // r7.7 = ip + 32; presumably the return address past the jmpi (two uncompacted instructions) - confirm
+ jmpi (1) DN_UV_NOISE_DETECTION_UV { NoCompact } // jump to the shared noise-detection routine
+
+
+
+
+
+//Module Name : DN_UV_Noise_Detection_Set_Top_Region_N
+//Author : Tatiya, Rupesh
+//Description : Sets sub-region N from Top region.
+
+
+ //TODO - remove one instruction here using arithmetic. -rT
+ mov (1) a0.0:uw 1920:uw // byte offset 1920 = r60.0 with 32-byte registers
+ mov (1) a0.1:uw 1848:uw // byte offset 1848 = r57 + 24 bytes
+
+
+
+
+add (1) r7.7<1>:d ip:ud 32:d { NoCompact } // r7.7 = ip + 32; presumably the return address past the jmpi (two uncompacted instructions) - confirm
+ jmpi (1) DN_UV_NOISE_DETECTION_UV { NoCompact } // jump to the shared noise-detection routine
+
+
+
+
+
+//Module Name : DN_UV_Noise_Detection_Set_Top_Region_N
+//Author : Tatiya, Rupesh
+//Description : Sets sub-region N from Top region.
+//              Loads a0.0/a0.1 with immediates, stores ip+32 in r7.7, then jumps to DN_UV_NOISE_DETECTION_UV.
+
+ //TODO - remove one instruction here using arithmetic. -rT
+ mov (1) a0.0:uw 2048:uw //2048 = 64*32; presumably the GRF byte offset of r64.0 - TODO confirm
+ mov (1) a0.1:uw 1852:uw //1852 = 57*32+28; presumably the GRF byte offset of r57.28 - TODO confirm
+
+
+
+
+add (1) r7.7<1>:d ip:ud 32:d { NoCompact } //r7.7 = ip + 32; presumably the return address for the jump target - TODO confirm
+ jmpi (1) DN_UV_NOISE_DETECTION_UV { NoCompact }
+
+
+
+
+
+//Module Name : DN_UV_Noise_Detection_Set_Top_Region_N
+//Author : Tatiya, Rupesh
+//Description : Sets sub-region N from Top region.
+//              Loads a0.0/a0.1 with immediates, stores ip+32 in r7.7, then jumps to DN_UV_NOISE_DETECTION_UV.
+
+ mov (1) a0.0:uw 2304:uw //2304 = 72*32; presumably the GRF byte offset of r72.0 - TODO confirm
+ mov (1) a0.1:uw 1880:uw //1880 = 58*32+24; presumably the GRF byte offset of r58.24 - TODO confirm
+
+
+
+
+add (1) r7.7<1>:d ip:ud 32:d { NoCompact } //r7.7 = ip + 32; presumably the return address for the jump target - TODO confirm
+ jmpi (1) DN_UV_NOISE_DETECTION_UV { NoCompact }
+
+
+
+
+
+//Module Name : DN_UV_Noise_Detection_Set_Top_Region_N
+//Author : Tatiya, Rupesh
+//Description : Sets sub-region N from Top region.
+//              Loads a0.0/a0.1 with immediates, stores ip+32 in r7.7, then jumps to DN_UV_NOISE_DETECTION_UV.
+
+ //TODO - remove one instruction here using arithmetic. -rT
+ mov (1) a0.0:uw 2432:uw //2432 = 76*32; presumably the GRF byte offset of r76.0 - TODO confirm
+ mov (1) a0.1:uw 1884:uw //1884 = 58*32+28; presumably the GRF byte offset of r58.28 - TODO confirm
+
+
+
+
+add (1) r7.7<1>:d ip:ud 32:d { NoCompact } //r7.7 = ip + 32; presumably the return address for the jump target - TODO confirm
+ jmpi (1) DN_UV_NOISE_DETECTION_UV { NoCompact }
+
+
+
+
+
+//Module Name : DN_UV_Noise_Detection_Set_Top_Region_N
+//Author : Tatiya, Rupesh
+//Description : Sets sub-region N from Top region.
+//              Loads a0.0/a0.1 with immediates, stores ip+32 in r7.7, then jumps to DN_UV_NOISE_DETECTION_UV.
+
+ //TODO - remove one instruction here using arithmetic. -rT
+ mov (1) a0.0:uw 2560:uw //2560 = 80*32; presumably the GRF byte offset of r80.0 - TODO confirm
+ mov (1) a0.1:uw 1912:uw //1912 = 59*32+24; presumably the GRF byte offset of r59.24 - TODO confirm
+
+
+
+
+add (1) r7.7<1>:d ip:ud 32:d { NoCompact } //r7.7 = ip + 32; presumably the return address for the jump target - TODO confirm
+ jmpi (1) DN_UV_NOISE_DETECTION_UV { NoCompact }
+
+
+
+
+
+//Module Name : DN_UV_Noise_Detection_Set_Top_Region_N
+//Author : Tatiya, Rupesh
+//Description : Sets sub-region N from Top region.
+//              Loads a0.0/a0.1 with immediates, stores ip+32 in r7.7, then jumps to DN_UV_NOISE_DETECTION_UV.
+
+ //TODO - remove one instruction here using arithmetic. -rT
+ mov (1) a0.0:uw 2688:uw //2688 = 84*32; presumably the GRF byte offset of r84.0 - TODO confirm
+ mov (1) a0.1:uw 1916:uw //1916 = 59*32+28; presumably the GRF byte offset of r59.28 - TODO confirm
+
+
+
+
+add (1) r7.7<1>:d ip:ud 32:d { NoCompact } //r7.7 = ip + 32; presumably the return address for the jump target - TODO confirm
+ jmpi (1) DN_UV_NOISE_DETECTION_UV { NoCompact }
+
+
+
+//Module : DN_UV_Noise_Reduction_UV
+//Author : Tatiya, Rupesh
+//Description : Performs Noise Reduction on 16x16 U and 16x16 V.
+//Tasks : 1. Update weight history
+// 2. Find if the block is a motion block
+// 3. Compute Denoised Pixel.
+
+
+
+
+//History is 1+1 byte every 4x4 U and 4x4 V.
+
+ cmp.l.f0.0 (16) null<1>:w ubHIST_UV(0,0)<16;16,1> r61.20<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w ubHIST_UV(0,0)<16;16,1> r61.22<0;2,1>:ub
+
+ mov (16) uwCURBE_TEMP(0)<1> 0:w
+ mov (16) uwCURBE_TEMP(1)<1> 0:w
+
+ //Compute diff between curr and prev. - First 16 lines
+ // 8 lines here
+ add (16) wDIFF_TEMPORAL(0)<1> ubCURR_UV(2,2)<16;16,1> -ubPREV_UV(0,0)<16;16,1> //Diff UV interleaved
+ add (16) wDIFF_TEMPORAL(1)<1> ubCURR_UV(3,2)<16;16,1> -ubPREV_UV(0,16)<16;16,1> //Diff UV interleaved
+ add (16) wDIFF_TEMPORAL(2)<1> ubCURR_UV(4,2)<16;16,1> -ubPREV_UV(0,32)<16;16,1> //Diff UV interleaved
+ add (16) wDIFF_TEMPORAL(3)<1> ubCURR_UV(5,2)<16;16,1> -ubPREV_UV(0,48)<16;16,1> //Diff UV interleaved
+ add (16) wDIFF_TEMPORAL(4)<1> ubCURR_UV(6,2)<16;16,1> -ubPREV_UV(0,64)<16;16,1> //Diff UV interleaved
+ add (16) wDIFF_TEMPORAL(5)<1> ubCURR_UV(7,2)<16;16,1> -ubPREV_UV(0,80)<16;16,1> //Diff UV interleaved
+ add (16) wDIFF_TEMPORAL(6)<1> ubCURR_UV(8,2)<16;16,1> -ubPREV_UV(0,96)<16;16,1> //Diff UV interleaved
+ add (16) wDIFF_TEMPORAL(7)<1> ubCURR_UV(9,2)<16;16,1> -ubPREV_UV(0,112)<16;16,1> //Diff UV interleaved
+
+ //Update WT HIST
+ (-f0.0) shr (16) uwCURBE_TEMP(0)<1> ubHIST_UV(0,0)<16;16,1> 1:w
+ (f1.0) add (16) uwCURBE_TEMP(2)<1> ubHIST_UV(0,0)<16;16,1> r61.24<0;2,1>:ub
+ (f0.0) mov (16) uwCURBE_TEMP(2)<1> r61.20<0;2,1>:ub
+ (-f0.0.anyv) mov (16) uwCURBE_TEMP(2)<1> ubHIST_UV(0,0)<16;16,1>
+
+ cmp.l.f0.0 (16) null<1>:w ubHIST_UV(0,16)<16;16,1> r61.20<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w ubHIST_UV(0,16)<16;16,1> r61.22<0;2,1>:ub
+
+ //Compute diff between curr and prev. - First 16 lines
+ // 8 more lines here
+ add (16) wDIFF_TEMPORAL(8)<1> ubCURR_UV(10,2)<16;16,1> -ubPREV_UV(0,128)<16;16,1> //Diff UV interleaved
+ add (16) wDIFF_TEMPORAL(9)<1> ubCURR_UV(11,2)<16;16,1> -ubPREV_UV(0,144)<16;16,1> //Diff UV interleaved
+ add (16) wDIFF_TEMPORAL(10)<1> ubCURR_UV(12,2)<16;16,1> -ubPREV_UV(0,160)<16;16,1> //Diff UV interleaved
+ add (16) wDIFF_TEMPORAL(11)<1> ubCURR_UV(13,2)<16;16,1> -ubPREV_UV(0,176)<16;16,1> //Diff UV interleaved
+ add (16) wDIFF_TEMPORAL(12)<1> ubCURR_UV(14,2)<16;16,1> -ubPREV_UV(0,192)<16;16,1> //Diff UV interleaved
+ add (16) wDIFF_TEMPORAL(13)<1> ubCURR_UV(15,2)<16;16,1> -ubPREV_UV(0,208)<16;16,1> //Diff UV interleaved
+ add (16) wDIFF_TEMPORAL(14)<1> ubCURR_UV(16,2)<16;16,1> -ubPREV_UV(0,224)<16;16,1> //Diff UV interleaved
+ add (16) wDIFF_TEMPORAL(15)<1> ubCURR_UV(17,2)<16;16,1> -ubPREV_UV(0,240)<16;16,1> //Diff UV interleaved
+
+ (-f0.0) shr (16) uwCURBE_TEMP(1)<1> ubHIST_UV(0,16)<16;16,1> 1:w
+ (f1.0) add (16) uwCURBE_TEMP(3)<1> ubHIST_UV(0,16)<16;16,1> r61.24<0;2,1>:ub
+ (f0.0) mov (16) uwCURBE_TEMP(3)<1> r61.20<0;2,1>:ub
+ (-f0.0.anyv) mov(16) uwCURBE_TEMP(3)<1> ubHIST_UV(0,16)<16;16,1>
+
+ //16x16 to 16x4 - First 16 lines
+ add (16) acc0.0<1>:uw (abs)wDIFF_TEMPORAL(0)<16;16,1> (abs)wDIFF_TEMPORAL(1)<16;16,1>
+ add (16) acc0.0<1>:uw acc0.0<16;16,1>:uw (abs)wDIFF_TEMPORAL(2)<16;16,1>
+ add (16) uwDIFF_TEMPORAL_SUM4x4(0)<1> acc0.0<16;16,1>:uw (abs)wDIFF_TEMPORAL(3)<16;16,1>
+ //16x16 to 16x4 - First 16 lines
+ add (16) acc0.0<1>:uw (abs)wDIFF_TEMPORAL(4)<16;16,1> (abs)wDIFF_TEMPORAL(5)<16;16,1>
+ add (16) acc0.0<1>:uw acc0.0<16;16,1>:uw (abs)wDIFF_TEMPORAL(6)<16;16,1>
+ add (16) uwDIFF_TEMPORAL_SUM4x4(1)<1> acc0.0<16;16,1>:uw (abs)wDIFF_TEMPORAL(7)<16;16,1>
+ //16x16 to 16x4 - First 16 lines
+ add (16) acc0.0<1>:uw (abs)wDIFF_TEMPORAL(8)<16;16,1> (abs)wDIFF_TEMPORAL(9)<16;16,1>
+ add (16) acc0.0<1>:uw acc0.0<16;16,1>:uw (abs)wDIFF_TEMPORAL(10)<16;16,1>
+ add (16) uwDIFF_TEMPORAL_SUM4x4(2)<1> acc0.0<16;16,1>:uw (abs)wDIFF_TEMPORAL(11)<16;16,1>
+ //16x16 to 16x4 - First 16 lines
+ add (16) acc0.0<1>:uw (abs)wDIFF_TEMPORAL(12)<16;16,1> (abs)wDIFF_TEMPORAL(13)<16;16,1>
+ add (16) acc0.0<1>:uw acc0.0<16;16,1>:uw (abs)wDIFF_TEMPORAL(14)<16;16,1>
+ add (16) uwDIFF_TEMPORAL_SUM4x4(3)<1> acc0.0<16;16,1>:uw (abs)wDIFF_TEMPORAL(15)<16;16,1>
+
+//Compute diff between curr and prev. - Second 16 lines
+//13 lines.
+ add (16) wDIFF_TEMPORAL(16)<1> ubCURR_UV(22,2)<16;16,1> -ubPREV_UV(8,0)<16;16,1> //Diff UV interleaved
+ add (16) wDIFF_TEMPORAL(17)<1> ubCURR_UV(23,2)<16;16,1> -ubPREV_UV(8,16)<16;16,1> //Diff UV interleaved
+ add (16) wDIFF_TEMPORAL(18)<1> ubCURR_UV(24,2)<16;16,1> -ubPREV_UV(8,32)<16;16,1> //Diff UV interleaved
+ add (16) wDIFF_TEMPORAL(19)<1> ubCURR_UV(25,2)<16;16,1> -ubPREV_UV(8,48)<16;16,1> //Diff UV interleaved
+ add (16) wDIFF_TEMPORAL(20)<1> ubCURR_UV(26,2)<16;16,1> -ubPREV_UV(8,64)<16;16,1> //Diff UV interleaved
+ add (16) wDIFF_TEMPORAL(21)<1> ubCURR_UV(27,2)<16;16,1> -ubPREV_UV(8,80)<16;16,1> //Diff UV interleaved
+ add (16) wDIFF_TEMPORAL(22)<1> ubCURR_UV(28,2)<16;16,1> -ubPREV_UV(8,96)<16;16,1> //Diff UV interleaved
+ add (16) wDIFF_TEMPORAL(23)<1> ubCURR_UV(29,2)<16;16,1> -ubPREV_UV(8,112)<16;16,1> //Diff UV interleaved
+ add (16) wDIFF_TEMPORAL(24)<1> ubCURR_UV(30,2)<16;16,1> -ubPREV_UV(8,128)<16;16,1> //Diff UV interleaved
+ add (16) wDIFF_TEMPORAL(25)<1> ubCURR_UV(31,2)<16;16,1> -ubPREV_UV(8,144)<16;16,1> //Diff UV interleaved
+ add (16) wDIFF_TEMPORAL(26)<1> ubCURR_UV(32,2)<16;16,1> -ubPREV_UV(8,160)<16;16,1> //Diff UV interleaved
+ add (16) wDIFF_TEMPORAL(27)<1> ubCURR_UV(33,2)<16;16,1> -ubPREV_UV(8,176)<16;16,1> //Diff UV interleaved
+ add (16) wDIFF_TEMPORAL(28)<1> ubCURR_UV(34,2)<16;16,1> -ubPREV_UV(8,192)<16;16,1> //Diff UV interleaved
+
+//3 more lines
+ add (16) wCURBE_TEMP(4)<1> ubCURR_UV(35,2)<16;16,1> -ubPREV_UV(8,208)<16;16,1> //Diff UV interleaved
+ add (16) wCURBE_TEMP(5)<1> ubCURR_UV(36,2)<16;16,1> -ubPREV_UV(8,224)<16;16,1> //Diff UV interleaved
+ add (16) wCURBE_TEMP(6)<1> ubCURR_UV(37,2)<16;16,1> -ubPREV_UV(8,240)<16;16,1> //Diff UV interleaved
+
+ //16x4 to 8x4 - First 16 lines
+ add (16) uwDIFF_TEMPORAL_SUM4x4(0)<1> uwDIFF_TEMPORAL_SUM4x4(0,0)<4;2,1> uwDIFF_TEMPORAL_SUM4x4(0,2)<4;2,1>
+ add (16) uwDIFF_TEMPORAL_SUM4x4(1)<1> uwDIFF_TEMPORAL_SUM4x4(2,0)<4;2,1> uwDIFF_TEMPORAL_SUM4x4(2,2)<4;2,1>
+
+ //8x4 to 4x4 - First 16 lines
+ add (16) uwDIFF_TEMPORAL_SUM4x4_FINAL(0)<1> uwDIFF_TEMPORAL_SUM4x4(0,0)<4;2,1> uwDIFF_TEMPORAL_SUM4x4(0,2)<4;2,1> { AccWrEn }
+
+ //16x16 to 16x4 - Second 16 lines
+ add (16) acc0.0<1>:uw (abs)wDIFF_TEMPORAL(16)<16;16,1> (abs)wDIFF_TEMPORAL(17)<16;16,1>
+ add (16) acc0.0<1>:uw acc0.0<16;16,1>:uw (abs)wDIFF_TEMPORAL(18)<16;16,1>
+ add (16) uwDIFF_TEMPORAL_SUM4x4(0)<1> acc0.0<16;16,1>:uw (abs)wDIFF_TEMPORAL(19)<16;16,1>
+ //16x16 to 16x4 - Second 16 lines
+ add (16) acc0.0<1>:uw (abs)wDIFF_TEMPORAL(20)<16;16,1> (abs)wDIFF_TEMPORAL(21)<16;16,1>
+ add (16) acc0.0<1>:uw acc0.0<16;16,1>:uw (abs)wDIFF_TEMPORAL(22)<16;16,1>
+ add (16) uwDIFF_TEMPORAL_SUM4x4(1)<1> acc0.0<16;16,1>:uw (abs)wDIFF_TEMPORAL(23)<16;16,1>
+ //16x16 to 16x4 - Second 16 lines
+ add (16) acc0.0<1>:uw (abs)wDIFF_TEMPORAL(24)<16;16,1> (abs)wDIFF_TEMPORAL(25)<16;16,1>
+ add (16) acc0.0<1>:uw acc0.0<16;16,1>:uw (abs)wDIFF_TEMPORAL(26)<16;16,1>
+ add (16) uwDIFF_TEMPORAL_SUM4x4(2)<1> acc0.0<16;16,1>:uw (abs)wDIFF_TEMPORAL(27)<16;16,1>
+
+ //16x16 to 16x4 - Second 16 lines
+ add (16) acc0.0<1>:uw (abs)wDIFF_TEMPORAL(28)<16;16,1> (abs)wCURBE_TEMP(4)<16;16,1>
+ add (16) acc0.0<1>:uw acc0.0<16;16,1>:uw (abs)wCURBE_TEMP(5)<16;16,1>
+ add (16) uwDIFF_TEMPORAL_SUM4x4(3)<1> acc0.0<16;16,1>:uw (abs)wCURBE_TEMP(6)<16;16,1>
+
+ //Find if block is motion block - First 16 lines
+ cmp.g.f0.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_FINAL(0)<16;16,1> r61.26<0;2,1>:ub
+
+ //Move TEMPORAL_SUM4x4 for SIMD16 use later.
+ mov (8) uwDIFF_TEMPORAL_SUM4x4_TEMP_0(0,0)<1> uwDIFF_TEMPORAL_SUM4x4_FINAL(0,0)<0;2,1>
+ mov (8) uwDIFF_TEMPORAL_SUM4x4_TEMP_0(0,8)<1> uwDIFF_TEMPORAL_SUM4x4_FINAL(0,2)<0;2,1>
+ mov (8) uwDIFF_TEMPORAL_SUM4x4_TEMP_1(0,0)<1> uwDIFF_TEMPORAL_SUM4x4_FINAL(0,4)<0;2,1>
+ mov (8) uwDIFF_TEMPORAL_SUM4x4_TEMP_1(0,8)<1> uwDIFF_TEMPORAL_SUM4x4_FINAL(0,6)<0;2,1>
+ mov (8) uwDIFF_TEMPORAL_SUM4x4_TEMP_2(0,0)<1> uwDIFF_TEMPORAL_SUM4x4_FINAL(0,8)<0;2,1>
+ mov (8) uwDIFF_TEMPORAL_SUM4x4_TEMP_2(0,8)<1> uwDIFF_TEMPORAL_SUM4x4_FINAL(0,10)<0;2,1>
+ mov (8) uwDIFF_TEMPORAL_SUM4x4_TEMP_3(0,0)<1> uwDIFF_TEMPORAL_SUM4x4_FINAL(0,12)<0;2,1>
+ mov (8) uwDIFF_TEMPORAL_SUM4x4_TEMP_3(0,8)<1> uwDIFF_TEMPORAL_SUM4x4_FINAL(0,14)<0;2,1>
+
+ //Pick Appropriate Weight History Based on motion. - First 16 lines
+ (-f0.0) mov (16) uwCURBE_TEMP(0)<1> uwCURBE_TEMP(2)<16;16,1>
+
+ //Actual DN - First 16 lines
+ cmp.l.f0.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(0)<16;16,1> r61.28<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(0)<16;16,1> r61.30<0;2,1>:ub
+ mul (8) acc0.0<1>:w ubCURR_UV(2,2)<8;8,1> -uwCURBE_TEMP(0,0)<0;2,1>
+ mul (8) acc0.8<1>:w ubCURR_UV(2,10)<8;8,1> -uwCURBE_TEMP(0,2)<0;2,1>
+ mac (16) acc0<1>:w ubCURR_UV(2,2)<16;16,1> 256:w
+ mac (8) acc0.0<1>:w ubPREV_UV(0,0)<8;8,1> uwCURBE_TEMP(0,0)<0;2,1>
+ mac (8) acc0.8<1>:w ubPREV_UV(0,8)<8;8,1> uwCURBE_TEMP(0,2)<0;2,1>
+ add (16) acc0<1>:w acc0<16;16,1>:w 128:w
+ (f0.0) shr (16) wDIFF_TEMPORAL(0)<1> acc0<16;16,1>:w 8:w
+ (-f0.0) mov (16) wDIFF_TEMPORAL(0)<1> ubCURR_UV(2,2)<16;16,1>
+ cmp.le.f0.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_TEMP_0(0)<16;16,1> r61.26<0;2,1>:ub
+ (-f0.0.allv) avg (16) wDIFF_TEMPORAL(0)<1> wDIFF_TEMPORAL(0)<16;16,1> ubCURR_UV(2,2)<16;16,1>
+
+ cmp.l.f0.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(1)<16;16,1> r61.28<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(1)<16;16,1> r61.30<0;2,1>:ub
+ mul (8) acc0.0<1>:w ubCURR_UV(3,2)<8;8,1> -uwCURBE_TEMP(0,0)<0;2,1>
+ mul (8) acc0.8<1>:w ubCURR_UV(3,10)<8;8,1> -uwCURBE_TEMP(0,2)<0;2,1>
+ mac (16) acc0<1>:w ubCURR_UV(3,2)<16;16,1> 256:w
+ mac (8) acc0.0<1>:w ubPREV_UV(0,16)<8;8,1> uwCURBE_TEMP(0,0)<0;2,1>
+ mac (8) acc0.8<1>:w ubPREV_UV(0,24)<8;8,1> uwCURBE_TEMP(0,2)<0;2,1>
+ add (16) acc0<1>:w acc0<16;16,1>:w 128:w
+ (f0.0) shr (16) wDIFF_TEMPORAL(1)<1> acc0<16;16,1>:w 8:w
+ (-f0.0) mov (16) wDIFF_TEMPORAL(1)<1> ubCURR_UV(3,2)<16;16,1>
+ cmp.le.f0.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_TEMP_0(0)<16;16,1> r61.26<0;2,1>:ub
+ (-f0.0.allv) avg (16) wDIFF_TEMPORAL(1)<1> wDIFF_TEMPORAL(1)<16;16,1> ubCURR_UV(3,2)<16;16,1>
+
+ cmp.l.f0.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(2)<16;16,1> r61.28<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(2)<16;16,1> r61.30<0;2,1>:ub
+ mul (8) acc0.0<1>:w ubCURR_UV(4,2)<8;8,1> -uwCURBE_TEMP(0,0)<0;2,1>
+ mul (8) acc0.8<1>:w ubCURR_UV(4,10)<8;8,1> -uwCURBE_TEMP(0,2)<0;2,1>
+ mac (16) acc0<1>:w ubCURR_UV(4,2)<16;16,1> 256:w
+ mac (8) acc0.0<1>:w ubPREV_UV(0,32)<8;8,1> uwCURBE_TEMP(0,0)<0;2,1>
+ mac (8) acc0.8<1>:w ubPREV_UV(0,40)<8;8,1> uwCURBE_TEMP(0,2)<0;2,1>
+ add (16) acc0<1>:w acc0<16;16,1>:w 128:w
+ (f0.0) shr (16) wDIFF_TEMPORAL(2)<1> acc0<16;16,1>:w 8:w
+ (-f0.0) mov (16) wDIFF_TEMPORAL(2)<1> ubCURR_UV(4,2)<16;16,1>
+ cmp.le.f0.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_TEMP_0(0)<16;16,1> r61.26<0;2,1>:ub
+ (-f0.0.allv) avg (16) wDIFF_TEMPORAL(2)<1> wDIFF_TEMPORAL(2)<16;16,1> ubCURR_UV(4,2)<16;16,1>
+
+ cmp.l.f0.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(3)<16;16,1> r61.28<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(3)<16;16,1> r61.30<0;2,1>:ub
+ mul (8) acc0.0<1>:w ubCURR_UV(5,2)<8;8,1> -uwCURBE_TEMP(0,0)<0;2,1>
+ mul (8) acc0.8<1>:w ubCURR_UV(5,10)<8;8,1> -uwCURBE_TEMP(0,2)<0;2,1>
+ mac (16) acc0<1>:w ubCURR_UV(5,2)<16;16,1> 256:w
+ mac (8) acc0.0<1>:w ubPREV_UV(0,48)<8;8,1> uwCURBE_TEMP(0,0)<0;2,1>
+ mac (8) acc0.8<1>:w ubPREV_UV(0,56)<8;8,1> uwCURBE_TEMP(0,2)<0;2,1>
+ add (16) acc0<1>:w acc0<16;16,1>:w 128:w
+ (f0.0) shr (16) wDIFF_TEMPORAL(3)<1> acc0<16;16,1>:w 8:w
+ (-f0.0) mov (16) wDIFF_TEMPORAL(3)<1> ubCURR_UV(5,2)<16;16,1>
+ cmp.le.f0.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_TEMP_0(0)<16;16,1> r61.26<0;2,1>:ub
+ (-f0.0.allv) avg (16) wDIFF_TEMPORAL(3)<1> wDIFF_TEMPORAL(3)<16;16,1> ubCURR_UV(5,2)<16;16,1>
+
+ cmp.l.f0.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(4)<16;16,1> r61.28<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(4)<16;16,1> r61.30<0;2,1>:ub
+ mul (8) acc0.0<1>:w ubCURR_UV(6,2)<8;8,1> -uwCURBE_TEMP(0,4)<0;2,1>
+ mul (8) acc0.8<1>:w ubCURR_UV(6,10)<8;8,1> -uwCURBE_TEMP(0,6)<0;2,1>
+ mac (16) acc0<1>:w ubCURR_UV(6,2)<16;16,1> 256:w
+ mac (8) acc0.0<1>:w ubPREV_UV(0,64)<8;8,1> uwCURBE_TEMP(0,4)<0;2,1>
+ mac (8) acc0.8<1>:w ubPREV_UV(0,72)<8;8,1> uwCURBE_TEMP(0,6)<0;2,1>
+ add (16) acc0<1>:w acc0<16;16,1>:w 128:w
+ (f0.0) shr (16) wDIFF_TEMPORAL(4)<1> acc0<16;16,1>:w 8:w
+ (-f0.0) mov (16) wDIFF_TEMPORAL(4)<1> ubCURR_UV(6,2)<16;16,1>
+ cmp.le.f0.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_TEMP_1(0)<16;16,1> r61.26<0;2,1>:ub
+ (-f0.0.allv) avg (16) wDIFF_TEMPORAL(4)<1> wDIFF_TEMPORAL(4)<16;16,1> ubCURR_UV(6,2)<16;16,1>
+
+ cmp.l.f0.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(5)<16;16,1> r61.28<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(5)<16;16,1> r61.30<0;2,1>:ub
+ mul (8) acc0.0<1>:w ubCURR_UV(7,2)<8;8,1> -uwCURBE_TEMP(0,4)<0;2,1>
+ mul (8) acc0.8<1>:w ubCURR_UV(7,10)<8;8,1> -uwCURBE_TEMP(0,6)<0;2,1>
+ mac (16) acc0<1>:w ubCURR_UV(7,2)<16;16,1> 256:w
+ mac (8) acc0.0<1>:w ubPREV_UV(0,80)<8;8,1> uwCURBE_TEMP(0,4)<0;2,1>
+ mac (8) acc0.8<1>:w ubPREV_UV(0,88)<8;8,1> uwCURBE_TEMP(0,6)<0;2,1>
+ add (16) acc0<1>:w acc0<16;16,1>:w 128:w
+ (f0.0) shr (16) wDIFF_TEMPORAL(5)<1> acc0<16;16,1>:w 8:w
+ (-f0.0) mov (16) wDIFF_TEMPORAL(5)<1> ubCURR_UV(7,2)<16;16,1>
+ cmp.le.f0.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_TEMP_1(0)<16;16,1> r61.26<0;2,1>:ub
+ (-f0.0.allv) avg (16) wDIFF_TEMPORAL(5)<1> wDIFF_TEMPORAL(5)<16;16,1> ubCURR_UV(7,2)<16;16,1>
+
+ cmp.l.f0.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(6)<16;16,1> r61.28<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(6)<16;16,1> r61.30<0;2,1>:ub
+ mul (8) acc0.0<1>:w ubCURR_UV(8,2)<8;8,1> -uwCURBE_TEMP(0,4)<0;2,1>
+ mul (8) acc0.8<1>:w ubCURR_UV(8,10)<8;8,1> -uwCURBE_TEMP(0,6)<0;2,1>
+ mac (16) acc0<1>:w ubCURR_UV(8,2)<16;16,1> 256:w
+ mac (8) acc0.0<1>:w ubPREV_UV(0,96)<8;8,1> uwCURBE_TEMP(0,4)<0;2,1>
+ mac (8) acc0.8<1>:w ubPREV_UV(0,104)<8;8,1> uwCURBE_TEMP(0,6)<0;2,1>
+ add (16) acc0<1>:w acc0<16;16,1>:w 128:w
+ (f0.0) shr (16) wDIFF_TEMPORAL(6)<1> acc0<16;16,1>:w 8:w
+ (-f0.0) mov (16) wDIFF_TEMPORAL(6)<1> ubCURR_UV(8,2)<16;16,1>
+ cmp.le.f0.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_TEMP_1(0)<16;16,1> r61.26<0;2,1>:ub
+ (-f0.0.allv) avg (16) wDIFF_TEMPORAL(6)<1> wDIFF_TEMPORAL(6)<16;16,1> ubCURR_UV(8,2)<16;16,1>
+
+ cmp.l.f0.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(7)<16;16,1> r61.28<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(7)<16;16,1> r61.30<0;2,1>:ub
+ mul (8) acc0.0<1>:w ubCURR_UV(9,2)<8;8,1> -uwCURBE_TEMP(0,4)<0;2,1>
+ mul (8) acc0.8<1>:w ubCURR_UV(9,10)<8;8,1> -uwCURBE_TEMP(0,6)<0;2,1>
+ mac (16) acc0<1>:w ubCURR_UV(9,2)<16;16,1> 256:w
+ mac (8) acc0.0<1>:w ubPREV_UV(0,112)<8;8,1> uwCURBE_TEMP(0,4)<0;2,1>
+ mac (8) acc0.8<1>:w ubPREV_UV(0,120)<8;8,1> uwCURBE_TEMP(0,6)<0;2,1>
+ add (16) acc0<1>:w acc0<16;16,1>:w 128:w
+ (f0.0) shr (16) wDIFF_TEMPORAL(7)<1> acc0<16;16,1>:w 8:w
+ (-f0.0) mov (16) wDIFF_TEMPORAL(7)<1> ubCURR_UV(9,2)<16;16,1>
+ cmp.le.f0.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_TEMP_1(0)<16;16,1> r61.26<0;2,1>:ub
+ (-f0.0.allv) avg (16) wDIFF_TEMPORAL(7)<1> wDIFF_TEMPORAL(7)<16;16,1> ubCURR_UV(9,2)<16;16,1>
+
+ cmp.l.f0.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(8)<16;16,1> r61.28<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(8)<16;16,1> r61.30<0;2,1>:ub
+ mul (8) acc0.0<1>:w ubCURR_UV(10,2)<8;8,1> -uwCURBE_TEMP(0,8)<0;2,1>
+ mul (8) acc0.8<1>:w ubCURR_UV(10,10)<8;8,1> -uwCURBE_TEMP(0,10)<0;2,1>
+ mac (16) acc0<1>:w ubCURR_UV(10,2)<16;16,1> 256:w
+ mac (8) acc0.0<1>:w ubPREV_UV(0,128)<8;8,1> uwCURBE_TEMP(0,8)<0;2,1>
+ mac (8) acc0.8<1>:w ubPREV_UV(0,136)<8;8,1> uwCURBE_TEMP(0,10)<0;2,1>
+ add (16) acc0<1>:w acc0<16;16,1>:w 128:w
+ (f0.0) shr (16) wDIFF_TEMPORAL(8)<1> acc0<16;16,1>:w 8:w
+ (-f0.0) mov (16) wDIFF_TEMPORAL(8)<1> ubCURR_UV(10,2)<16;16,1>
+ cmp.le.f0.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_TEMP_2(0)<16;16,1> r61.26<0;2,1>:ub
+ (-f0.0.allv) avg (16) wDIFF_TEMPORAL(8)<1> wDIFF_TEMPORAL(8)<16;16,1> ubCURR_UV(10,2)<16;16,1>
+
+ cmp.l.f0.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(9)<16;16,1> r61.28<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(9)<16;16,1> r61.30<0;2,1>:ub
+ mul (8) acc0.0<1>:w ubCURR_UV(11,2)<8;8,1> -uwCURBE_TEMP(0,8)<0;2,1>
+ mul (8) acc0.8<1>:w ubCURR_UV(11,10)<8;8,1> -uwCURBE_TEMP(0,10)<0;2,1>
+ mac (16) acc0<1>:w ubCURR_UV(11,2)<16;16,1> 256:w
+ mac (8) acc0.0<1>:w ubPREV_UV(0,144)<8;8,1> uwCURBE_TEMP(0,8)<0;2,1>
+ mac (8) acc0.8<1>:w ubPREV_UV(0,152)<8;8,1> uwCURBE_TEMP(0,10)<0;2,1>
+ add (16) acc0<1>:w acc0<16;16,1>:w 128:w
+ (f0.0) shr (16) wDIFF_TEMPORAL(9)<1> acc0<16;16,1>:w 8:w
+ (-f0.0) mov (16) wDIFF_TEMPORAL(9)<1> ubCURR_UV(11,2)<16;16,1>
+ cmp.le.f0.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_TEMP_2(0)<16;16,1> r61.26<0;2,1>:ub
+ (-f0.0.allv) avg (16) wDIFF_TEMPORAL(9)<1> wDIFF_TEMPORAL(9)<16;16,1> ubCURR_UV(11,2)<16;16,1>
+
+ cmp.l.f0.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(10)<16;16,1> r61.28<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(10)<16;16,1> r61.30<0;2,1>:ub
+ mul (8) acc0.0<1>:w ubCURR_UV(12,2)<8;8,1> -uwCURBE_TEMP(0,8)<0;2,1>
+ mul (8) acc0.8<1>:w ubCURR_UV(12,10)<8;8,1> -uwCURBE_TEMP(0,10)<0;2,1>
+ mac (16) acc0<1>:w ubCURR_UV(12,2)<16;16,1> 256:w
+ mac (8) acc0.0<1>:w ubPREV_UV(0,160)<8;8,1> uwCURBE_TEMP(0,8)<0;2,1>
+ mac (8) acc0.8<1>:w ubPREV_UV(0,168)<8;8,1> uwCURBE_TEMP(0,10)<0;2,1>
+ add (16) acc0<1>:w acc0<16;16,1>:w 128:w
+ (f0.0) shr (16) wDIFF_TEMPORAL(10)<1> acc0<16;16,1>:w 8:w
+ (-f0.0) mov (16) wDIFF_TEMPORAL(10)<1> ubCURR_UV(12,2)<16;16,1>
+ cmp.le.f0.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_TEMP_2(0)<16;16,1> r61.26<0;2,1>:ub
+ (-f0.0.allv) avg (16) wDIFF_TEMPORAL(10)<1> wDIFF_TEMPORAL(10)<16;16,1> ubCURR_UV(12,2)<16;16,1>
+
+ cmp.l.f0.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(11)<16;16,1> r61.28<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(11)<16;16,1> r61.30<0;2,1>:ub
+ mul (8) acc0.0<1>:w ubCURR_UV(13,2)<8;8,1> -uwCURBE_TEMP(0,8)<0;2,1>
+ mul (8) acc0.8<1>:w ubCURR_UV(13,10)<8;8,1> -uwCURBE_TEMP(0,10)<0;2,1>
+ mac (16) acc0<1>:w ubCURR_UV(13,2)<16;16,1> 256:w
+ mac (8) acc0.0<1>:w ubPREV_UV(0,176)<8;8,1> uwCURBE_TEMP(0,8)<0;2,1>
+ mac (8) acc0.8<1>:w ubPREV_UV(0,184)<8;8,1> uwCURBE_TEMP(0,10)<0;2,1>
+ add (16) acc0<1>:w acc0<16;16,1>:w 128:w
+ (f0.0) shr (16) wDIFF_TEMPORAL(11)<1> acc0<16;16,1>:w 8:w
+ (-f0.0) mov (16) wDIFF_TEMPORAL(11)<1> ubCURR_UV(13,2)<16;16,1>
+ cmp.le.f0.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_TEMP_2(0)<16;16,1> r61.26<0;2,1>:ub
+ (-f0.0.allv) avg (16) wDIFF_TEMPORAL(11)<1> wDIFF_TEMPORAL(11)<16;16,1> ubCURR_UV(13,2)<16;16,1>
+
+ cmp.l.f0.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(12)<16;16,1> r61.28<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(12)<16;16,1> r61.30<0;2,1>:ub
+ mul (8) acc0.0<1>:w ubCURR_UV(14,2)<8;8,1> -uwCURBE_TEMP(0,12)<0;2,1>
+ mul (8) acc0.8<1>:w ubCURR_UV(14,10)<8;8,1> -uwCURBE_TEMP(0,14)<0;2,1>
+ mac (16) acc0<1>:w ubCURR_UV(14,2)<16;16,1> 256:w
+ mac (8) acc0.0<1>:w ubPREV_UV(0,192)<8;8,1> uwCURBE_TEMP(0,12)<0;2,1>
+ mac (8) acc0.8<1>:w ubPREV_UV(0,200)<8;8,1> uwCURBE_TEMP(0,14)<0;2,1>
+ add (16) acc0<1>:w acc0<16;16,1>:w 128:w
+ (f0.0) shr (16) wDIFF_TEMPORAL(12)<1> acc0<16;16,1>:w 8:w
+ (-f0.0) mov (16) wDIFF_TEMPORAL(12)<1> ubCURR_UV(14,2)<16;16,1>
+ cmp.le.f0.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_TEMP_3(0)<16;16,1> r61.26<0;2,1>:ub
+ (-f0.0.allv) avg (16) wDIFF_TEMPORAL(12)<1> wDIFF_TEMPORAL(12)<16;16,1> ubCURR_UV(14,2)<16;16,1>
+
+ cmp.l.f0.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(13)<16;16,1> r61.28<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(13)<16;16,1> r61.30<0;2,1>:ub
+ mul (8) acc0.0<1>:w ubCURR_UV(15,2)<8;8,1> -uwCURBE_TEMP(0,12)<0;2,1>
+ mul (8) acc0.8<1>:w ubCURR_UV(15,10)<8;8,1> -uwCURBE_TEMP(0,14)<0;2,1>
+ mac (16) acc0<1>:w ubCURR_UV(15,2)<16;16,1> 256:w
+ mac (8) acc0.0<1>:w ubPREV_UV(0,208)<8;8,1> uwCURBE_TEMP(0,12)<0;2,1>
+ mac (8) acc0.8<1>:w ubPREV_UV(0,216)<8;8,1> uwCURBE_TEMP(0,14)<0;2,1>
+ add (16) acc0<1>:w acc0<16;16,1>:w 128:w
+ (f0.0) shr (16) wDIFF_TEMPORAL(13)<1> acc0<16;16,1>:w 8:w
+ (-f0.0) mov (16) wDIFF_TEMPORAL(13)<1> ubCURR_UV(15,2)<16;16,1>
+ cmp.le.f0.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_TEMP_3(0)<16;16,1> r61.26<0;2,1>:ub
+ (-f0.0.allv) avg (16) wDIFF_TEMPORAL(13)<1> wDIFF_TEMPORAL(13)<16;16,1> ubCURR_UV(15,2)<16;16,1>
+
+ cmp.l.f0.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(14)<16;16,1> r61.28<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(14)<16;16,1> r61.30<0;2,1>:ub
+ mul (8) acc0.0<1>:w ubCURR_UV(16,2)<8;8,1> -uwCURBE_TEMP(0,12)<0;2,1>
+ mul (8) acc0.8<1>:w ubCURR_UV(16,10)<8;8,1> -uwCURBE_TEMP(0,14)<0;2,1>
+ mac (16) acc0<1>:w ubCURR_UV(16,2)<16;16,1> 256:w
+ mac (8) acc0.0<1>:w ubPREV_UV(0,224)<8;8,1> uwCURBE_TEMP(0,12)<0;2,1>
+ mac (8) acc0.8<1>:w ubPREV_UV(0,232)<8;8,1> uwCURBE_TEMP(0,14)<0;2,1>
+ add (16) acc0<1>:w acc0<16;16,1>:w 128:w
+ (f0.0) shr (16) wDIFF_TEMPORAL(14)<1> acc0<16;16,1>:w 8:w
+ (-f0.0) mov (16) wDIFF_TEMPORAL(14)<1> ubCURR_UV(16,2)<16;16,1>
+ cmp.le.f0.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_TEMP_3(0)<16;16,1> r61.26<0;2,1>:ub
+ (-f0.0.allv) avg (16) wDIFF_TEMPORAL(14)<1> wDIFF_TEMPORAL(14)<16;16,1> ubCURR_UV(16,2)<16;16,1>
+
+ cmp.l.f0.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(15)<16;16,1> r61.28<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(15)<16;16,1> r61.30<0;2,1>:ub
+ mul (8) acc0.0<1>:w ubCURR_UV(17,2)<8;8,1> -uwCURBE_TEMP(0,12)<0;2,1>
+ mul (8) acc0.8<1>:w ubCURR_UV(17,10)<8;8,1> -uwCURBE_TEMP(0,14)<0;2,1>
+ mac (16) acc0<1>:w ubCURR_UV(17,2)<16;16,1> 256:w
+ mac (8) acc0.0<1>:w ubPREV_UV(0,240)<8;8,1> uwCURBE_TEMP(0,12)<0;2,1>
+ mac (8) acc0.8<1>:w ubPREV_UV(0,248)<8;8,1> uwCURBE_TEMP(0,14)<0;2,1>
+ add (16) acc0<1>:w acc0<16;16,1>:w 128:w
+ (f0.0) shr (16) wDIFF_TEMPORAL(15)<1> acc0<16;16,1>:w 8:w
+ (-f0.0) mov (16) wDIFF_TEMPORAL(15)<1> ubCURR_UV(17,2)<16;16,1>
+ cmp.le.f0.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_TEMP_3(0)<16;16,1> r61.26<0;2,1>:ub
+ (-f0.0.allv) avg (16) wDIFF_TEMPORAL(15)<1> wDIFF_TEMPORAL(15)<16;16,1> ubCURR_UV(17,2)<16;16,1>
+
+
+ //16x4 to 8x4 - Second 16 lines
+ add (16) uwDIFF_TEMPORAL_SUM4x4(0)<1> uwDIFF_TEMPORAL_SUM4x4(0,0)<4;2,1> uwDIFF_TEMPORAL_SUM4x4(0,2)<4;2,1>
+ add (16) uwDIFF_TEMPORAL_SUM4x4(1)<1> uwDIFF_TEMPORAL_SUM4x4(2,0)<4;2,1> uwDIFF_TEMPORAL_SUM4x4(2,2)<4;2,1>
+
+ //8x4 to 4x4 - Second 16 lines
+ add (16) uwDIFF_TEMPORAL_SUM4x4_FINAL(1)<1> uwDIFF_TEMPORAL_SUM4x4(0,0)<4;2,1> uwDIFF_TEMPORAL_SUM4x4(0,2)<4;2,1> { AccWrEn }
+
+ //Find if block is motion block - Second 16 lines
+ cmp.g.f1.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_FINAL(1)<16;16,1> r61.26<0;2,1>:ub
+
+ //Move TEMPORAL_SUM4x4 for SIMD16 use later.
+ mov (8) uwDIFF_TEMPORAL_SUM4x4_TEMP_0(0,0)<1> uwDIFF_TEMPORAL_SUM4x4_FINAL(1,0)<0;2,1>
+ mov (8) uwDIFF_TEMPORAL_SUM4x4_TEMP_0(0,8)<1> uwDIFF_TEMPORAL_SUM4x4_FINAL(1,2)<0;2,1>
+ mov (8) uwDIFF_TEMPORAL_SUM4x4_TEMP_1(0,0)<1> uwDIFF_TEMPORAL_SUM4x4_FINAL(1,4)<0;2,1>
+ mov (8) uwDIFF_TEMPORAL_SUM4x4_TEMP_1(0,8)<1> uwDIFF_TEMPORAL_SUM4x4_FINAL(1,6)<0;2,1>
+ mov (8) uwDIFF_TEMPORAL_SUM4x4_TEMP_2(0,0)<1> uwDIFF_TEMPORAL_SUM4x4_FINAL(1,8)<0;2,1>
+ mov (8) uwDIFF_TEMPORAL_SUM4x4_TEMP_2(0,8)<1> uwDIFF_TEMPORAL_SUM4x4_FINAL(1,10)<0;2,1>
+ mov (8) uwDIFF_TEMPORAL_SUM4x4_TEMP_3(0,0)<1> uwDIFF_TEMPORAL_SUM4x4_FINAL(1,12)<0;2,1>
+ mov (8) uwDIFF_TEMPORAL_SUM4x4_TEMP_3(0,8)<1> uwDIFF_TEMPORAL_SUM4x4_FINAL(1,14)<0;2,1>
+
+ //Pick Appropriate Weight History Based on motion. - Second 16 lines
+ (-f1.0) mov (16) uwCURBE_TEMP(1)<1> uwCURBE_TEMP(3)<16;16,1>
+
+ //Actual DN - Second 16 lines
+ cmp.l.f0.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(16)<16;16,1> r61.28<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(16)<16;16,1> r61.30<0;2,1>:ub
+ mul (8) acc0.0<1>:w ubCURR_UV(22,2)<8;8,1> -uwCURBE_TEMP(1,0)<0;2,1>
+ mul (8) acc0.8<1>:w ubCURR_UV(22,10)<8;8,1> -uwCURBE_TEMP(1,2)<0;2,1>
+ mac (16) acc0<1>:w ubCURR_UV(22,2)<16;16,1> 256:w
+ mac (8) acc0.0<1>:w ubPREV_UV(8,0)<8;8,1> uwCURBE_TEMP(1,0)<0;2,1>
+ mac (8) acc0.8<1>:w ubPREV_UV(8,8)<8;8,1> uwCURBE_TEMP(1,2)<0;2,1>
+ add (16) acc0<1>:w acc0<16;16,1>:w 128:w
+ (f0.0) shr (16) wDIFF_TEMPORAL(16)<1> acc0<16;16,1>:w 8:w
+ (-f0.0) mov (16) wDIFF_TEMPORAL(16)<1> ubCURR_UV(22,2)<16;16,1>
+ cmp.le.f0.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_TEMP_0(0)<16;16,1> r61.26<0;2,1>:ub
+ (-f0.0.allv) avg (16) wDIFF_TEMPORAL(16)<1> wDIFF_TEMPORAL(16)<16;16,1> ubCURR_UV(22,2)<16;16,1>
+
+ cmp.l.f0.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(17)<16;16,1> r61.28<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(17)<16;16,1> r61.30<0;2,1>:ub
+ mul (8) acc0.0<1>:w ubCURR_UV(23,2)<8;8,1> -uwCURBE_TEMP(1,0)<0;2,1>
+ mul (8) acc0.8<1>:w ubCURR_UV(23,10)<8;8,1> -uwCURBE_TEMP(1,2)<0;2,1>
+ mac (16) acc0<1>:w ubCURR_UV(23,2)<16;16,1> 256:w
+ mac (8) acc0.0<1>:w ubPREV_UV(8,16)<8;8,1> uwCURBE_TEMP(1,0)<0;2,1>
+ mac (8) acc0.8<1>:w ubPREV_UV(8,24)<8;8,1> uwCURBE_TEMP(1,2)<0;2,1>
+ add (16) acc0<1>:w acc0<16;16,1>:w 128:w
+ (f0.0) shr (16) wDIFF_TEMPORAL(17)<1> acc0<16;16,1>:w 8:w
+ (-f0.0) mov (16) wDIFF_TEMPORAL(17)<1> ubCURR_UV(23,2)<16;16,1>
+ cmp.le.f0.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_TEMP_0(0)<16;16,1> r61.26<0;2,1>:ub
+ (-f0.0.allv) avg (16) wDIFF_TEMPORAL(17)<1> wDIFF_TEMPORAL(17)<16;16,1> ubCURR_UV(23,2)<16;16,1>
+
+ cmp.l.f0.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(18)<16;16,1> r61.28<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(18)<16;16,1> r61.30<0;2,1>:ub
+ mul (8) acc0.0<1>:w ubCURR_UV(24,2)<8;8,1> -uwCURBE_TEMP(1,0)<0;2,1>
+ mul (8) acc0.8<1>:w ubCURR_UV(24,10)<8;8,1> -uwCURBE_TEMP(1,2)<0;2,1>
+ mac (16) acc0<1>:w ubCURR_UV(24,2)<16;16,1> 256:w
+ mac (8) acc0.0<1>:w ubPREV_UV(8,32)<8;8,1> uwCURBE_TEMP(1,0)<0;2,1>
+ mac (8) acc0.8<1>:w ubPREV_UV(8,40)<8;8,1> uwCURBE_TEMP(1,2)<0;2,1>
+ add (16) acc0<1>:w acc0<16;16,1>:w 128:w
+ (f0.0) shr (16) wDIFF_TEMPORAL(18)<1> acc0<16;16,1>:w 8:w
+ (-f0.0) mov (16) wDIFF_TEMPORAL(18)<1> ubCURR_UV(24,2)<16;16,1>
+ cmp.le.f0.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_TEMP_0(0)<16;16,1> r61.26<0;2,1>:ub
+ (-f0.0.allv) avg (16) wDIFF_TEMPORAL(18)<1> wDIFF_TEMPORAL(18)<16;16,1> ubCURR_UV(24,2)<16;16,1>
+
+ cmp.l.f0.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(19)<16;16,1> r61.28<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(19)<16;16,1> r61.30<0;2,1>:ub
+ mul (8) acc0.0<1>:w ubCURR_UV(25,2)<8;8,1> -uwCURBE_TEMP(1,0)<0;2,1>
+ mul (8) acc0.8<1>:w ubCURR_UV(25,10)<8;8,1> -uwCURBE_TEMP(1,2)<0;2,1>
+ mac (16) acc0<1>:w ubCURR_UV(25,2)<16;16,1> 256:w
+ mac (8) acc0.0<1>:w ubPREV_UV(8,48)<8;8,1> uwCURBE_TEMP(1,0)<0;2,1>
+ mac (8) acc0.8<1>:w ubPREV_UV(8,56)<8;8,1> uwCURBE_TEMP(1,2)<0;2,1>
+ add (16) acc0<1>:w acc0<16;16,1>:w 128:w
+ (f0.0) shr (16) wDIFF_TEMPORAL(19)<1> acc0<16;16,1>:w 8:w
+ (-f0.0) mov (16) wDIFF_TEMPORAL(19)<1> ubCURR_UV(25,2)<16;16,1>
+ cmp.le.f0.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_TEMP_0(0)<16;16,1> r61.26<0;2,1>:ub
+ (-f0.0.allv) avg (16) wDIFF_TEMPORAL(19)<1> wDIFF_TEMPORAL(19)<16;16,1> ubCURR_UV(25,2)<16;16,1>
+ cmp.l.f0.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(20)<16;16,1> r61.28<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(20)<16;16,1> r61.30<0;2,1>:ub
+ mul (8) acc0.0<1>:w ubCURR_UV(26,2)<8;8,1> -uwCURBE_TEMP(1,4)<0;2,1>
+ mul (8) acc0.8<1>:w ubCURR_UV(26,10)<8;8,1> -uwCURBE_TEMP(1,6)<0;2,1>
+ mac (16) acc0<1>:w ubCURR_UV(26,2)<16;16,1> 256:w
+ mac (8) acc0.0<1>:w ubPREV_UV(8,64)<8;8,1> uwCURBE_TEMP(1,4)<0;2,1>
+ mac (8) acc0.8<1>:w ubPREV_UV(8,72)<8;8,1> uwCURBE_TEMP(1,6)<0;2,1>
+ add (16) acc0<1>:w acc0<16;16,1>:w 128:w
+ (f0.0) shr (16) wDIFF_TEMPORAL(20)<1> acc0<16;16,1>:w 8:w
+ (-f0.0) mov (16) wDIFF_TEMPORAL(20)<1> ubCURR_UV(26,2)<16;16,1>
+ cmp.le.f0.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_TEMP_1(0)<16;16,1> r61.26<0;2,1>:ub
+ (-f0.0.allv) avg (16) wDIFF_TEMPORAL(20)<1> wDIFF_TEMPORAL(20)<16;16,1> ubCURR_UV(26,2)<16;16,1>
+
+ cmp.l.f0.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(21)<16;16,1> r61.28<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(21)<16;16,1> r61.30<0;2,1>:ub
+ mul (8) acc0.0<1>:w ubCURR_UV(27,2)<8;8,1> -uwCURBE_TEMP(1,4)<0;2,1>
+ mul (8) acc0.8<1>:w ubCURR_UV(27,10)<8;8,1> -uwCURBE_TEMP(1,6)<0;2,1>
+ mac (16) acc0<1>:w ubCURR_UV(27,2)<16;16,1> 256:w
+ mac (8) acc0.0<1>:w ubPREV_UV(8,80)<8;8,1> uwCURBE_TEMP(1,4)<0;2,1>
+ mac (8) acc0.8<1>:w ubPREV_UV(8,88)<8;8,1> uwCURBE_TEMP(1,6)<0;2,1>
+ add (16) acc0<1>:w acc0<16;16,1>:w 128:w
+ (f0.0) shr (16) wDIFF_TEMPORAL(21)<1> acc0<16;16,1>:w 8:w
+ (-f0.0) mov (16) wDIFF_TEMPORAL(21)<1> ubCURR_UV(27,2)<16;16,1>
+ cmp.le.f0.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_TEMP_1(0)<16;16,1> r61.26<0;2,1>:ub
+ (-f0.0.allv) avg (16) wDIFF_TEMPORAL(21)<1> wDIFF_TEMPORAL(21)<16;16,1> ubCURR_UV(27,2)<16;16,1>
+
+ cmp.l.f0.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(22)<16;16,1> r61.28<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(22)<16;16,1> r61.30<0;2,1>:ub
+ mul (8) acc0.0<1>:w ubCURR_UV(28,2)<8;8,1> -uwCURBE_TEMP(1,4)<0;2,1>
+ mul (8) acc0.8<1>:w ubCURR_UV(28,10)<8;8,1> -uwCURBE_TEMP(1,6)<0;2,1>
+ mac (16) acc0<1>:w ubCURR_UV(28,2)<16;16,1> 256:w
+ mac (8) acc0.0<1>:w ubPREV_UV(8,96)<8;8,1> uwCURBE_TEMP(1,4)<0;2,1>
+ mac (8) acc0.8<1>:w ubPREV_UV(8,104)<8;8,1> uwCURBE_TEMP(1,6)<0;2,1>
+ add (16) acc0<1>:w acc0<16;16,1>:w 128:w
+ (f0.0) shr (16) wDIFF_TEMPORAL(22)<1> acc0<16;16,1>:w 8:w
+ (-f0.0) mov (16) wDIFF_TEMPORAL(22)<1> ubCURR_UV(28,2)<16;16,1>
+ cmp.le.f0.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_TEMP_1(0)<16;16,1> r61.26<0;2,1>:ub
+ (-f0.0.allv) avg (16) wDIFF_TEMPORAL(22)<1> wDIFF_TEMPORAL(22)<16;16,1> ubCURR_UV(28,2)<16;16,1>
+
+ cmp.l.f0.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(23)<16;16,1> r61.28<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(23)<16;16,1> r61.30<0;2,1>:ub
+ mul (8) acc0.0<1>:w ubCURR_UV(29,2)<8;8,1> -uwCURBE_TEMP(1,4)<0;2,1>
+ mul (8) acc0.8<1>:w ubCURR_UV(29,10)<8;8,1> -uwCURBE_TEMP(1,6)<0;2,1>
+ mac (16) acc0<1>:w ubCURR_UV(29,2)<16;16,1> 256:w
+ mac (8) acc0.0<1>:w ubPREV_UV(8,112)<8;8,1> uwCURBE_TEMP(1,4)<0;2,1>
+ mac (8) acc0.8<1>:w ubPREV_UV(8,120)<8;8,1> uwCURBE_TEMP(1,6)<0;2,1>
+ add (16) acc0<1>:w acc0<16;16,1>:w 128:w
+ (f0.0) shr (16) wDIFF_TEMPORAL(23)<1> acc0<16;16,1>:w 8:w
+ (-f0.0) mov (16) wDIFF_TEMPORAL(23)<1> ubCURR_UV(29,2)<16;16,1>
+ cmp.le.f0.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_TEMP_1(0)<16;16,1> r61.26<0;2,1>:ub
+ (-f0.0.allv) avg (16) wDIFF_TEMPORAL(23)<1> wDIFF_TEMPORAL(23)<16;16,1> ubCURR_UV(29,2)<16;16,1>
+ cmp.l.f0.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(24)<16;16,1> r61.28<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(24)<16;16,1> r61.30<0;2,1>:ub
+ mul (8) acc0.0<1>:w ubCURR_UV(30,2)<8;8,1> -uwCURBE_TEMP(1,8)<0;2,1>
+ mul (8) acc0.8<1>:w ubCURR_UV(30,10)<8;8,1> -uwCURBE_TEMP(1,10)<0;2,1>
+ mac (16) acc0<1>:w ubCURR_UV(30,2)<16;16,1> 256:w
+ mac (8) acc0.0<1>:w ubPREV_UV(8,128)<8;8,1> uwCURBE_TEMP(1,8)<0;2,1>
+ mac (8) acc0.8<1>:w ubPREV_UV(8,136)<8;8,1> uwCURBE_TEMP(1,10)<0;2,1>
+ add (16) acc0<1>:w acc0<16;16,1>:w 128:w
+ (f0.0) shr (16) wDIFF_TEMPORAL(24)<1> acc0<16;16,1>:w 8:w
+ (-f0.0) mov (16) wDIFF_TEMPORAL(24)<1> ubCURR_UV(30,2)<16;16,1>
+ cmp.le.f0.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_TEMP_2(0)<16;16,1> r61.26<0;2,1>:ub
+ (-f0.0.allv) avg (16) wDIFF_TEMPORAL(24)<1> wDIFF_TEMPORAL(24)<16;16,1> ubCURR_UV(30,2)<16;16,1>
+
+ cmp.l.f0.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(25)<16;16,1> r61.28<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(25)<16;16,1> r61.30<0;2,1>:ub
+ mul (8) acc0.0<1>:w ubCURR_UV(31,2)<8;8,1> -uwCURBE_TEMP(1,8)<0;2,1>
+ mul (8) acc0.8<1>:w ubCURR_UV(31,10)<8;8,1> -uwCURBE_TEMP(1,10)<0;2,1>
+ mac (16) acc0<1>:w ubCURR_UV(31,2)<16;16,1> 256:w
+ mac (8) acc0.0<1>:w ubPREV_UV(8,144)<8;8,1> uwCURBE_TEMP(1,8)<0;2,1>
+ mac (8) acc0.8<1>:w ubPREV_UV(8,152)<8;8,1> uwCURBE_TEMP(1,10)<0;2,1>
+ add (16) acc0<1>:w acc0<16;16,1>:w 128:w
+ (f0.0) shr (16) wDIFF_TEMPORAL(25)<1> acc0<16;16,1>:w 8:w
+ (-f0.0) mov (16) wDIFF_TEMPORAL(25)<1> ubCURR_UV(31,2)<16;16,1>
+ cmp.le.f0.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_TEMP_2(0)<16;16,1> r61.26<0;2,1>:ub
+ (-f0.0.allv) avg (16) wDIFF_TEMPORAL(25)<1> wDIFF_TEMPORAL(25)<16;16,1> ubCURR_UV(31,2)<16;16,1>
+
+ cmp.l.f0.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(26)<16;16,1> r61.28<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(26)<16;16,1> r61.30<0;2,1>:ub
+ mul (8) acc0.0<1>:w ubCURR_UV(32,2)<8;8,1> -uwCURBE_TEMP(1,8)<0;2,1>
+ mul (8) acc0.8<1>:w ubCURR_UV(32,10)<8;8,1> -uwCURBE_TEMP(1,10)<0;2,1>
+ mac (16) acc0<1>:w ubCURR_UV(32,2)<16;16,1> 256:w
+ mac (8) acc0.0<1>:w ubPREV_UV(8,160)<8;8,1> uwCURBE_TEMP(1,8)<0;2,1>
+ mac (8) acc0.8<1>:w ubPREV_UV(8,168)<8;8,1> uwCURBE_TEMP(1,10)<0;2,1>
+ add (16) acc0<1>:w acc0<16;16,1>:w 128:w
+ (f0.0) shr (16) wDIFF_TEMPORAL(26)<1> acc0<16;16,1>:w 8:w
+ (-f0.0) mov (16) wDIFF_TEMPORAL(26)<1> ubCURR_UV(32,2)<16;16,1>
+ cmp.le.f0.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_TEMP_2(0)<16;16,1> r61.26<0;2,1>:ub
+ (-f0.0.allv) avg (16) wDIFF_TEMPORAL(26)<1> wDIFF_TEMPORAL(26)<16;16,1> ubCURR_UV(32,2)<16;16,1>
+
+ cmp.l.f0.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(27)<16;16,1> r61.28<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(27)<16;16,1> r61.30<0;2,1>:ub
+ mul (8) acc0.0<1>:w ubCURR_UV(33,2)<8;8,1> -uwCURBE_TEMP(1,8)<0;2,1>
+ mul (8) acc0.8<1>:w ubCURR_UV(33,10)<8;8,1> -uwCURBE_TEMP(1,10)<0;2,1>
+ mac (16) acc0<1>:w ubCURR_UV(33,2)<16;16,1> 256:w
+ mac (8) acc0.0<1>:w ubPREV_UV(8,176)<8;8,1> uwCURBE_TEMP(1,8)<0;2,1>
+ mac (8) acc0.8<1>:w ubPREV_UV(8,184)<8;8,1> uwCURBE_TEMP(1,10)<0;2,1>
+ add (16) acc0<1>:w acc0<16;16,1>:w 128:w
+ (f0.0) shr (16) wDIFF_TEMPORAL(27)<1> acc0<16;16,1>:w 8:w
+ (-f0.0) mov (16) wDIFF_TEMPORAL(27)<1> ubCURR_UV(33,2)<16;16,1>
+ cmp.le.f0.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_TEMP_2(0)<16;16,1> r61.26<0;2,1>:ub
+ (-f0.0.allv) avg (16) wDIFF_TEMPORAL(27)<1> wDIFF_TEMPORAL(27)<16;16,1> ubCURR_UV(33,2)<16;16,1>
+
+ cmp.l.f0.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(28)<16;16,1> r61.28<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w (abs)wDIFF_TEMPORAL(28)<16;16,1> r61.30<0;2,1>:ub
+ mul (8) acc0.0<1>:w ubCURR_UV(34,2)<8;8,1> -uwCURBE_TEMP(1,12)<0;2,1>
+ mul (8) acc0.8<1>:w ubCURR_UV(34,10)<8;8,1> -uwCURBE_TEMP(1,14)<0;2,1>
+ mac (16) acc0<1>:w ubCURR_UV(34,2)<16;16,1> 256:w
+ mac (8) acc0.0<1>:w ubPREV_UV(8,192)<8;8,1> uwCURBE_TEMP(1,12)<0;2,1>
+ mac (8) acc0.8<1>:w ubPREV_UV(8,200)<8;8,1> uwCURBE_TEMP(1,14)<0;2,1>
+ add (16) acc0<1>:w acc0<16;16,1>:w 128:w
+ (f0.0) shr (16) wDIFF_TEMPORAL(28)<1> acc0<16;16,1>:w 8:w
+ (-f0.0) mov (16) wDIFF_TEMPORAL(28)<1> ubCURR_UV(34,2)<16;16,1>
+ cmp.le.f0.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_TEMP_3(0)<16;16,1> r61.26<0;2,1>:ub
+ (-f0.0.allv) avg (16) wDIFF_TEMPORAL(28)<1> wDIFF_TEMPORAL(28)<16;16,1> ubCURR_UV(34,2)<16;16,1>
+
+ cmp.l.f0.0 (16) null<1>:w (abs)wCURBE_TEMP(4)<16;16,1> r61.28<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w (abs)wCURBE_TEMP(4)<16;16,1> r61.30<0;2,1>:ub
+ mul (8) acc0.0<1>:w ubCURR_UV(35,2)<8;8,1> -uwCURBE_TEMP(1,12)<0;2,1>
+ mul (8) acc0.8<1>:w ubCURR_UV(35,10)<8;8,1> -uwCURBE_TEMP(1,14)<0;2,1>
+ mac (16) acc0<1>:w ubCURR_UV(35,2)<16;16,1> 256:w
+ mac (8) acc0.0<1>:w ubPREV_UV(8,208)<8;8,1> uwCURBE_TEMP(1,12)<0;2,1>
+ mac (8) acc0.8<1>:w ubPREV_UV(8,216)<8;8,1> uwCURBE_TEMP(1,14)<0;2,1>
+ add (16) acc0<1>:w acc0<16;16,1>:w 128:w
+ (f0.0) shr (16) wCURBE_TEMP(4)<1> acc0<16;16,1>:w 8:w
+ (-f0.0) mov (16) wCURBE_TEMP(4)<1> ubCURR_UV(35,2)<16;16,1>
+ cmp.le.f0.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_TEMP_3(0)<16;16,1> r61.26<0;2,1>:ub
+ (-f0.0.allv) avg (16) wCURBE_TEMP(4)<1> wCURBE_TEMP(4)<16;16,1> ubCURR_UV(35,2)<16;16,1>
+
+ cmp.l.f0.0 (16) null<1>:w (abs)wCURBE_TEMP(5)<16;16,1> r61.28<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w (abs)wCURBE_TEMP(5)<16;16,1> r61.30<0;2,1>:ub
+ mul (8) acc0.0<1>:w ubCURR_UV(36,2)<8;8,1> -uwCURBE_TEMP(1,12)<0;2,1>
+ mul (8) acc0.8<1>:w ubCURR_UV(36,10)<8;8,1> -uwCURBE_TEMP(1,14)<0;2,1>
+ mac (16) acc0<1>:w ubCURR_UV(36,2)<16;16,1> 256:w
+ mac (8) acc0.0<1>:w ubPREV_UV(8,224)<8;8,1> uwCURBE_TEMP(1,12)<0;2,1>
+ mac (8) acc0.8<1>:w ubPREV_UV(8,232)<8;8,1> uwCURBE_TEMP(1,14)<0;2,1>
+ add (16) acc0<1>:w acc0<16;16,1>:w 128:w
+ (f0.0) shr (16) wCURBE_TEMP(5)<1> acc0<16;16,1>:w 8:w
+ (-f0.0) mov (16) wCURBE_TEMP(5)<1> ubCURR_UV(36,2)<16;16,1>
+ cmp.le.f0.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_TEMP_3(0)<16;16,1> r61.26<0;2,1>:ub
+ (-f0.0.allv) avg (16) wCURBE_TEMP(5)<1> wCURBE_TEMP(5)<16;16,1> ubCURR_UV(36,2)<16;16,1>
+
+ cmp.l.f0.0 (16) null<1>:w (abs)wCURBE_TEMP(6)<16;16,1> r61.28<0;2,1>:ub
+ cmp.l.f1.0 (16) null<1>:w (abs)wCURBE_TEMP(6)<16;16,1> r61.30<0;2,1>:ub
+ mul (8) acc0.0<1>:w ubCURR_UV(37,2)<8;8,1> -uwCURBE_TEMP(1,12)<0;2,1>
+ mul (8) acc0.8<1>:w ubCURR_UV(37,10)<8;8,1> -uwCURBE_TEMP(1,14)<0;2,1>
+ mac (16) acc0<1>:w ubCURR_UV(37,2)<16;16,1> 256:w
+ mac (8) acc0.0<1>:w ubPREV_UV(8,240)<8;8,1> uwCURBE_TEMP(1,12)<0;2,1>
+ mac (8) acc0.8<1>:w ubPREV_UV(8,248)<8;8,1> uwCURBE_TEMP(1,14)<0;2,1>
+ add (16) acc0<1>:w acc0<16;16,1>:w 128:w
+ (f0.0) shr (16) wCURBE_TEMP(6)<1> acc0<16;16,1>:w 8:w
+ (-f0.0) mov (16) wCURBE_TEMP(6)<1> ubCURR_UV(37,2)<16;16,1>
+ cmp.le.f0.0 (16) null<1>:w uwDIFF_TEMPORAL_SUM4x4_TEMP_3(0)<16;16,1> r61.26<0;2,1>:ub
+ (-f0.0.allv) avg (16) wCURBE_TEMP(6)<1> wCURBE_TEMP(6)<16;16,1> ubCURR_UV(37,2)<16;16,1>
+
+ //Pack Weight History WORD -> BYTE
+ mov (16) ubCURBE_TEMP(3,0)<1> ubCURBE_TEMP(0)<32;16,2>
+ mov (16) ubCURBE_TEMP(3,16)<1> ubCURBE_TEMP(1)<32;16,2>
+
+
+
+//Module Name : DN_UV_Compute_BNE_UV
+//Author : Tatiya, Rupesh
+//Description : Computes minimum SOAD for each 16x4 block.
+//
+// Takes the per-channel minimum of two groups of eight words read from
+// uwSOAD_MIN_8x4 and then narrows the eight word results to eight bytes,
+// in place, in CURBE_TEMP(1).
+
+ // Region <16;4,1> with 8 channels: 4 consecutive words starting at word 12,
+ // then step 16 words and take 4 more. First operand starts at index 0, second at index 2.
+ // f0.0 is set per channel where the first operand is less than the second.
+ cmp.l.f0.0 (8) null:w uwSOAD_MIN_8x4(0,12)<16;4,1> uwSOAD_MIN_8x4(2,12)<16;4,1>
+ // Predicated select: first operand where f0.0 is set, otherwise the second, i.e. min of the two.
+ (f0.0)sel (8) uwCURBE_TEMP(1,0)<1> uwSOAD_MIN_8x4(0,12)<16;4,1> uwSOAD_MIN_8x4(2,12)<16;4,1>
+
+ // WORD -> BYTE pack: copy every second byte (byte stride 2, starting at byte 0)
+ // of the eight words written above into eight consecutive bytes of the same register.
+ mov (8) ubCURBE_TEMP(1)<1> ubCURBE_TEMP(1)<16;8,2>
+
+
+
+//File Name : DN_UV_PL3_Unpack_Denoised_UV.asm
+//Author : Tatiya, Rupesh
+//Description : Unpack the interleaved UV data
+//
+// Every mov below reads 8 bytes with a byte stride of 4 (<32;8,4>) from one source row
+// and writes them as 8 consecutive bytes. Source byte offset 0 feeds MSGPAYLOAD_U and
+// source byte offset 2 feeds MSGPAYLOAD_V, so each source row contributes 8 U bytes and 8 V bytes.
+// Each payload register receives two source rows per group: bytes 0..7 and 16..23 from the
+// first group, bytes 8..15 and 24..31 from the second group.
+// NOTE(review): presumably the source rows are 16 words of U,V,U,V,... and each payload
+// register holds two 16-byte output rows - confirm against the macro definitions.
+
+//First 16 lines.
+ // Source rows DIFF_TEMPORAL(0..15) -> payload bytes 0..7 (even row) and 16..23 (odd row).
+ mov (8) ubMSGPAYLOAD_U(0,0)<1> ubDIFF_TEMPORAL(0,0)<32;8,4>
+ mov (8) ubMSGPAYLOAD_U(0,16)<1> ubDIFF_TEMPORAL(1,0)<32;8,4>
+ mov (8) ubMSGPAYLOAD_V(0,0)<1> ubDIFF_TEMPORAL(0,2)<32;8,4>
+ mov (8) ubMSGPAYLOAD_V(0,16)<1> ubDIFF_TEMPORAL(1,2)<32;8,4>
+ mov (8) ubMSGPAYLOAD_U(1,0)<1> ubDIFF_TEMPORAL(2,0)<32;8,4>
+ mov (8) ubMSGPAYLOAD_U(1,16)<1> ubDIFF_TEMPORAL(3,0)<32;8,4>
+ mov (8) ubMSGPAYLOAD_V(1,0)<1> ubDIFF_TEMPORAL(2,2)<32;8,4>
+ mov (8) ubMSGPAYLOAD_V(1,16)<1> ubDIFF_TEMPORAL(3,2)<32;8,4>
+ mov (8) ubMSGPAYLOAD_U(2,0)<1> ubDIFF_TEMPORAL(4,0)<32;8,4>
+ mov (8) ubMSGPAYLOAD_U(2,16)<1> ubDIFF_TEMPORAL(5,0)<32;8,4>
+ mov (8) ubMSGPAYLOAD_V(2,0)<1> ubDIFF_TEMPORAL(4,2)<32;8,4>
+ mov (8) ubMSGPAYLOAD_V(2,16)<1> ubDIFF_TEMPORAL(5,2)<32;8,4>
+ mov (8) ubMSGPAYLOAD_U(3,0)<1> ubDIFF_TEMPORAL(6,0)<32;8,4>
+ mov (8) ubMSGPAYLOAD_U(3,16)<1> ubDIFF_TEMPORAL(7,0)<32;8,4>
+ mov (8) ubMSGPAYLOAD_V(3,0)<1> ubDIFF_TEMPORAL(6,2)<32;8,4>
+ mov (8) ubMSGPAYLOAD_V(3,16)<1> ubDIFF_TEMPORAL(7,2)<32;8,4>
+ mov (8) ubMSGPAYLOAD_U(4,0)<1> ubDIFF_TEMPORAL(8,0)<32;8,4>
+ mov (8) ubMSGPAYLOAD_U(4,16)<1> ubDIFF_TEMPORAL(9,0)<32;8,4>
+ mov (8) ubMSGPAYLOAD_V(4,0)<1> ubDIFF_TEMPORAL(8,2)<32;8,4>
+ mov (8) ubMSGPAYLOAD_V(4,16)<1> ubDIFF_TEMPORAL(9,2)<32;8,4>
+ mov (8) ubMSGPAYLOAD_U(5,0)<1> ubDIFF_TEMPORAL(10,0)<32;8,4>
+ mov (8) ubMSGPAYLOAD_U(5,16)<1> ubDIFF_TEMPORAL(11,0)<32;8,4>
+ mov (8) ubMSGPAYLOAD_V(5,0)<1> ubDIFF_TEMPORAL(10,2)<32;8,4>
+ mov (8) ubMSGPAYLOAD_V(5,16)<1> ubDIFF_TEMPORAL(11,2)<32;8,4>
+ mov (8) ubMSGPAYLOAD_U(6,0)<1> ubDIFF_TEMPORAL(12,0)<32;8,4>
+ mov (8) ubMSGPAYLOAD_U(6,16)<1> ubDIFF_TEMPORAL(13,0)<32;8,4>
+ mov (8) ubMSGPAYLOAD_V(6,0)<1> ubDIFF_TEMPORAL(12,2)<32;8,4>
+ mov (8) ubMSGPAYLOAD_V(6,16)<1> ubDIFF_TEMPORAL(13,2)<32;8,4>
+ mov (8) ubMSGPAYLOAD_U(7,0)<1> ubDIFF_TEMPORAL(14,0)<32;8,4>
+ mov (8) ubMSGPAYLOAD_U(7,16)<1> ubDIFF_TEMPORAL(15,0)<32;8,4>
+ mov (8) ubMSGPAYLOAD_V(7,0)<1> ubDIFF_TEMPORAL(14,2)<32;8,4>
+ mov (8) ubMSGPAYLOAD_V(7,16)<1> ubDIFF_TEMPORAL(15,2)<32;8,4>
+
+//Second 16 lines.
+//12 lines first
+ // Source rows DIFF_TEMPORAL(16..27) -> payload bytes 8..15 (even row) and 24..31 (odd row).
+ mov (8) ubMSGPAYLOAD_U(0,8)<1> ubDIFF_TEMPORAL(16,0)<32;8,4>
+ mov (8) ubMSGPAYLOAD_U(0,24)<1> ubDIFF_TEMPORAL(17,0)<32;8,4>
+ mov (8) ubMSGPAYLOAD_V(0,8)<1> ubDIFF_TEMPORAL(16,2)<32;8,4>
+ mov (8) ubMSGPAYLOAD_V(0,24)<1> ubDIFF_TEMPORAL(17,2)<32;8,4>
+ mov (8) ubMSGPAYLOAD_U(1,8)<1> ubDIFF_TEMPORAL(18,0)<32;8,4>
+ mov (8) ubMSGPAYLOAD_U(1,24)<1> ubDIFF_TEMPORAL(19,0)<32;8,4>
+ mov (8) ubMSGPAYLOAD_V(1,8)<1> ubDIFF_TEMPORAL(18,2)<32;8,4>
+ mov (8) ubMSGPAYLOAD_V(1,24)<1> ubDIFF_TEMPORAL(19,2)<32;8,4>
+ mov (8) ubMSGPAYLOAD_U(2,8)<1> ubDIFF_TEMPORAL(20,0)<32;8,4>
+ mov (8) ubMSGPAYLOAD_U(2,24)<1> ubDIFF_TEMPORAL(21,0)<32;8,4>
+ mov (8) ubMSGPAYLOAD_V(2,8)<1> ubDIFF_TEMPORAL(20,2)<32;8,4>
+ mov (8) ubMSGPAYLOAD_V(2,24)<1> ubDIFF_TEMPORAL(21,2)<32;8,4>
+ mov (8) ubMSGPAYLOAD_U(3,8)<1> ubDIFF_TEMPORAL(22,0)<32;8,4>
+ mov (8) ubMSGPAYLOAD_U(3,24)<1> ubDIFF_TEMPORAL(23,0)<32;8,4>
+ mov (8) ubMSGPAYLOAD_V(3,8)<1> ubDIFF_TEMPORAL(22,2)<32;8,4>
+ mov (8) ubMSGPAYLOAD_V(3,24)<1> ubDIFF_TEMPORAL(23,2)<32;8,4>
+ mov (8) ubMSGPAYLOAD_U(4,8)<1> ubDIFF_TEMPORAL(24,0)<32;8,4>
+ mov (8) ubMSGPAYLOAD_U(4,24)<1> ubDIFF_TEMPORAL(25,0)<32;8,4>
+ mov (8) ubMSGPAYLOAD_V(4,8)<1> ubDIFF_TEMPORAL(24,2)<32;8,4>
+ mov (8) ubMSGPAYLOAD_V(4,24)<1> ubDIFF_TEMPORAL(25,2)<32;8,4>
+ mov (8) ubMSGPAYLOAD_U(5,8)<1> ubDIFF_TEMPORAL(26,0)<32;8,4>
+ mov (8) ubMSGPAYLOAD_U(5,24)<1> ubDIFF_TEMPORAL(27,0)<32;8,4>
+ mov (8) ubMSGPAYLOAD_V(5,8)<1> ubDIFF_TEMPORAL(26,2)<32;8,4>
+ mov (8) ubMSGPAYLOAD_V(5,24)<1> ubDIFF_TEMPORAL(27,2)<32;8,4>
+
+ //3 lines next
+ // This group moves four source rows: DIFF_TEMPORAL(28) plus three rows that are
+ // read from CURBE_TEMP(4), CURBE_TEMP(5) and CURBE_TEMP(6) instead of DIFF_TEMPORAL.
+ mov (8) ubMSGPAYLOAD_U(6,8)<1> ubDIFF_TEMPORAL(28,0)<32;8,4>
+ mov (8) ubMSGPAYLOAD_U(6,24)<1> ubCURBE_TEMP(4,0)<32;8,4>
+ mov (8) ubMSGPAYLOAD_V(6,8)<1> ubDIFF_TEMPORAL(28,2)<32;8,4>
+ mov (8) ubMSGPAYLOAD_V(6,24)<1> ubCURBE_TEMP(4,2)<32;8,4>
+
+ mov (8) ubMSGPAYLOAD_U(7,8)<1> ubCURBE_TEMP(5,0)<32;8,4>
+ mov (8) ubMSGPAYLOAD_U(7,24)<1> ubCURBE_TEMP(6,0)<32;8,4>
+ mov (8) ubMSGPAYLOAD_V(7,8)<1> ubCURBE_TEMP(5,2)<32;8,4>
+ mov (8) ubMSGPAYLOAD_V(7,24)<1> ubCURBE_TEMP(6,2)<32;8,4>
+
+
+
+//Module Name : DN_UV_420_Save_Curr_Frame_Y
+//Author : Tatiya, Rupesh
+//Description : Save Curr Frame Y data for 420 Input
+
+
+
+//Module Name : DN_UV_Load_Curr_Frame_Y
+//Author : Tatiya, Rupesh
+//Description : Saves Y or YUY2 of Current frame.
+//NOTE(review): the module name says "Load" while the description says "Saves"; all four
+// sends below use a null destination (no data is returned) - confirm the intended name.
+
+
+
+
+ // Build a message-header template in acc0: start from a copy of r0, then overwrite
+ // dwords 0 and 1 with the two words at r62.10 and r62.11 (converted to dwords).
+ mov (8) acc0.0<1>:ud r0.0<8;8,1>:ud
+ mov (2) acc0.0<1>:d r62.10<2;2,1>:w
+
+ // Dword 2 of the header = 0xF000F.
+ // NOTE(review): presumably the block-size field (width-1 = 15, height-1 = 15) - confirm against the message spec.
+ mov (1) acc0.2<1>:d 0xF000F:ud
+
+ // Four headers, nine registers apart (r92, r101, r110, r119), each starting as a copy of the template.
+ mov (8) r92.0<1>:ud acc0.0<8;8,1>:ud
+
+ mov (8) r101.0<1>:ud acc0.0<8;8,1>:ud
+ mov (8) r110.0<1>:ud acc0.0<8;8,1>:ud
+ mov (8) r119.0<1>:ud acc0.0<8;8,1>:ud
+
+ // r101: header dword 1 is the template value + 16.
+ add (1) r101.1<1>:d acc0.1<0;1,0>:d 16:d
+
+ // r110: header dword 0 is the template value + 16.
+ add (1) r110.0<1>:d acc0.0<0;1,0>:d 16:d
+
+ // r119: header dwords 0 and 1 are both the template value + 16.
+ add (2) r119.0<1>:d acc0.0<2;2,1>:d 16:d
+
+ // One send per header; the four differ only in the header register. The data registers
+ // that follow each header are not filled here and must already hold the pixels.
+ // NOTE(review): presumably dword 0 / dword 1 are the X / Y origin, so the four messages cover
+ // the four 16x16 quadrants of a 32x32 area - confirm against the message spec.
+ send (8) null<1>:d r92 0x5 0x120A8018:ud
+ send (8) null<1>:d r101 0x5 0x120A8018:ud
+ send (8) null<1>:d r110 0x5 0x120A8018:ud
+ send (8) null<1>:d r119 0x5 0x120A8018:ud
+
+
+ //TODO - See if History saving can be combined with Curr Frame Save. - rT
+
+
+//Module Name : DN_UV_Save_Hist_UV
+//Author : Tatiya, Rupesh
+//Description : Saves DN history for UV data.
+
+ // Message header in r3: copy of r0 with dwords 0 and 1 replaced by the two words at
+ // r62.12 and r62.13 (converted to dwords) and dword 2 set to 0x30007.
+ // NOTE(review): 0x30007 is presumably the block size (width-1 = 7, height-1 = 3) - confirm.
+ mov (8) r3.0<1>:ud r0.0<8;8,1>:ud
+ mov (2) r3.0<1>:d r62.12<2;2,1>:w
+ mov (1) r3.2<1>:d 0x30007:ud
+
+ // Null destination: nothing is returned. The payload after the header is not
+ // written in this module and must already be in place.
+ send (8) null<1>:d r3 0x5 0x40A8021:ud
+
+
+
+//Module Name : DN_UV_Save_BNE_UV
+//Author : Tatiya, Rupesh
+//Description : Saves BNE values for 16x16 U and 16x16 V.
+
+ // Message header in r1: copy of r0 with dwords 0 and 1 replaced by the two words at
+ // r63.12 and r63.13 (converted to dwords) and dword 2 set to 0x10003.
+ // NOTE(review): 0x10003 is presumably the block size (width-1 = 3, height-1 = 1) - confirm.
+ mov (8) r1.0<1>:ud r0.0<8;8,1>:ud
+ mov (2) r1.0<1>:d r63.12<2;2,1>:w
+ mov (1) r1.2<1>:d 0x10003:ud
+
+ // Null destination: nothing is returned. The payload after the header is not
+ // written in this module and must already be in place.
+ send (8) null<1>:d r1 0x5 0x40A8023:ud
+
+
+
+//File Name : DN_UV_PL3_Save_Curr_Frame_UV.asm
+//Author : Tatiya, Rupesh
+//Description : Save U and V data for PL3 surface
+
+
+
+//Module name : DN_UV_Save_Curr_Frame_UV
+//Author : Tatiya, Rupesh
+//Description : Saves Current Frame (UV only).
+//
+// Builds one message header, duplicates it into a second register and issues two sends
+// whose descriptors differ only in the low byte (0x19 and 0x1A).
+
+
+
+
+ // Both headers start as a copy of r0.
+ // NOTE(review): the r83 copy is fully overwritten by the r74 -> r83 mov further down
+ // before anything reads r83, so this second mov looks redundant - confirm before removing.
+ mov (8) r74<1>:ud r0.0<8;8,1>:ud
+ mov (8) r83<1>:ud r0.0<8;8,1>:ud
+
+ // Header dwords 0 and 1 = the two words at r62.10 and r62.11 shifted right by one
+ // (the same words that feed the Y save unshifted, so the UV origin is half the Y origin).
+ // Dword 2 = 0xF000F.
+ shr (2) r74.0<1>:d r62.10<2;2,1>:w 1:w
+ mov (1) r74.2<1>:d 0xF000F:ud
+
+ // The second header is an exact copy of the first.
+ mov (8) r83.0<1>:ud r74.0<8;8,1>:ud
+
+ // Null destination: nothing is returned. Payload registers after each header are not written here.
+ // NOTE(review): presumably 0x19 / 0x1A select the U and V surfaces - confirm against the binding table.
+ send (8) null<1>:d r74 0x5 0x120A8019:ud
+ send (8) null<1>:d r83 0x5 0x120A801A:ud
+
+
+
+//End of Thread message
+// Copies r0 into r127 and sends it as the final message of the main path; the
+// sub-routines placed after this point are not reached by falling through.
+// NOTE(review): 0x27 presumably carries the end-of-thread bit plus the target unit id - confirm against the send encoding.
+
+mov (8) r127<1>:ud r0.0<8;8,1>:ud
+ send (1) null<1>:d r127 0x27 0x02000010
+
+
+ //All sub-routines here
+
+
+// Module Name : Noise_Detection
+// Author : Tatiya, Rupesh
+// Description : Performs noise detection on 32 pixels of U (8x4) and 32 pixels of V (8x4).
+
+DN_UV_NOISE_DETECTION_UV:
+
+// Find Field Block Median
+//
+// Purpose : Find the median value of the nine pixels in the same field
+// which are centered at current pixel.
+//
+// Works on 9 pixels centered at the current pixel
+// NOTE: pixels are within same field.
+// v4 - current pixel
+//
+// v2 v1 v0
+// * * * <--- Different field - not used
+// v5 v4 v3
+// * * * <--- Different field - not used
+// v8 v7 v6
+
+// Algorithm to find median modifies the data.
+// Copy the data needed to calculate median so the original source data stays intact.
+//
+
+//TODO - Change Interleaved implementation to separated one if - , does not work on predication. - rT
+
+//Delete Later - rT
+//mov (1) pCUR_UV:uw 52*32:uw
+
+// v0
+mov (16) ubMEDIAN_TEMP(0,0)<1> r[a0.0,0]<16;16,1>
+// v0
+mov (16) ubMEDIAN_TEMP(0,16)<1> r[a0.0,32]<16;16,1>
+// v1
+mov (16) ubMEDIAN_TEMP(1,0)<1> r[a0.0,2]<16;16,1>
+// v1
+mov (16) ubMEDIAN_TEMP(1,16)<1> r[a0.0,34]<16;16,1>
+// v2
+mov (16) ubMEDIAN_TEMP(2,0)<1> r[a0.0,4]<16;16,1>
+// v2
+mov (16) ubMEDIAN_TEMP(2,16)<1> r[a0.0,36]<16;16,1>
+// v3
+mov (16) ubMEDIAN_TEMP(3,0)<1> r[a0.0,64]<16;16,1>
+// v3
+mov (16) ubMEDIAN_TEMP(3,16)<1> r[a0.0,96]<16;16,1>
+// v4
+mov (16) ubMEDIAN_TEMP(4,0)<1> r[a0.0,66]<16;16,1>
+// v4
+mov (16) ubMEDIAN_TEMP(4,16)<1> r[a0.0,98]<16;16,1>
+// v5
+mov (16) ubMEDIAN_TEMP(5,0)<1> r[a0.0,68]<16;16,1>
+// v5
+mov (16) ubMEDIAN_TEMP(5,16)<1> r[a0.0,100]<16;16,1>
+// v6
+mov (16) ubMEDIAN_TEMP(6,0)<1> r[a0.0,128]<16;16,1>
+// v6
+mov (16) ubMEDIAN_TEMP(6,16)<1> r[a0.0,160]<16;16,1>
+// v7
+mov (16) ubMEDIAN_TEMP(7,0)<1> r[a0.0,130]<16;16,1>
+// v7
+mov (16) ubMEDIAN_TEMP(7,16)<1> r[a0.0,162]<16;16,1>
+// v8
+mov (16) ubMEDIAN_TEMP(8,0)<1> r[a0.0,132]<16;16,1>
+// v8
+mov (16) ubMEDIAN_TEMP(8,16)<1> r[a0.0,164]<16;16,1>
+
+//TODO - Optimize one instruction here.
+add (1) a0.0:uw a0.0<0;1,0>:uw 64:uw
+// v0
+mov (16) ubMEDIAN_TEMP(9,0)<1> r[a0.0,0]<16;16,1>
+// v0
+mov (16) ubMEDIAN_TEMP(9,16)<1> r[a0.0,32]<16;16,1>
+// v1
+mov (16) ubMEDIAN_TEMP(10,0)<1> r[a0.0,2]<16;16,1>
+// v1
+mov (16) ubMEDIAN_TEMP(10,16)<1> r[a0.0,34]<16;16,1>
+// v2
+mov (16) ubMEDIAN_TEMP(11,0)<1> r[a0.0,4]<16;16,1>
+// v2
+mov (16) ubMEDIAN_TEMP(11,16)<1> r[a0.0,36]<16;16,1>
+// v3
+mov (16) ubMEDIAN_TEMP(12,0)<1> r[a0.0,64]<16;16,1>
+// v3
+mov (16) ubMEDIAN_TEMP(12,16)<1> r[a0.0,96]<16;16,1>
+// v4
+mov (16) ubMEDIAN_TEMP(13,0)<1> r[a0.0,66]<16;16,1>
+// v4
+mov (16) ubMEDIAN_TEMP(13,16)<1> r[a0.0,98]<16;16,1>
+// v5
+mov (16) ubMEDIAN_TEMP(14,0)<1> r[a0.0,68]<16;16,1>
+// v5
+mov (16) ubMEDIAN_TEMP(14,16)<1> r[a0.0,100]<16;16,1>
+// v6
+mov (16) ubMEDIAN_TEMP(15,0)<1> r[a0.0,128]<16;16,1>
+// v6
+mov (16) ubMEDIAN_TEMP(15,16)<1> r[a0.0,160]<16;16,1>
+// v7
+mov (16) ubMEDIAN_TEMP(16,0)<1> r[a0.0,130]<16;16,1>
+// v7
+mov (16) ubMEDIAN_TEMP(16,16)<1> r[a0.0,162]<16;16,1>
+// v8
+mov (16) ubMEDIAN_TEMP(17,0)<1> r[a0.0,132]<16;16,1>
+// v8
+mov (16) ubMEDIAN_TEMP(17,16)<1> r[a0.0,164]<16;16,1>
+
+//TODO - Optimize one instruction here.
+add (1) a0.0:uw a0.0<0;1,0>:uw 64:uw
+
+// MedianSwap
+//
+// MedianSwap(inOutLeft, inOutRight)
+// {
+// if (inOutLeft > inOutRight)
+// {
+// temp = inOutLeft
+// inOutLeft = inOutRight
+// inOutRight = temp
+// }
+// }
+
+// MedianSwap(v1, v2) - U
+// MedianSwap(v4, v5) - U
+// MedianSwap(v1, v2) - V
+// MedianSwap(v4, v5) - V
+
+cmp.g.f0.0 (16) null:w ubMEDIAN_TEMP(1,0)<32;16,2> ubMEDIAN_TEMP(2,0)<32;16,2>
+cmp.g.f0.1 (16) null:w ubMEDIAN_TEMP(4,0)<32;16,2> ubMEDIAN_TEMP(5,0)<32;16,2>
+cmp.g.f1.0 (16) null:w ubMEDIAN_TEMP(1,1)<32;16,2> ubMEDIAN_TEMP(2,1)<32;16,2>
+cmp.g.f1.1 (16) null:w ubMEDIAN_TEMP(4,1)<32;16,2> ubMEDIAN_TEMP(5,1)<32;16,2>
+
+ mov (16) ubTEMP1(0,0)<1> ubMEDIAN_TEMP(1,0)<32;16,2>
+ mov (16) ubTEMP1(0,16)<1> ubMEDIAN_TEMP(4,0)<32;16,2>
+ mov (16) ubTEMP1(1,0)<1> ubMEDIAN_TEMP(1,1)<32;16,2>
+ mov (16) ubTEMP1(1,16)<1> ubMEDIAN_TEMP(4,1)<32;16,2>
+
+(f0.0) mov (16) ubMEDIAN_TEMP(1,0)<2> ubMEDIAN_TEMP(2,0)<32;16,2>
+(f0.1) mov (16) ubMEDIAN_TEMP(4,0)<2> ubMEDIAN_TEMP(5,0)<32;16,2>
+(f1.0) mov (16) ubMEDIAN_TEMP(1,1)<2> ubMEDIAN_TEMP(2,1)<32;16,2>
+(f1.1) mov (16) ubMEDIAN_TEMP(4,1)<2> ubMEDIAN_TEMP(5,1)<32;16,2>
+
+(f0.0) mov (16) ubMEDIAN_TEMP(2,0)<2> ubTEMP1(0,0)<16;16,1>
+(f0.1) mov (16) ubMEDIAN_TEMP(5,0)<2> ubTEMP1(0,16)<16;16,1>
+(f1.0) mov (16) ubMEDIAN_TEMP(2,1)<2> ubTEMP1(1,0)<16;16,1>
+(f1.1) mov (16) ubMEDIAN_TEMP(5,1)<2> ubTEMP1(1,16)<16;16,1>
+
+// MedianSwap(v7, v8) - U
+// MedianSwap(v0, v1) - U
+// MedianSwap(v7, v8) - V
+// MedianSwap(v0, v1) - V
+ cmp.g.f0.0 (16) null:w ubMEDIAN_TEMP(7,0)<32;16,2> ubMEDIAN_TEMP(8,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w ubMEDIAN_TEMP(0,0)<32;16,2> ubMEDIAN_TEMP(1,0)<32;16,2>
+ cmp.g.f1.0 (16) null:w ubMEDIAN_TEMP(7,1)<32;16,2> ubMEDIAN_TEMP(8,1)<32;16,2>
+ cmp.g.f1.1 (16) null:w ubMEDIAN_TEMP(0,1)<32;16,2> ubMEDIAN_TEMP(1,1)<32;16,2>
+
+ mov (16) ubTEMP1(0,0)<1> ubMEDIAN_TEMP(7,0)<32;16,2>
+ mov (16) ubTEMP1(0,16)<1> ubMEDIAN_TEMP(0,0)<32;16,2>
+ mov (16) ubTEMP1(1,0)<1> ubMEDIAN_TEMP(7,1)<32;16,2>
+ mov (16) ubTEMP1(1,16)<1> ubMEDIAN_TEMP(0,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(7,0)<2> ubMEDIAN_TEMP(8,0)<32;16,2>
+ (f0.1) mov (16) ubMEDIAN_TEMP(0,0)<2> ubMEDIAN_TEMP(1,0)<32;16,2>
+ (f1.0) mov (16) ubMEDIAN_TEMP(7,1)<2> ubMEDIAN_TEMP(8,1)<32;16,2>
+ (f1.1) mov (16) ubMEDIAN_TEMP(0,1)<2> ubMEDIAN_TEMP(1,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(8,0)<2> ubTEMP1(0,0)<16;16,1>
+ (f0.1) mov (16) ubMEDIAN_TEMP(1,0)<2> ubTEMP1(0,16)<16;16,1>
+ (f1.0) mov (16) ubMEDIAN_TEMP(8,1)<2> ubTEMP1(1,0)<16;16,1>
+ (f1.1) mov (16) ubMEDIAN_TEMP(1,1)<2> ubTEMP1(1,16)<16;16,1>
+
+ // MedianSwap(v3, v4) - U
+ // MedianSwap(v6, v7) - U
+ // MedianSwap(v3, v4) - V
+ // MedianSwap(v6, v7) - V
+ cmp.g.f0.0 (16) null:w ubMEDIAN_TEMP(3,0)<32;16,2> ubMEDIAN_TEMP(4,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w ubMEDIAN_TEMP(6,0)<32;16,2> ubMEDIAN_TEMP(7,0)<32;16,2>
+ cmp.g.f1.0 (16) null:w ubMEDIAN_TEMP(3,1)<32;16,2> ubMEDIAN_TEMP(4,1)<32;16,2>
+ cmp.g.f1.1 (16) null:w ubMEDIAN_TEMP(6,1)<32;16,2> ubMEDIAN_TEMP(7,1)<32;16,2>
+
+ mov (16) ubTEMP1(0,0)<1> ubMEDIAN_TEMP(3,0)<32;16,2>
+ mov (16) ubTEMP1(0,16)<1> ubMEDIAN_TEMP(6,0)<32;16,2>
+ mov (16) ubTEMP1(1,0)<1> ubMEDIAN_TEMP(3,1)<32;16,2>
+ mov (16) ubTEMP1(1,16)<1> ubMEDIAN_TEMP(6,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(3,0)<2> ubMEDIAN_TEMP(4,0)<32;16,2>
+ (f0.1) mov (16) ubMEDIAN_TEMP(6,0)<2> ubMEDIAN_TEMP(7,0)<32;16,2>
+ (f1.0) mov (16) ubMEDIAN_TEMP(3,1)<2> ubMEDIAN_TEMP(4,1)<32;16,2>
+ (f1.1) mov (16) ubMEDIAN_TEMP(6,1)<2> ubMEDIAN_TEMP(7,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(4,0)<2> ubTEMP1(0,0)<16;16,1>
+ (f0.1) mov (16) ubMEDIAN_TEMP(7,0)<2> ubTEMP1(0,16)<16;16,1>
+ (f1.0) mov (16) ubMEDIAN_TEMP(4,1)<2> ubTEMP1(1,0)<16;16,1>
+ (f1.1) mov (16) ubMEDIAN_TEMP(7,1)<2> ubTEMP1(1,16)<16;16,1>
+
+ // MedianSwap(v1, v2) - U
+ // MedianSwap(v4, v5) - U
+ // MedianSwap(v1, v2) - V
+ // MedianSwap(v4, v5) - V
+ cmp.g.f0.0 (16) null:w ubMEDIAN_TEMP(1,0)<32;16,2> ubMEDIAN_TEMP(2,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w ubMEDIAN_TEMP(4,0)<32;16,2> ubMEDIAN_TEMP(5,0)<32;16,2>
+ cmp.g.f1.0 (16) null:w ubMEDIAN_TEMP(1,1)<32;16,2> ubMEDIAN_TEMP(2,1)<32;16,2>
+ cmp.g.f1.1 (16) null:w ubMEDIAN_TEMP(4,1)<32;16,2> ubMEDIAN_TEMP(5,1)<32;16,2>
+
+ mov (16) ubTEMP1(0,0)<1> ubMEDIAN_TEMP(1,0)<32;16,2>
+ mov (16) ubTEMP1(0,16)<1> ubMEDIAN_TEMP(4,0)<32;16,2>
+ mov (16) ubTEMP1(1,0)<1> ubMEDIAN_TEMP(1,1)<32;16,2>
+ mov (16) ubTEMP1(1,16)<1> ubMEDIAN_TEMP(4,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(1,0)<2> ubMEDIAN_TEMP(2,0)<32;16,2>
+ (f0.1) mov (16) ubMEDIAN_TEMP(4,0)<2> ubMEDIAN_TEMP(5,0)<32;16,2>
+ (f1.0) mov (16) ubMEDIAN_TEMP(1,1)<2> ubMEDIAN_TEMP(2,1)<32;16,2>
+ (f1.1) mov (16) ubMEDIAN_TEMP(4,1)<2> ubMEDIAN_TEMP(5,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(2,0)<2> ubTEMP1(0,0)<16;16,1>
+ (f0.1) mov (16) ubMEDIAN_TEMP(5,0)<2> ubTEMP1(0,16)<16;16,1>
+ (f1.0) mov (16) ubMEDIAN_TEMP(2,1)<2> ubTEMP1(1,0)<16;16,1>
+ (f1.1) mov (16) ubMEDIAN_TEMP(5,1)<2> ubTEMP1(1,16)<16;16,1>
+
+ // MedianSwap(v7, v8) - U
+ // MedianSwap(v0, v3) - U
+ // MedianSwap(v7, v8) - V
+ // MedianSwap(v0, v3) - V
+ cmp.g.f0.0 (16) null:w ubMEDIAN_TEMP(7,0)<32;16,2> ubMEDIAN_TEMP(8,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w ubMEDIAN_TEMP(0,0)<32;16,2> ubMEDIAN_TEMP(3,0)<32;16,2>
+ cmp.g.f1.0 (16) null:w ubMEDIAN_TEMP(7,1)<32;16,2> ubMEDIAN_TEMP(8,1)<32;16,2>
+ cmp.g.f1.1 (16) null:w ubMEDIAN_TEMP(0,1)<32;16,2> ubMEDIAN_TEMP(3,1)<32;16,2>
+
+ mov (16) ubTEMP1(0,0)<1> ubMEDIAN_TEMP(7,0)<32;16,2>
+ mov (16) ubTEMP1(0,16)<1> ubMEDIAN_TEMP(0,0)<32;16,2>
+ mov (16) ubTEMP1(1,0)<1> ubMEDIAN_TEMP(7,1)<32;16,2>
+ mov (16) ubTEMP1(1,16)<1> ubMEDIAN_TEMP(0,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(7,0)<2> ubMEDIAN_TEMP(8,0)<32;16,2>
+ (f0.1) mov (16) ubMEDIAN_TEMP(0,0)<2> ubMEDIAN_TEMP(3,0)<32;16,2>
+ (f1.0) mov (16) ubMEDIAN_TEMP(7,1)<2> ubMEDIAN_TEMP(8,1)<32;16,2>
+ (f1.1) mov (16) ubMEDIAN_TEMP(0,1)<2> ubMEDIAN_TEMP(3,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(8,0)<2> ubTEMP1(0,0)<16;16,1>
+ (f0.1) mov (16) ubMEDIAN_TEMP(3,0)<2> ubTEMP1(0,16)<16;16,1>
+ (f1.0) mov (16) ubMEDIAN_TEMP(8,1)<2> ubTEMP1(1,0)<16;16,1>
+ (f1.1) mov (16) ubMEDIAN_TEMP(3,1)<2> ubTEMP1(1,16)<16;16,1>
+
+ // NOTE:
+ // Compare v0 to v6 to find the minimum.
+ // Store the minimum for future use.
+ //TODO - Find if MIN is needed.
+ //cmp.l.f0.0 (16) null:w ubMEDIAN_TEMP(%1+0,0)<32;16,2> ubMEDIAN_TEMP(%1+6,0)<32;16,2>
+ //cmp.l.f1.0 (16) null:w ubMEDIAN_TEMP(%1+0,1)<32;16,2> ubMEDIAN_TEMP(%1+6,1)<32;16,2>
+ //(f0.0) mov (16) ubCURR_MIN(0,%2*16+0)<1> ubMEDIAN_TEMP(%1+0,0)<32;16,2>
+ //(f1.0) mov (16) ubCURR_MIN(1,%2*16+0)<1> ubMEDIAN_TEMP(%1+0,1)<32;16,2>
+ //(-f0.0) mov (16) ubCURR_MIN(0,%2*16+0)<1> ubMEDIAN_TEMP(%1+6,0)<32;16,2>
+ //(-f1.0) mov (16) ubCURR_MIN(1,%2*16+0)<1> ubMEDIAN_TEMP(%1+6,1)<32;16,2>
+
+ // MedianSwap(v5, v8) - U
+ // MedianSwap(v4, v7) - U
+ // MedianSwap(v5, v8) - V
+ // MedianSwap(v4, v7) - V
+ cmp.g.f0.0 (16) null:w ubMEDIAN_TEMP(5,0)<32;16,2> ubMEDIAN_TEMP(8,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w ubMEDIAN_TEMP(4,0)<32;16,2> ubMEDIAN_TEMP(7,0)<32;16,2>
+ cmp.g.f1.0 (16) null:w ubMEDIAN_TEMP(5,1)<32;16,2> ubMEDIAN_TEMP(8,1)<32;16,2>
+ cmp.g.f1.1 (16) null:w ubMEDIAN_TEMP(4,1)<32;16,2> ubMEDIAN_TEMP(7,1)<32;16,2>
+
+ mov (16) ubTEMP1(0,0)<1> ubMEDIAN_TEMP(5,0)<32;16,2>
+ mov (16) ubTEMP1(0,16)<1> ubMEDIAN_TEMP(4,0)<32;16,2>
+ mov (16) ubTEMP1(1,0)<1> ubMEDIAN_TEMP(5,1)<32;16,2>
+ mov (16) ubTEMP1(1,16)<1> ubMEDIAN_TEMP(4,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(5,0)<2> ubMEDIAN_TEMP(8,0)<32;16,2>
+ (f0.1) mov (16) ubMEDIAN_TEMP(4,0)<2> ubMEDIAN_TEMP(7,0)<32;16,2>
+ (f1.0) mov (16) ubMEDIAN_TEMP(5,1)<2> ubMEDIAN_TEMP(8,1)<32;16,2>
+ (f1.1) mov (16) ubMEDIAN_TEMP(4,1)<2> ubMEDIAN_TEMP(7,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(8,0)<2> ubTEMP1(0,0)<16;16,1>
+ (f0.1) mov (16) ubMEDIAN_TEMP(7,0)<2> ubTEMP1(0,16)<16;16,1>
+ (f1.0) mov (16) ubMEDIAN_TEMP(8,1)<2> ubTEMP1(1,0)<16;16,1>
+ (f1.1) mov (16) ubMEDIAN_TEMP(7,1)<2> ubTEMP1(1,16)<16;16,1>
+
+// NOTE:
+// Compare v2 to v8 to find the maximum.
+// Store the maximum for future use.
+ //TODO - Find if MAX is needed.
+// cmp.g.f0.0 (16) null:w ubMEDIAN_TEMP(%1+2,0)<32;16,2> ubMEDIAN_TEMP(%1+8,0)<32;16,2>
+// cmp.g.f1.0 (16) null:w ubMEDIAN_TEMP(%1+2,1)<32;16,2> ubMEDIAN_TEMP(%1+8,1)<32;16,2>
+//(f0.0) mov (16) ubCURR_MAX(0,%2*16+0)<1> ubMEDIAN_TEMP(%1+2,0)<32;16,2>
+//(f1.0) mov (16) ubCURR_MAX(1,%2*16+0)<1> ubMEDIAN_TEMP(%1+2,1)<32;16,2>
+//(-f0.0) mov (16) ubCURR_MAX(0,%2*16+0)<1> ubMEDIAN_TEMP(%1+8,0)<32;16,2>
+//(-f1.0) mov (16) ubCURR_MAX(1,%2*16+0)<1> ubMEDIAN_TEMP(%1+8,1)<32;16,2>
+
+// MedianSwap(v3, v6) - U
+// MedianSwap(v1, v4) - U
+// MedianSwap(v3, v6) - V
+// MedianSwap(v1, v4) - V
+ cmp.g.f0.0 (16) null:w ubMEDIAN_TEMP(3,0)<32;16,2> ubMEDIAN_TEMP(6,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w ubMEDIAN_TEMP(1,0)<32;16,2> ubMEDIAN_TEMP(4,0)<32;16,2>
+ cmp.g.f1.0 (16) null:w ubMEDIAN_TEMP(3,1)<32;16,2> ubMEDIAN_TEMP(6,1)<32;16,2>
+ cmp.g.f1.1 (16) null:w ubMEDIAN_TEMP(1,1)<32;16,2> ubMEDIAN_TEMP(4,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(6,0)<2> ubMEDIAN_TEMP(3,0)<32;16,2>
+ (f0.1) mov (16) ubMEDIAN_TEMP(4,0)<2> ubMEDIAN_TEMP(1,0)<32;16,2>
+ (f1.0) mov (16) ubMEDIAN_TEMP(6,1)<2> ubMEDIAN_TEMP(3,1)<32;16,2>
+ (f1.1) mov (16) ubMEDIAN_TEMP(4,1)<2> ubMEDIAN_TEMP(1,1)<32;16,2>
+
+ // MedianSwap(v2,v5) - U
+ // MedianSwap(v4,v7) - U
+ // MedianSwap(v2,v5) - V
+ // MedianSwap(v4,v7) - V
+ cmp.g.f0.0 (16) null:w ubMEDIAN_TEMP(2,0)<32;16,2> ubMEDIAN_TEMP(5,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w ubMEDIAN_TEMP(4,0)<32;16,2> ubMEDIAN_TEMP(7,0)<32;16,2>
+ cmp.g.f1.0 (16) null:w ubMEDIAN_TEMP(2,1)<32;16,2> ubMEDIAN_TEMP(5,1)<32;16,2>
+ cmp.g.f1.1 (16) null:w ubMEDIAN_TEMP(4,1)<32;16,2> ubMEDIAN_TEMP(7,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(2,0)<2> ubMEDIAN_TEMP(5,0)<32;16,2>
+ (f0.1) mov (16) ubMEDIAN_TEMP(4,0)<2> ubMEDIAN_TEMP(7,0)<32;16,2>
+ (f1.0) mov (16) ubMEDIAN_TEMP(2,1)<2> ubMEDIAN_TEMP(5,1)<32;16,2>
+ (f1.1) mov (16) ubMEDIAN_TEMP(4,1)<2> ubMEDIAN_TEMP(7,1)<32;16,2>
+
+ // MedianSwap(v4,v2) - U
+ // MedianSwap(v4,v2) - V
+ cmp.g.f0.0 (16) null:w ubMEDIAN_TEMP(4,0)<32;16,2> ubMEDIAN_TEMP(2,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w ubMEDIAN_TEMP(4,1)<32;16,2> ubMEDIAN_TEMP(2,1)<32;16,2>
+
+ mov (16) ubTEMP1(0,0)<1> ubMEDIAN_TEMP(4,0)<32;16,2>
+ mov (16) ubTEMP1(0,16)<1> ubMEDIAN_TEMP(4,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(4,0)<2> ubMEDIAN_TEMP(2,0)<32;16,2>
+ (f0.1) mov (16) ubMEDIAN_TEMP(4,1)<2> ubMEDIAN_TEMP(2,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(2,0)<2> ubTEMP1(0,0)<16;16,1>
+ (f0.1) mov (16) ubMEDIAN_TEMP(2,1)<2> ubTEMP1(0,16)<16;16,1>
+
+ // MedianSwap(v6,v4) - U
+ // MedianSwap(v6,v4) - V
+ cmp.g.f0.0 (16) null:w ubMEDIAN_TEMP(6,0)<32;16,2> ubMEDIAN_TEMP(4,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w ubMEDIAN_TEMP(6,1)<32;16,2> ubMEDIAN_TEMP(4,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(4,0)<2> ubMEDIAN_TEMP(6,0)<32;16,2>
+ (f0.1) mov (16) ubMEDIAN_TEMP(4,1)<2> ubMEDIAN_TEMP(6,1)<32;16,2>
+
+ // MedianSwap(v4,v2) - U
+ // MedianSwap(v4,v2) - V
+ cmp.g.f0.0 (16) null:w ubMEDIAN_TEMP(4,0)<32;16,2> ubMEDIAN_TEMP(2,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w ubMEDIAN_TEMP(4,1)<32;16,2> ubMEDIAN_TEMP(2,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(4,0)<2> ubMEDIAN_TEMP(2,0)<32;16,2>
+ (f0.1) mov (16) ubMEDIAN_TEMP(4,1)<2> ubMEDIAN_TEMP(2,1)<32;16,2>
+
+// ---- Second median pass: same exchange sequence, applied to MEDIAN_TEMP rows 9..17 ----
+// (row 9 = v0, row 10 = v1, ... row 17 = v8, as filled by the second gather above)
+
+// MedianSwap(v1, v2) - U
+// MedianSwap(v4, v5) - U
+// MedianSwap(v1, v2) - V
+// MedianSwap(v4, v5) - V
+
+// Flags are set per channel where the left row is greater than the right row.
+// Even-offset bytes (offset 0, stride 2) use f0.x, odd-offset bytes (offset 1, stride 2) use f1.x.
+cmp.g.f0.0 (16) null:w ubMEDIAN_TEMP(10,0)<32;16,2> ubMEDIAN_TEMP(11,0)<32;16,2>
+cmp.g.f0.1 (16) null:w ubMEDIAN_TEMP(13,0)<32;16,2> ubMEDIAN_TEMP(14,0)<32;16,2>
+cmp.g.f1.0 (16) null:w ubMEDIAN_TEMP(10,1)<32;16,2> ubMEDIAN_TEMP(11,1)<32;16,2>
+cmp.g.f1.1 (16) null:w ubMEDIAN_TEMP(13,1)<32;16,2> ubMEDIAN_TEMP(14,1)<32;16,2>
+
+ // Save the left rows (unpredicated) so they can be written to the right rows after being overwritten.
+ mov (16) ubTEMP1(0,0)<1> ubMEDIAN_TEMP(10,0)<32;16,2>
+ mov (16) ubTEMP1(0,16)<1> ubMEDIAN_TEMP(13,0)<32;16,2>
+ mov (16) ubTEMP1(1,0)<1> ubMEDIAN_TEMP(10,1)<32;16,2>
+ mov (16) ubTEMP1(1,16)<1> ubMEDIAN_TEMP(13,1)<32;16,2>
+
+// Where the flag is set: left <- right.
+(f0.0) mov (16) ubMEDIAN_TEMP(10,0)<2> ubMEDIAN_TEMP(11,0)<32;16,2>
+(f0.1) mov (16) ubMEDIAN_TEMP(13,0)<2> ubMEDIAN_TEMP(14,0)<32;16,2>
+(f1.0) mov (16) ubMEDIAN_TEMP(10,1)<2> ubMEDIAN_TEMP(11,1)<32;16,2>
+(f1.1) mov (16) ubMEDIAN_TEMP(13,1)<2> ubMEDIAN_TEMP(14,1)<32;16,2>
+
+// Where the flag is set: right <- saved left, completing the swap.
+(f0.0) mov (16) ubMEDIAN_TEMP(11,0)<2> ubTEMP1(0,0)<16;16,1>
+(f0.1) mov (16) ubMEDIAN_TEMP(14,0)<2> ubTEMP1(0,16)<16;16,1>
+(f1.0) mov (16) ubMEDIAN_TEMP(11,1)<2> ubTEMP1(1,0)<16;16,1>
+(f1.1) mov (16) ubMEDIAN_TEMP(14,1)<2> ubTEMP1(1,16)<16;16,1>
+
+// MedianSwap(v7, v8) - U
+// MedianSwap(v0, v1) - U
+// MedianSwap(v7, v8) - V
+// MedianSwap(v0, v1) - V
+ cmp.g.f0.0 (16) null:w ubMEDIAN_TEMP(16,0)<32;16,2> ubMEDIAN_TEMP(17,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w ubMEDIAN_TEMP(9,0)<32;16,2> ubMEDIAN_TEMP(10,0)<32;16,2>
+ cmp.g.f1.0 (16) null:w ubMEDIAN_TEMP(16,1)<32;16,2> ubMEDIAN_TEMP(17,1)<32;16,2>
+ cmp.g.f1.1 (16) null:w ubMEDIAN_TEMP(9,1)<32;16,2> ubMEDIAN_TEMP(10,1)<32;16,2>
+
+ mov (16) ubTEMP1(0,0)<1> ubMEDIAN_TEMP(16,0)<32;16,2>
+ mov (16) ubTEMP1(0,16)<1> ubMEDIAN_TEMP(9,0)<32;16,2>
+ mov (16) ubTEMP1(1,0)<1> ubMEDIAN_TEMP(16,1)<32;16,2>
+ mov (16) ubTEMP1(1,16)<1> ubMEDIAN_TEMP(9,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(16,0)<2> ubMEDIAN_TEMP(17,0)<32;16,2>
+ (f0.1) mov (16) ubMEDIAN_TEMP(9,0)<2> ubMEDIAN_TEMP(10,0)<32;16,2>
+ (f1.0) mov (16) ubMEDIAN_TEMP(16,1)<2> ubMEDIAN_TEMP(17,1)<32;16,2>
+ (f1.1) mov (16) ubMEDIAN_TEMP(9,1)<2> ubMEDIAN_TEMP(10,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(17,0)<2> ubTEMP1(0,0)<16;16,1>
+ (f0.1) mov (16) ubMEDIAN_TEMP(10,0)<2> ubTEMP1(0,16)<16;16,1>
+ (f1.0) mov (16) ubMEDIAN_TEMP(17,1)<2> ubTEMP1(1,0)<16;16,1>
+ (f1.1) mov (16) ubMEDIAN_TEMP(10,1)<2> ubTEMP1(1,16)<16;16,1>
+
+ // MedianSwap(v3, v4) - U
+ // MedianSwap(v6, v7) - U
+ // MedianSwap(v3, v4) - V
+ // MedianSwap(v6, v7) - V
+ cmp.g.f0.0 (16) null:w ubMEDIAN_TEMP(12,0)<32;16,2> ubMEDIAN_TEMP(13,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w ubMEDIAN_TEMP(15,0)<32;16,2> ubMEDIAN_TEMP(16,0)<32;16,2>
+ cmp.g.f1.0 (16) null:w ubMEDIAN_TEMP(12,1)<32;16,2> ubMEDIAN_TEMP(13,1)<32;16,2>
+ cmp.g.f1.1 (16) null:w ubMEDIAN_TEMP(15,1)<32;16,2> ubMEDIAN_TEMP(16,1)<32;16,2>
+
+ mov (16) ubTEMP1(0,0)<1> ubMEDIAN_TEMP(12,0)<32;16,2>
+ mov (16) ubTEMP1(0,16)<1> ubMEDIAN_TEMP(15,0)<32;16,2>
+ mov (16) ubTEMP1(1,0)<1> ubMEDIAN_TEMP(12,1)<32;16,2>
+ mov (16) ubTEMP1(1,16)<1> ubMEDIAN_TEMP(15,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(12,0)<2> ubMEDIAN_TEMP(13,0)<32;16,2>
+ (f0.1) mov (16) ubMEDIAN_TEMP(15,0)<2> ubMEDIAN_TEMP(16,0)<32;16,2>
+ (f1.0) mov (16) ubMEDIAN_TEMP(12,1)<2> ubMEDIAN_TEMP(13,1)<32;16,2>
+ (f1.1) mov (16) ubMEDIAN_TEMP(15,1)<2> ubMEDIAN_TEMP(16,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(13,0)<2> ubTEMP1(0,0)<16;16,1>
+ (f0.1) mov (16) ubMEDIAN_TEMP(16,0)<2> ubTEMP1(0,16)<16;16,1>
+ (f1.0) mov (16) ubMEDIAN_TEMP(13,1)<2> ubTEMP1(1,0)<16;16,1>
+ (f1.1) mov (16) ubMEDIAN_TEMP(16,1)<2> ubTEMP1(1,16)<16;16,1>
+
+ // MedianSwap(v1, v2) - U
+ // MedianSwap(v4, v5) - U
+ // MedianSwap(v1, v2) - V
+ // MedianSwap(v4, v5) - V
+ cmp.g.f0.0 (16) null:w ubMEDIAN_TEMP(10,0)<32;16,2> ubMEDIAN_TEMP(11,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w ubMEDIAN_TEMP(13,0)<32;16,2> ubMEDIAN_TEMP(14,0)<32;16,2>
+ cmp.g.f1.0 (16) null:w ubMEDIAN_TEMP(10,1)<32;16,2> ubMEDIAN_TEMP(11,1)<32;16,2>
+ cmp.g.f1.1 (16) null:w ubMEDIAN_TEMP(13,1)<32;16,2> ubMEDIAN_TEMP(14,1)<32;16,2>
+
+ mov (16) ubTEMP1(0,0)<1> ubMEDIAN_TEMP(10,0)<32;16,2>
+ mov (16) ubTEMP1(0,16)<1> ubMEDIAN_TEMP(13,0)<32;16,2>
+ mov (16) ubTEMP1(1,0)<1> ubMEDIAN_TEMP(10,1)<32;16,2>
+ mov (16) ubTEMP1(1,16)<1> ubMEDIAN_TEMP(13,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(10,0)<2> ubMEDIAN_TEMP(11,0)<32;16,2>
+ (f0.1) mov (16) ubMEDIAN_TEMP(13,0)<2> ubMEDIAN_TEMP(14,0)<32;16,2>
+ (f1.0) mov (16) ubMEDIAN_TEMP(10,1)<2> ubMEDIAN_TEMP(11,1)<32;16,2>
+ (f1.1) mov (16) ubMEDIAN_TEMP(13,1)<2> ubMEDIAN_TEMP(14,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(11,0)<2> ubTEMP1(0,0)<16;16,1>
+ (f0.1) mov (16) ubMEDIAN_TEMP(14,0)<2> ubTEMP1(0,16)<16;16,1>
+ (f1.0) mov (16) ubMEDIAN_TEMP(11,1)<2> ubTEMP1(1,0)<16;16,1>
+ (f1.1) mov (16) ubMEDIAN_TEMP(14,1)<2> ubTEMP1(1,16)<16;16,1>
+
+ // MedianSwap(v7, v8) - U
+ // MedianSwap(v0, v3) - U
+ // MedianSwap(v7, v8) - V
+ // MedianSwap(v0, v3) - V
+ cmp.g.f0.0 (16) null:w ubMEDIAN_TEMP(16,0)<32;16,2> ubMEDIAN_TEMP(17,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w ubMEDIAN_TEMP(9,0)<32;16,2> ubMEDIAN_TEMP(12,0)<32;16,2>
+ cmp.g.f1.0 (16) null:w ubMEDIAN_TEMP(16,1)<32;16,2> ubMEDIAN_TEMP(17,1)<32;16,2>
+ cmp.g.f1.1 (16) null:w ubMEDIAN_TEMP(9,1)<32;16,2> ubMEDIAN_TEMP(12,1)<32;16,2>
+
+ mov (16) ubTEMP1(0,0)<1> ubMEDIAN_TEMP(16,0)<32;16,2>
+ mov (16) ubTEMP1(0,16)<1> ubMEDIAN_TEMP(9,0)<32;16,2>
+ mov (16) ubTEMP1(1,0)<1> ubMEDIAN_TEMP(16,1)<32;16,2>
+ mov (16) ubTEMP1(1,16)<1> ubMEDIAN_TEMP(9,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(16,0)<2> ubMEDIAN_TEMP(17,0)<32;16,2>
+ (f0.1) mov (16) ubMEDIAN_TEMP(9,0)<2> ubMEDIAN_TEMP(12,0)<32;16,2>
+ (f1.0) mov (16) ubMEDIAN_TEMP(16,1)<2> ubMEDIAN_TEMP(17,1)<32;16,2>
+ (f1.1) mov (16) ubMEDIAN_TEMP(9,1)<2> ubMEDIAN_TEMP(12,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(17,0)<2> ubTEMP1(0,0)<16;16,1>
+ (f0.1) mov (16) ubMEDIAN_TEMP(12,0)<2> ubTEMP1(0,16)<16;16,1>
+ (f1.0) mov (16) ubMEDIAN_TEMP(17,1)<2> ubTEMP1(1,0)<16;16,1>
+ (f1.1) mov (16) ubMEDIAN_TEMP(12,1)<2> ubTEMP1(1,16)<16;16,1>
+
+ // NOTE:
+ // Compare v0 to v6 to find the minimum.
+ // Store the minimum for future use.
+ //TODO - Find if MIN is needed.
+ //cmp.l.f0.0 (16) null:w ubMEDIAN_TEMP(%1+0,0)<32;16,2> ubMEDIAN_TEMP(%1+6,0)<32;16,2>
+ //cmp.l.f1.0 (16) null:w ubMEDIAN_TEMP(%1+0,1)<32;16,2> ubMEDIAN_TEMP(%1+6,1)<32;16,2>
+ //(f0.0) mov (16) ubCURR_MIN(0,%2*16+0)<1> ubMEDIAN_TEMP(%1+0,0)<32;16,2>
+ //(f1.0) mov (16) ubCURR_MIN(1,%2*16+0)<1> ubMEDIAN_TEMP(%1+0,1)<32;16,2>
+ //(-f0.0) mov (16) ubCURR_MIN(0,%2*16+0)<1> ubMEDIAN_TEMP(%1+6,0)<32;16,2>
+ //(-f1.0) mov (16) ubCURR_MIN(1,%2*16+0)<1> ubMEDIAN_TEMP(%1+6,1)<32;16,2>
+
+ // MedianSwap(v5, v8) - U
+ // MedianSwap(v4, v7) - U
+ // MedianSwap(v5, v8) - V
+ // MedianSwap(v4, v7) - V
+ cmp.g.f0.0 (16) null:w ubMEDIAN_TEMP(14,0)<32;16,2> ubMEDIAN_TEMP(17,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w ubMEDIAN_TEMP(13,0)<32;16,2> ubMEDIAN_TEMP(16,0)<32;16,2>
+ cmp.g.f1.0 (16) null:w ubMEDIAN_TEMP(14,1)<32;16,2> ubMEDIAN_TEMP(17,1)<32;16,2>
+ cmp.g.f1.1 (16) null:w ubMEDIAN_TEMP(13,1)<32;16,2> ubMEDIAN_TEMP(16,1)<32;16,2>
+
+ mov (16) ubTEMP1(0,0)<1> ubMEDIAN_TEMP(14,0)<32;16,2>
+ mov (16) ubTEMP1(0,16)<1> ubMEDIAN_TEMP(13,0)<32;16,2>
+ mov (16) ubTEMP1(1,0)<1> ubMEDIAN_TEMP(14,1)<32;16,2>
+ mov (16) ubTEMP1(1,16)<1> ubMEDIAN_TEMP(13,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(14,0)<2> ubMEDIAN_TEMP(17,0)<32;16,2>
+ (f0.1) mov (16) ubMEDIAN_TEMP(13,0)<2> ubMEDIAN_TEMP(16,0)<32;16,2>
+ (f1.0) mov (16) ubMEDIAN_TEMP(14,1)<2> ubMEDIAN_TEMP(17,1)<32;16,2>
+ (f1.1) mov (16) ubMEDIAN_TEMP(13,1)<2> ubMEDIAN_TEMP(16,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(17,0)<2> ubTEMP1(0,0)<16;16,1>
+ (f0.1) mov (16) ubMEDIAN_TEMP(16,0)<2> ubTEMP1(0,16)<16;16,1>
+ (f1.0) mov (16) ubMEDIAN_TEMP(17,1)<2> ubTEMP1(1,0)<16;16,1>
+ (f1.1) mov (16) ubMEDIAN_TEMP(16,1)<2> ubTEMP1(1,16)<16;16,1>
+
+// NOTE:
+// Compare v2 to v8 to find the maximum.
+// Store the maximum for future use.
+ //TODO - Find if MAX is needed.
+// cmp.g.f0.0 (16) null:w ubMEDIAN_TEMP(%1+2,0)<32;16,2> ubMEDIAN_TEMP(%1+8,0)<32;16,2>
+// cmp.g.f1.0 (16) null:w ubMEDIAN_TEMP(%1+2,1)<32;16,2> ubMEDIAN_TEMP(%1+8,1)<32;16,2>
+//(f0.0) mov (16) ubCURR_MAX(0,%2*16+0)<1> ubMEDIAN_TEMP(%1+2,0)<32;16,2>
+//(f1.0) mov (16) ubCURR_MAX(1,%2*16+0)<1> ubMEDIAN_TEMP(%1+2,1)<32;16,2>
+//(-f0.0) mov (16) ubCURR_MAX(0,%2*16+0)<1> ubMEDIAN_TEMP(%1+8,0)<32;16,2>
+//(-f1.0) mov (16) ubCURR_MAX(1,%2*16+0)<1> ubMEDIAN_TEMP(%1+8,1)<32;16,2>
+
+// MedianSwap(v3, v6) - U   (half swap: only v6 = max(v3, v6) is written, v3 is left as is)
+// MedianSwap(v1, v4) - U   (half swap: only v4 = max(v1, v4) is written, v1 is left as is)
+// MedianSwap(v3, v6) - V
+// MedianSwap(v1, v4) - V
+ cmp.g.f0.0 (16) null:w ubMEDIAN_TEMP(12,0)<32;16,2> ubMEDIAN_TEMP(15,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w ubMEDIAN_TEMP(10,0)<32;16,2> ubMEDIAN_TEMP(13,0)<32;16,2>
+ cmp.g.f1.0 (16) null:w ubMEDIAN_TEMP(12,1)<32;16,2> ubMEDIAN_TEMP(15,1)<32;16,2>
+ cmp.g.f1.1 (16) null:w ubMEDIAN_TEMP(10,1)<32;16,2> ubMEDIAN_TEMP(13,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(15,0)<2> ubMEDIAN_TEMP(12,0)<32;16,2>
+ (f0.1) mov (16) ubMEDIAN_TEMP(13,0)<2> ubMEDIAN_TEMP(10,0)<32;16,2>
+ (f1.0) mov (16) ubMEDIAN_TEMP(15,1)<2> ubMEDIAN_TEMP(12,1)<32;16,2>
+ (f1.1) mov (16) ubMEDIAN_TEMP(13,1)<2> ubMEDIAN_TEMP(10,1)<32;16,2>
+
+ // MedianSwap(v2,v5) - U   (half swap: only v2 = min(v2, v5) is written, v5 is left as is)
+ // MedianSwap(v4,v7) - U   (half swap: only v4 = min(v4, v7) is written, v7 is left as is)
+ // MedianSwap(v2,v5) - V
+ // MedianSwap(v4,v7) - V
+ cmp.g.f0.0 (16) null:w ubMEDIAN_TEMP(11,0)<32;16,2> ubMEDIAN_TEMP(14,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w ubMEDIAN_TEMP(13,0)<32;16,2> ubMEDIAN_TEMP(16,0)<32;16,2>
+ cmp.g.f1.0 (16) null:w ubMEDIAN_TEMP(11,1)<32;16,2> ubMEDIAN_TEMP(14,1)<32;16,2>
+ cmp.g.f1.1 (16) null:w ubMEDIAN_TEMP(13,1)<32;16,2> ubMEDIAN_TEMP(16,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(11,0)<2> ubMEDIAN_TEMP(14,0)<32;16,2>
+ (f0.1) mov (16) ubMEDIAN_TEMP(13,0)<2> ubMEDIAN_TEMP(16,0)<32;16,2>
+ (f1.0) mov (16) ubMEDIAN_TEMP(11,1)<2> ubMEDIAN_TEMP(14,1)<32;16,2>
+ (f1.1) mov (16) ubMEDIAN_TEMP(13,1)<2> ubMEDIAN_TEMP(16,1)<32;16,2>
+
+ // MedianSwap(v4,v2) - U   (full swap through ubTEMP1: afterwards v4 <= v2)
+ // MedianSwap(v4,v2) - V   (only f0.0 / f0.1 are needed from here on: one pair per plane)
+ cmp.g.f0.0 (16) null:w ubMEDIAN_TEMP(13,0)<32;16,2> ubMEDIAN_TEMP(11,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w ubMEDIAN_TEMP(13,1)<32;16,2> ubMEDIAN_TEMP(11,1)<32;16,2>
+
+ mov (16) ubTEMP1(0,0)<1> ubMEDIAN_TEMP(13,0)<32;16,2>
+ mov (16) ubTEMP1(0,16)<1> ubMEDIAN_TEMP(13,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(13,0)<2> ubMEDIAN_TEMP(11,0)<32;16,2>
+ (f0.1) mov (16) ubMEDIAN_TEMP(13,1)<2> ubMEDIAN_TEMP(11,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(11,0)<2> ubTEMP1(0,0)<16;16,1>
+ (f0.1) mov (16) ubMEDIAN_TEMP(11,1)<2> ubTEMP1(0,16)<16;16,1>
+
+ // MedianSwap(v6,v4) - U   (half swap: only v4 = max(v4, v6) is written)
+ // MedianSwap(v6,v4) - V
+ cmp.g.f0.0 (16) null:w ubMEDIAN_TEMP(15,0)<32;16,2> ubMEDIAN_TEMP(13,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w ubMEDIAN_TEMP(15,1)<32;16,2> ubMEDIAN_TEMP(13,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(13,0)<2> ubMEDIAN_TEMP(15,0)<32;16,2>
+ (f0.1) mov (16) ubMEDIAN_TEMP(13,1)<2> ubMEDIAN_TEMP(15,1)<32;16,2>
+
+ // MedianSwap(v4,v2) - U   (half swap: only v4 = min(v4, v2) is written)
+ // MedianSwap(v4,v2) - V   (v4 = ubMEDIAN_TEMP(13) is the register copied to ubMEDIAN further down)
+ cmp.g.f0.0 (16) null:w ubMEDIAN_TEMP(13,0)<32;16,2> ubMEDIAN_TEMP(11,0)<32;16,2>
+ cmp.g.f0.1 (16) null:w ubMEDIAN_TEMP(13,1)<32;16,2> ubMEDIAN_TEMP(11,1)<32;16,2>
+
+ (f0.0) mov (16) ubMEDIAN_TEMP(13,0)<2> ubMEDIAN_TEMP(11,0)<32;16,2>
+ (f0.1) mov (16) ubMEDIAN_TEMP(13,1)<2> ubMEDIAN_TEMP(11,1)<32;16,2>
+
+// Sobel Value calculation for the current pixel v4
+// v2 v1 v0
+// * * * <--- Different field - not used
+// v5 v4 v3
+// * * * <--- Different field - not used
+// v8 v7 v6
+//
+// Gx = -v0 - 2*v3 - v6 + v2 + 2*v5 + v8
+// Gy = v0 + 2*v1 + v2 - v6 - 2*v7 - v8
+//
+// Sobel = (|Gx| + |Gy|) >> 3
+// Gx pass. Byte offsets from a0.0 for the first 16 lanes: v0=0, v2=4, v3=64, v5=68, v6=128, v8=132; each later group adds 32.
+//TODO - Change Later - rT
+add (1) a0.0:uw a0.0<0;1,0>:uw -128:uw // move the a0.0 base back by 128 bytes before the indexed reads below
+// Per group: mul starts the sum in acc0, each mac adds one more term, and the last mac writes Gx to wSOBEL_X(n).
+// - 2 * v3
+mul (16) acc0.0<1>:w r[a0.0,64]<16;16,1>:ub -2:w
+// + v8
+mac (16) acc0.0<1>:w r[a0.0,132]<16;16,1>:ub 1:w
+// - v0
+mac (16) acc0.0<1>:w r[a0.0,0]<16;16,1>:ub -1:w
+// - v6
+mac (16) acc0.0<1>:w r[a0.0,128]<16;16,1>:ub -1:w
+// + v2
+mac (16) acc0.0<1>:w r[a0.0,4]<16;16,1>:ub 1:w
+// + 2 * v5
+mac (16) wSOBEL_X(0)<1> r[a0.0,68]<16;16,1>:ub 2:w
+// - 2 * v3
+mul (16) acc0.0<1>:w r[a0.0,96]<16;16,1>:ub -2:w
+// + v8
+mac (16) acc0.0<1>:w r[a0.0,164]<16;16,1>:ub 1:w
+// - v0
+mac (16) acc0.0<1>:w r[a0.0,32]<16;16,1>:ub -1:w
+// - v6
+mac (16) acc0.0<1>:w r[a0.0,160]<16;16,1>:ub -1:w
+// + v2
+mac (16) acc0.0<1>:w r[a0.0,36]<16;16,1>:ub 1:w
+// + 2 * v5
+mac (16) wSOBEL_X(1)<1> r[a0.0,100]<16;16,1>:ub 2:w
+// - 2 * v3
+mul (16) acc0.0<1>:w r[a0.0,128]<16;16,1>:ub -2:w
+// + v8
+mac (16) acc0.0<1>:w r[a0.0,196]<16;16,1>:ub 1:w
+// - v0
+mac (16) acc0.0<1>:w r[a0.0,64]<16;16,1>:ub -1:w
+// - v6
+mac (16) acc0.0<1>:w r[a0.0,192]<16;16,1>:ub -1:w
+// + v2
+mac (16) acc0.0<1>:w r[a0.0,68]<16;16,1>:ub 1:w
+// + 2 * v5
+mac (16) wSOBEL_X(2)<1> r[a0.0,132]<16;16,1>:ub 2:w
+// - 2 * v3
+mul (16) acc0.0<1>:w r[a0.0,160]<16;16,1>:ub -2:w
+// + v8
+mac (16) acc0.0<1>:w r[a0.0,228]<16;16,1>:ub 1:w
+// - v0
+mac (16) acc0.0<1>:w r[a0.0,96]<16;16,1>:ub -1:w
+// - v6
+mac (16) acc0.0<1>:w r[a0.0,224]<16;16,1>:ub -1:w
+// + v2
+mac (16) acc0.0<1>:w r[a0.0,100]<16;16,1>:ub 1:w
+// + 2 * v5
+mac (16) wSOBEL_X(3)<1> r[a0.0,164]<16;16,1>:ub 2:w
+
+// + 2 * v1   (Gy terms; byte offsets from a0.0: v0=0, v1=2, v2=4, v6=128, v7=130, v8=132, plus 32 per later group)
+mul (16) acc0.0<1>:w r[a0.0,2]<16;16,1>:ub 2:w
+// + v0
+mac (16) acc0.0<1>:w r[a0.0,0]<16;16,1>:ub 1:w
+// - v8
+mac (16) acc0.0<1>:w r[a0.0,132]<16;16,1>:ub -1:w
+// + v2
+mac (16) acc0.0<1>:w r[a0.0,4]<16;16,1>:ub 1:w
+// - v6
+mac (16) acc0.0<1>:w r[a0.0,128]<16;16,1>:ub -1:w
+// - 2 * v7
+mac (16) acc0.0<1>:w r[a0.0,130]<16;16,1>:ub -2:w
+// Gy stays in acc0 (never written to a GRF): add |Gy| to the saved |Gx|, then shift right by 3 to get the Sobel value.
+add (16) acc0.0<1>:uw (abs)acc0.0<16;16,1>:w (abs)wSOBEL_X(0)<16;16,1>
+
+shr (16) uwSOBEL(0)<1> acc0.0<16;16,1>:uw 3:uw
+// + 2 * v1
+mul (16) acc0.0<1>:w r[a0.0,34]<16;16,1>:ub 2:w
+// + v0
+mac (16) acc0.0<1>:w r[a0.0,32]<16;16,1>:ub 1:w
+// - v8
+mac (16) acc0.0<1>:w r[a0.0,164]<16;16,1>:ub -1:w
+// + v2
+mac (16) acc0.0<1>:w r[a0.0,36]<16;16,1>:ub 1:w
+// - v6
+mac (16) acc0.0<1>:w r[a0.0,160]<16;16,1>:ub -1:w
+// - 2 * v7
+mac (16) acc0.0<1>:w r[a0.0,162]<16;16,1>:ub -2:w
+// Same combine step for the second group: (|Gy| + |Gx|) >> 3 into uwSOBEL(1).
+add (16) acc0.0<1>:uw (abs)acc0.0<16;16,1>:w (abs)wSOBEL_X(1)<16;16,1>
+
+shr (16) uwSOBEL(1)<1> acc0.0<16;16,1>:uw 3:uw
+// + 2 * v1
+mul (16) acc0.0<1>:w r[a0.0,66]<16;16,1>:ub 2:w
+// + v0
+mac (16) acc0.0<1>:w r[a0.0,64]<16;16,1>:ub 1:w
+// - v8
+mac (16) acc0.0<1>:w r[a0.0,196]<16;16,1>:ub -1:w
+// + v2
+mac (16) acc0.0<1>:w r[a0.0,68]<16;16,1>:ub 1:w
+// - v6
+mac (16) acc0.0<1>:w r[a0.0,192]<16;16,1>:ub -1:w
+// - 2 * v7
+mac (16) acc0.0<1>:w r[a0.0,194]<16;16,1>:ub -2:w
+// Same combine step for the third group: (|Gy| + |Gx|) >> 3 into uwSOBEL(2).
+add (16) acc0.0<1>:uw (abs)acc0.0<16;16,1>:w (abs)wSOBEL_X(2)<16;16,1>
+
+shr (16) uwSOBEL(2)<1> acc0.0<16;16,1>:uw 3:uw
+// + 2 * v1
+mul (16) acc0.0<1>:w r[a0.0,98]<16;16,1>:ub 2:w
+// + v0
+mac (16) acc0.0<1>:w r[a0.0,96]<16;16,1>:ub 1:w
+// - v8
+mac (16) acc0.0<1>:w r[a0.0,228]<16;16,1>:ub -1:w
+// + v2
+mac (16) acc0.0<1>:w r[a0.0,100]<16;16,1>:ub 1:w
+// - v6
+mac (16) acc0.0<1>:w r[a0.0,224]<16;16,1>:ub -1:w
+// - 2 * v7
+mac (16) acc0.0<1>:w r[a0.0,226]<16;16,1>:ub -2:w
+// Same combine step for the fourth group: (|Gy| + |Gx|) >> 3 into uwSOBEL(3).
+add (16) acc0.0<1>:uw (abs)acc0.0<16;16,1>:w (abs)wSOBEL_X(3)<16;16,1>
+
+shr (16) uwSOBEL(3)<1> acc0.0<16;16,1>:uw 3:uw
+
+//Mov Median in CURBE_TEMP to free up temp space.
+mov (16) ubMEDIAN(0,0)<1> ubMEDIAN_TEMP(4,0)<16;16,1>
+mov (16) ubMEDIAN(0,16)<1> ubMEDIAN_TEMP(4,16)<16;16,1>
+mov (16) ubMEDIAN(0,32)<1> ubMEDIAN_TEMP(13,0)<16;16,1>
+mov (16) ubMEDIAN(0,48)<1> ubMEDIAN_TEMP(13,16)<16;16,1>
+// ubMEDIAN bytes 0..31 now hold ubMEDIAN_TEMP(4), bytes 32..63 hold ubMEDIAN_TEMP(13).
+// Find:
+// absDiff = abs(ubCurY - ubMedian)
+// Find the difference between pixel and median value.
+// The adds below store the signed difference in wDIFF; (abs) is applied later as a source modifier where wDIFF is read.
+//Median is interleaved. So difference is also interleaved.
+
+//------------------------------------------------------------------------------------------
+//Process 16 U and 16 V pixels here and rest later.
+// first row - v0,v1,v2 (first 16 lanes: median from ubMEDIAN(0,0), results in wDIFF(0..8))
+add (16) wDIFF(0)<1> r[a0.0,0]<16;16,1>:ub -ubMEDIAN(0,0)<16;16,1>
+add (16) wDIFF(1)<1> r[a0.0,2]<16;16,1>:ub -ubMEDIAN(0,0)<16;16,1>
+add (16) wDIFF(2)<1> r[a0.0,4]<16;16,1>:ub -ubMEDIAN(0,0)<16;16,1>
+
+// second row - v3,v4,v5
+add (16) wDIFF(3)<1> r[a0.0,64]<16;16,1>:ub -ubMEDIAN(0,0)<16;16,1>
+add (16) wDIFF(4)<1> r[a0.0,66]<16;16,1>:ub -ubMEDIAN(0,0)<16;16,1>
+add (16) wDIFF(5)<1> r[a0.0,68]<16;16,1>:ub -ubMEDIAN(0,0)<16;16,1>
+
+// third row - v6,v7,v8
+add (16) wDIFF(6)<1> r[a0.0,128]<16;16,1>:ub -ubMEDIAN(0,0)<16;16,1>
+add (16) wDIFF(7)<1> r[a0.0,130]<16;16,1>:ub -ubMEDIAN(0,0)<16;16,1>
+add (16) wDIFF(8)<1> r[a0.0,132]<16;16,1>:ub -ubMEDIAN(0,0)<16;16,1>
+// first row - v0,v1,v2 (next 16 lanes: source offsets +32, median from ubMEDIAN(0,16), results in wDIFF(9..17))
+add (16) wDIFF(9)<1> r[a0.0,32]<16;16,1>:ub -ubMEDIAN(0,16)<16;16,1>
+add (16) wDIFF(10)<1> r[a0.0,34]<16;16,1>:ub -ubMEDIAN(0,16)<16;16,1>
+add (16) wDIFF(11)<1> r[a0.0,36]<16;16,1>:ub -ubMEDIAN(0,16)<16;16,1>
+
+// second row - v3,v4,v5
+add (16) wDIFF(12)<1> r[a0.0,96]<16;16,1>:ub -ubMEDIAN(0,16)<16;16,1>
+add (16) wDIFF(13)<1> r[a0.0,98]<16;16,1>:ub -ubMEDIAN(0,16)<16;16,1>
+add (16) wDIFF(14)<1> r[a0.0,100]<16;16,1>:ub -ubMEDIAN(0,16)<16;16,1>
+
+// third row - v6,v7,v8
+add (16) wDIFF(15)<1> r[a0.0,160]<16;16,1>:ub -ubMEDIAN(0,16)<16;16,1>
+add (16) wDIFF(16)<1> r[a0.0,162]<16;16,1>:ub -ubMEDIAN(0,16)<16;16,1>
+add (16) wDIFF(17)<1> r[a0.0,164]<16;16,1>:ub -ubMEDIAN(0,16)<16;16,1>
+// Advance the a0.0 base by 64 bytes; the later wDIFF reloads reuse the same offsets against ubMEDIAN(1,x).
+//TODO - Change Later - rT
+add (1) a0.0:uw a0.0<0;1,0>:uw 64:uw
+
+// Find sum of all absolute differences AND
+// maximum absolute difference for 16 U and 16 V here.
+//First 2 rows of 8x4
+//Compare 0-1, 2-3, 4-5, 6-7
+cmp.g.f0.0 (16) null:uw (abs)wDIFF(0)<16;16,1> (abs)wDIFF(1)<16;16,1>
+cmp.g.f0.1 (16) null:uw (abs)wDIFF(2)<16;16,1> (abs)wDIFF(3)<16;16,1>
+cmp.g.f1.0 (16) null:uw (abs)wDIFF(4)<16;16,1> (abs)wDIFF(5)<16;16,1>
+cmp.g.f1.1 (16) null:uw (abs)wDIFF(6)<16;16,1> (abs)wDIFF(7)<16;16,1>
+
+//Calculate SAD
+ add (16) acc0.0<1>:uw (abs)wDIFF(0)<16;16,1> (abs)wDIFF(1)<16;16,1>
+ add (16) acc0.0<1>:uw acc0.0<16;16,1>:uw (abs)wDIFF(2)<16;16,1>
+ add (16) acc0.0<1>:uw acc0.0<16;16,1>:uw (abs)wDIFF(3)<16;16,1>
+ add (16) acc0.0<1>:uw acc0.0<16;16,1>:uw (abs)wDIFF(4)<16;16,1>
+ add (16) acc0.0<1>:uw acc0.0<16;16,1>:uw (abs)wDIFF(5)<16;16,1>
+ add (16) acc0.0<1>:uw acc0.0<16;16,1>:uw (abs)wDIFF(6)<16;16,1>
+ add (16) acc0.0<1>:uw acc0.0<16;16,1>:uw (abs)wDIFF(7)<16;16,1>
+ add (16) uwSOAD(0)<1> acc0.0<16;16,1>:uw (abs)wDIFF(8)<16;16,1>
+
+(f0.0) sel (16) uwCURBE_TEMP(0)<1> (abs)wDIFF(0)<16;16,1> (abs)wDIFF(1)<16;16,1>
+(f0.1) sel (16) uwCURBE_TEMP(1)<1> (abs)wDIFF(2)<16;16,1> (abs)wDIFF(3)<16;16,1>
+(f1.0) sel (16) uwCURBE_TEMP(2)<1> (abs)wDIFF(4)<16;16,1> (abs)wDIFF(5)<16;16,1>
+(f1.1) sel (16) uwCURBE_TEMP(3)<1> (abs)wDIFF(6)<16;16,1> (abs)wDIFF(7)<16;16,1>
+
+//------------
+ //DIFF(0-7) has been consumed above. Repopulate it (v0..v7) for the next pass, using ubMEDIAN(1,0).
+ // first row - v0,v1,v2
+ add (16) wDIFF(0)<1> r[a0.0,0]<16;16,1>:ub -ubMEDIAN(1,0)<16;16,1>
+ add (16) wDIFF(1)<1> r[a0.0,2]<16;16,1>:ub -ubMEDIAN(1,0)<16;16,1>
+ add (16) wDIFF(2)<1> r[a0.0,4]<16;16,1>:ub -ubMEDIAN(1,0)<16;16,1>
+
+ // second row - v3,v4,v5
+ add (16) wDIFF(3)<1> r[a0.0,64]<16;16,1>:ub -ubMEDIAN(1,0)<16;16,1>
+ add (16) wDIFF(4)<1> r[a0.0,66]<16;16,1>:ub -ubMEDIAN(1,0)<16;16,1>
+ add (16) wDIFF(5)<1> r[a0.0,68]<16;16,1>:ub -ubMEDIAN(1,0)<16;16,1>
+
+ // third row - v6,v7
+ add (16) wDIFF(6)<1> r[a0.0,128]<16;16,1>:ub -ubMEDIAN(1,0)<16;16,1>
+ add (16) wDIFF(7)<1> r[a0.0,130]<16;16,1>:ub -ubMEDIAN(1,0)<16;16,1>
+//------------
+
+//Compare Max(0,1) - Max(2,3), Max(4,5) - Max(6,7)
+cmp.g.f0.0 (16) null:uw uwCURBE_TEMP(0)<16;16,1> uwCURBE_TEMP(1)<16;16,1>
+cmp.g.f0.1 (16) null:uw uwCURBE_TEMP(2)<16;16,1> uwCURBE_TEMP(3)<16;16,1>
+
+(f0.0)sel (16) uwCURBE_TEMP(0)<1> uwCURBE_TEMP(0)<16;16,1> uwCURBE_TEMP(1)<16;16,1>
+(f0.1)sel (16) uwCURBE_TEMP(2)<1> uwCURBE_TEMP(2)<16;16,1> uwCURBE_TEMP(3)<16;16,1>
+
+//Compare Max(0,1,2,3) - Max(4,5,6,7)
+cmp.g.f0.0 (16) null:uw uwCURBE_TEMP(0)<16;16,1> uwCURBE_TEMP(2)<16;16,1>
+(f0.0)sel (16) uwCURBE_TEMP(0)<1> uwCURBE_TEMP(0)<16;16,1> uwCURBE_TEMP(2)<16;16,1>
+
+//Compare Max(0,1,2,3,4,5,6,7) - 8
+cmp.g.f0.0 (16) null:uw uwCURBE_TEMP(0)<16;16,1> (abs)wDIFF(8)<16;16,1>
+(f0.0)sel (16) uwMAX_ABS_DIFF(0)<1> uwCURBE_TEMP(0)<16;16,1> (abs)wDIFF(8)<16;16,1>
+
+//------------
+ //Load v8 - DIFF(8)
+ add (16) wDIFF(8)<1> r[a0.0,132]<16;16,1>:ub -ubMEDIAN(1,0)<16;16,1>
+//------------
+//Compare 9-10, 11-12, 13-14, 15-16
+cmp.g.f0.0 (16) null:uw (abs)wDIFF(9)<16;16,1> (abs)wDIFF(10)<16;16,1>
+cmp.g.f0.1 (16) null:uw (abs)wDIFF(11)<16;16,1> (abs)wDIFF(12)<16;16,1>
+cmp.g.f1.0 (16) null:uw (abs)wDIFF(13)<16;16,1> (abs)wDIFF(14)<16;16,1>
+cmp.g.f1.1 (16) null:uw (abs)wDIFF(15)<16;16,1> (abs)wDIFF(16)<16;16,1>
+
+//Calculate SAD
+ add (16) acc0.0<1>:uw (abs)wDIFF(9)<16;16,1> (abs)wDIFF(10)<16;16,1>
+ add (16) acc0.0<1>:uw acc0.0<16;16,1>:uw (abs)wDIFF(11)<16;16,1>
+ add (16) acc0.0<1>:uw acc0.0<16;16,1>:uw (abs)wDIFF(12)<16;16,1>
+ add (16) acc0.0<1>:uw acc0.0<16;16,1>:uw (abs)wDIFF(13)<16;16,1>
+ add (16) acc0.0<1>:uw acc0.0<16;16,1>:uw (abs)wDIFF(14)<16;16,1>
+ add (16) acc0.0<1>:uw acc0.0<16;16,1>:uw (abs)wDIFF(15)<16;16,1>
+ add (16) acc0.0<1>:uw acc0.0<16;16,1>:uw (abs)wDIFF(16)<16;16,1>
+ add (16) uwSOAD(1)<1> acc0.0<16;16,1>:uw (abs)wDIFF(17)<16;16,1>
+
+(f0.0) sel (16) uwCURBE_TEMP(0)<1> (abs)wDIFF(9)<16;16,1> (abs)wDIFF(10)<16;16,1>
+(f0.1) sel (16) uwCURBE_TEMP(1)<1> (abs)wDIFF(11)<16;16,1> (abs)wDIFF(12)<16;16,1>
+(f1.0) sel (16) uwCURBE_TEMP(2)<1> (abs)wDIFF(13)<16;16,1> (abs)wDIFF(14)<16;16,1>
+(f1.1) sel (16) uwCURBE_TEMP(3)<1> (abs)wDIFF(15)<16;16,1> (abs)wDIFF(16)<16;16,1>
+
+//------------
+ //DIFF(9-16) has been consumed above. Repopulate it (v0..v7) for the next pass, using ubMEDIAN(1,16).
+ // first row - v0,v1,v2
+ add (16) wDIFF(9)<1> r[a0.0,32]<16;16,1>:ub -ubMEDIAN(1,16)<16;16,1>
+ add (16) wDIFF(10)<1> r[a0.0,34]<16;16,1>:ub -ubMEDIAN(1,16)<16;16,1>
+ add (16) wDIFF(11)<1> r[a0.0,36]<16;16,1>:ub -ubMEDIAN(1,16)<16;16,1>
+
+ // second row - v3,v4,v5
+ add (16) wDIFF(12)<1> r[a0.0,96]<16;16,1>:ub -ubMEDIAN(1,16)<16;16,1>
+ add (16) wDIFF(13)<1> r[a0.0,98]<16;16,1>:ub -ubMEDIAN(1,16)<16;16,1>
+ add (16) wDIFF(14)<1> r[a0.0,100]<16;16,1>:ub -ubMEDIAN(1,16)<16;16,1>
+
+ // third row - v6,v7
+ add (16) wDIFF(15)<1> r[a0.0,160]<16;16,1>:ub -ubMEDIAN(1,16)<16;16,1>
+ add (16) wDIFF(16)<1> r[a0.0,162]<16;16,1>:ub -ubMEDIAN(1,16)<16;16,1>
+//------------
+
+//Compare Max(0,1) - Max(2,3), Max(4,5) - Max(6,7)
+cmp.g.f0.0 (16) null:uw uwCURBE_TEMP(0)<16;16,1> uwCURBE_TEMP(1)<16;16,1>
+cmp.g.f0.1 (16) null:uw uwCURBE_TEMP(2)<16;16,1> uwCURBE_TEMP(3)<16;16,1>
+
+(f0.0)sel (16) uwCURBE_TEMP(0)<1> uwCURBE_TEMP(0)<16;16,1> uwCURBE_TEMP(1)<16;16,1>
+(f0.1)sel (16) uwCURBE_TEMP(2)<1> uwCURBE_TEMP(2)<16;16,1> uwCURBE_TEMP(3)<16;16,1>
+
+//Compare Max(0,1,2,3) - Max(4,5,6,7)
+cmp.g.f0.0 (16) null:uw uwCURBE_TEMP(0)<16;16,1> uwCURBE_TEMP(2)<16;16,1>
+(f0.0)sel (16) uwCURBE_TEMP(0)<1> uwCURBE_TEMP(0)<16;16,1> uwCURBE_TEMP(2)<16;16,1>
+
+//Compare Max(9,10,11,12,13,14,15,16) - 17
+cmp.g.f0.0 (16) null:uw uwCURBE_TEMP(0)<16;16,1> (abs)wDIFF(17)<16;16,1>
+(f0.0)sel (16) uwMAX_ABS_DIFF(1)<1> uwCURBE_TEMP(0)<16;16,1> (abs)wDIFF(17)<16;16,1>
+
+//------------
+ //Load v8 - DIFF(17)
+ add (16) wDIFF(17)<1> r[a0.0,164]<16;16,1>:ub -ubMEDIAN(1,16)<16;16,1>
+//------------
+
+//if ((sobel_edge_measure < m_SobelEdgeThreshold) && ((block_max-block_min) < m_LocalDiffThreshold))
+// if (sigma_mb_min > sigma)
+// sigma_mb_min = sigma;
+
+//NOTE: block_min is always zero because the median is one of the values in the 3x3 block, so there is no need to calculate it.
+// So just do -
+//if ((sobel_edge_measure < m_SobelEdgeThreshold) && ((block_max) < m_LocalDiffThreshold) && ( sigma < sigma_mb_min))
+// sigma_mb_min = sigma;
+// NOTE(review): r55.30 / r55.28 below presumably carry m_SobelEdgeThreshold / m_LocalDiffThreshold - confirm against the CURBE layout.
+//We are processing 32 bytes of U and 32 bytes of V - each of size 8x4.
+//Compare first 8 bytes with max possible (255).
+//Start above condition from second 8 bytes.
+// Each predicated cmp re-tests only the lanes that passed the previous one, so f0.0 ends as the AND of the three tests.
+//TODO - Change Later - rT
+// mov (1) pCUR_MIN_SOAD_8x4:uw 1752:uw //r54.24:ub
+// The <0;2,1>:ub regions repeat a byte pair across the lanes: even lanes read byte .30 (.28), odd lanes byte .31 (.29).
+//First row of 8x4 - seeds the running minimum kept in uwSOBEL(0): SOAD(0) where all tests pass, 255 elsewhere
+ cmp.l.f0.0 (16) null:uw uwSOBEL(0)<16;16,1> r55.30<0;2,1>:ub
+(f0.0) cmp.l.f0.0 (16) null:uw uwMAX_ABS_DIFF(0)<16;16,1> r55.28<0;2,1>:ub
+(f0.0) cmp.l.f0.0 (16) null:uw uwSOAD(0)<16;16,1> 255:uw
+(f0.0) sel (16) uwSOBEL(0)<1> uwSOAD(0)<16;16,1> 255:uw
+
+//Second row of 8x4 - uwSOBEL(0) takes SOAD(1) in lanes that pass both thresholds and where SOAD(1) is smaller
+ cmp.l.f0.0 (16) null:uw uwSOBEL(1)<16;16,1> r55.30<0;2,1>:ub
+(f0.0) cmp.l.f0.0 (16) null:uw uwMAX_ABS_DIFF(1)<16;16,1> r55.28<0;2,1>:ub
+(f0.0) cmp.l.f0.0 (16) null:uw uwSOAD(1)<16;16,1> uwSOBEL(0)<16;16,1>
+(f0.0) mov (16) uwSOBEL(0)<1> uwSOAD(1)<16;16,1>
+
+// Find sum of all absolute differences AND
+// maximum absolute difference for 16 U and 16 V here.
+//Second 2 rows of 8x4
+//Compare 0-1, 2-3, 4-5, 6-7
+cmp.g.f0.0 (16) null:uw (abs)wDIFF(0)<16;16,1> (abs)wDIFF(1)<16;16,1>
+cmp.g.f0.1 (16) null:uw (abs)wDIFF(2)<16;16,1> (abs)wDIFF(3)<16;16,1>
+cmp.g.f1.0 (16) null:uw (abs)wDIFF(4)<16;16,1> (abs)wDIFF(5)<16;16,1>
+cmp.g.f1.1 (16) null:uw (abs)wDIFF(6)<16;16,1> (abs)wDIFF(7)<16;16,1>
+
+//Calculate SAD
+ add (16) acc0.0<1>:uw (abs)wDIFF(0)<16;16,1> (abs)wDIFF(1)<16;16,1>
+ add (16) acc0.0<1>:uw acc0.0<16;16,1>:uw (abs)wDIFF(2)<16;16,1>
+ add (16) acc0.0<1>:uw acc0.0<16;16,1>:uw (abs)wDIFF(3)<16;16,1>
+ add (16) acc0.0<1>:uw acc0.0<16;16,1>:uw (abs)wDIFF(4)<16;16,1>
+ add (16) acc0.0<1>:uw acc0.0<16;16,1>:uw (abs)wDIFF(5)<16;16,1>
+ add (16) acc0.0<1>:uw acc0.0<16;16,1>:uw (abs)wDIFF(6)<16;16,1>
+ add (16) acc0.0<1>:uw acc0.0<16;16,1>:uw (abs)wDIFF(7)<16;16,1>
+ add (16) uwSOAD(0)<1> acc0.0<16;16,1>:uw (abs)wDIFF(8)<16;16,1>
+
+(f0.0) sel (16) uwCURBE_TEMP(0)<1> (abs)wDIFF(0)<16;16,1> (abs)wDIFF(1)<16;16,1>
+(f0.1) sel (16) uwCURBE_TEMP(1)<1> (abs)wDIFF(2)<16;16,1> (abs)wDIFF(3)<16;16,1>
+(f1.0) sel (16) uwCURBE_TEMP(2)<1> (abs)wDIFF(4)<16;16,1> (abs)wDIFF(5)<16;16,1>
+(f1.1) sel (16) uwCURBE_TEMP(3)<1> (abs)wDIFF(6)<16;16,1> (abs)wDIFF(7)<16;16,1>
+
+//Compare Max(0,1) - Max(2,3), Max(4,5) - Max(6,7)
+cmp.g.f0.0 (16) null:uw uwCURBE_TEMP(0)<16;16,1> uwCURBE_TEMP(1)<16;16,1>
+cmp.g.f0.1 (16) null:uw uwCURBE_TEMP(2)<16;16,1> uwCURBE_TEMP(3)<16;16,1>
+
+(f0.0)sel (16) uwCURBE_TEMP(0)<1> uwCURBE_TEMP(0)<16;16,1> uwCURBE_TEMP(1)<16;16,1>
+(f0.1)sel (16) uwCURBE_TEMP(2)<1> uwCURBE_TEMP(2)<16;16,1> uwCURBE_TEMP(3)<16;16,1>
+
+//Compare Max(0,1,2,3) - Max(4,5,6,7)
+cmp.g.f0.0 (16) null:uw uwCURBE_TEMP(0)<16;16,1> uwCURBE_TEMP(2)<16;16,1>
+(f0.0)sel (16) uwCURBE_TEMP(0)<1> uwCURBE_TEMP(0)<16;16,1> uwCURBE_TEMP(2)<16;16,1>
+
+//Compare Max(0,1,2,3,4,5,6,7) - 8
+cmp.g.f0.0 (16) null:uw uwCURBE_TEMP(0)<16;16,1> (abs)wDIFF(8)<16;16,1>
+(f0.0)sel (16) uwMAX_ABS_DIFF(0)<1> uwCURBE_TEMP(0)<16;16,1> (abs)wDIFF(8)<16;16,1>
+//Compare 9-10, 11-12, 13-14, 15-16
+cmp.g.f0.0 (16) null:uw (abs)wDIFF(9)<16;16,1> (abs)wDIFF(10)<16;16,1>
+cmp.g.f0.1 (16) null:uw (abs)wDIFF(11)<16;16,1> (abs)wDIFF(12)<16;16,1>
+cmp.g.f1.0 (16) null:uw (abs)wDIFF(13)<16;16,1> (abs)wDIFF(14)<16;16,1>
+cmp.g.f1.1 (16) null:uw (abs)wDIFF(15)<16;16,1> (abs)wDIFF(16)<16;16,1>
+
+//Calculate SAD
+ add (16) acc0.0<1>:uw (abs)wDIFF(9)<16;16,1> (abs)wDIFF(10)<16;16,1>
+ add (16) acc0.0<1>:uw acc0.0<16;16,1>:uw (abs)wDIFF(11)<16;16,1>
+ add (16) acc0.0<1>:uw acc0.0<16;16,1>:uw (abs)wDIFF(12)<16;16,1>
+ add (16) acc0.0<1>:uw acc0.0<16;16,1>:uw (abs)wDIFF(13)<16;16,1>
+ add (16) acc0.0<1>:uw acc0.0<16;16,1>:uw (abs)wDIFF(14)<16;16,1>
+ add (16) acc0.0<1>:uw acc0.0<16;16,1>:uw (abs)wDIFF(15)<16;16,1>
+ add (16) acc0.0<1>:uw acc0.0<16;16,1>:uw (abs)wDIFF(16)<16;16,1>
+ add (16) uwSOAD(1)<1> acc0.0<16;16,1>:uw (abs)wDIFF(17)<16;16,1>
+
+(f0.0) sel (16) uwCURBE_TEMP(0)<1> (abs)wDIFF(9)<16;16,1> (abs)wDIFF(10)<16;16,1>
+(f0.1) sel (16) uwCURBE_TEMP(1)<1> (abs)wDIFF(11)<16;16,1> (abs)wDIFF(12)<16;16,1>
+(f1.0) sel (16) uwCURBE_TEMP(2)<1> (abs)wDIFF(13)<16;16,1> (abs)wDIFF(14)<16;16,1>
+(f1.1) sel (16) uwCURBE_TEMP(3)<1> (abs)wDIFF(15)<16;16,1> (abs)wDIFF(16)<16;16,1>
+
+//Compare Max(0,1) - Max(2,3), Max(4,5) - Max(6,7)
+cmp.g.f0.0 (16) null:uw uwCURBE_TEMP(0)<16;16,1> uwCURBE_TEMP(1)<16;16,1>
+cmp.g.f0.1 (16) null:uw uwCURBE_TEMP(2)<16;16,1> uwCURBE_TEMP(3)<16;16,1>
+
+(f0.0)sel (16) uwCURBE_TEMP(0)<1> uwCURBE_TEMP(0)<16;16,1> uwCURBE_TEMP(1)<16;16,1>
+(f0.1)sel (16) uwCURBE_TEMP(2)<1> uwCURBE_TEMP(2)<16;16,1> uwCURBE_TEMP(3)<16;16,1>
+
+//Compare Max(0,1,2,3) - Max(4,5,6,7)
+cmp.g.f0.0 (16) null:uw uwCURBE_TEMP(0)<16;16,1> uwCURBE_TEMP(2)<16;16,1>
+(f0.0)sel (16) uwCURBE_TEMP(0)<1> uwCURBE_TEMP(0)<16;16,1> uwCURBE_TEMP(2)<16;16,1>
+
+//Compare Max(9,10,11,12,13,14,15,16) - 17
+cmp.g.f0.0 (16) null:uw uwCURBE_TEMP(0)<16;16,1> (abs)wDIFF(17)<16;16,1>
+(f0.0)sel (16) uwMAX_ABS_DIFF(1)<1> uwCURBE_TEMP(0)<16;16,1> (abs)wDIFF(17)<16;16,1>
+
+//Third row of 8x4 - uwSOBEL(0) holds the running minimum SOAD; update it where both thresholds pass and SOAD(0) is smaller
+ cmp.l.f0.0 (16) null:uw uwSOBEL(2)<16;16,1> r55.30<0;2,1>:ub
+(f0.0) cmp.l.f0.0 (16) null:uw uwMAX_ABS_DIFF(0)<16;16,1> r55.28<0;2,1>:ub
+(f0.0) cmp.l.f0.0 (16) null:uw uwSOAD(0)<16;16,1> uwSOBEL(0)<16;16,1>
+(f0.0) mov (16) uwSOBEL(0)<1> uwSOAD(0)<16;16,1>
+
+//Fourth row of 8x4 - same test with uwSOBEL(3), uwMAX_ABS_DIFF(1) and uwSOAD(1)
+ cmp.l.f0.0 (16) null:uw uwSOBEL(3)<16;16,1> r55.30<0;2,1>:ub
+(f0.0) cmp.l.f0.0 (16) null:uw uwMAX_ABS_DIFF(1)<16;16,1> r55.28<0;2,1>:ub
+(f0.0) cmp.l.f0.0 (16) null:uw uwSOAD(1)<16;16,1> uwSOBEL(0)<16;16,1>
+(f0.0) mov (16) uwSOBEL(0)<1> uwSOAD(1)<16;16,1>
+// Fold the 16 per-lane minima 16 -> 8 -> 4 -> 2 words, keeping the smaller of word i and word i + N/2 at each step.
+ cmp.l.f0.0 (8) null:uw uwSOBEL(0,0)<8;8,1> uwSOBEL(0,8)<8;8,1>
+(f0.0) sel (8) uwSOBEL(0)<1> uwSOBEL(0,0)<8;8,1> uwSOBEL(0,8)<8;8,1>
+// NOTE(review): every fold pairs words of equal parity, so with interleaved U/V lanes the two survivors would be the U and V minima - confirm.
+ cmp.l.f0.0 (4) null:uw uwSOBEL(0,0)<4;4,1> uwSOBEL(0,4)<4;4,1>
+(f0.0) sel (4) uwSOBEL(0)<1> uwSOBEL(0,0)<4;4,1> uwSOBEL(0,4)<4;4,1>
+// The last fold writes its two words through the a0.1 index register instead of back into uwSOBEL(0).
+ cmp.l.f0.0 (2) null:uw uwSOBEL(0,0)<2;2,1> uwSOBEL(0,2)<2;2,1>
+(f0.0) sel (2) r[a0.1,0]<1>:uw uwSOBEL(0,0)<2;2,1> uwSOBEL(0,2)<2;2,1>
+
+
+
+
+
+
+// End of common.inc
+
+mov (1) ip:ud r7.7<0;1,0>:d // load ip from r7.7; presumably the return address for this module - confirm against the caller
+
+
+.end_code
+.end_kernel
diff --git a/src/shaders/post_processing/gen7/PL3_DN_422CP.g4a b/src/shaders/post_processing/gen7/PL3_DN_422CP.g4a
new file mode 100644
index 0000000..8192108
--- /dev/null
+++ b/src/shaders/post_processing/gen7/PL3_DN_422CP.g4a
@@ -0,0 +1,544 @@
+/*
+ * Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// 117 // Total instruction count
+// 1 // Total kernel count
+
+.kernel PL3_DN_422CP
+.code
+
+
+
+// FileName: DN_PL_Core.asm
+// Author: Vivek Kumar
+// Description: Tasks for DN only case (16x8 block) for planar format
+
+
+
+// FileName: DN.asm
+// Author: Vivek Kumar
+// Description: Tasks for DN only case (16x8 block)
+
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16)
+.default_register_type :ub
+
+.reg_count_total 128
+.reg_count_payload 7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later)
+// 0000,0000,0000
+// 0001(Spawn a root thread),0001 (Root thread spawn thread)
+// = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later)
+// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+// 0000,0000 (Binding table index, added later)
+// = 0x0C098700
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+ // Message Descriptors
+ // = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+ // 1 (header present 1) 0 1010 (media block write) 000000
+ // 00000000 (binding table index - set later)
+ // = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+ // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT
+
+
+.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+
+.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+
+.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+
+
+// End of common.inc
+
+
+// FileName: DNDI.inc
+// Author: Vivek Kumar
+// Description: Include file for DN, DI and DNDI
+// Inputs: DI_ENABLE, DN_ENABLE, DN_PLANAR, DN_PACKED
+
+
+
+
+// End of common.inc
+
+
+//Interface:
+//Static Parameters:
+//r1
+
+
+//====================== Binding table (Explicit To DNDI)=========================================
+
+
+.declare mudMSGHDR_DNDI Base=r18 ElementSize=4 Type=ud
+.declare mdMSGHDR_DNDI Base=r18 ElementSize=4 Type=d
+.declare mwMSGHDR_DNDI Base=r18 ElementSize=2 Type=w
+
+
+.declare mudMSGHDR_STMM Base=r20 ElementSize=4 Type=ud
+
+
+.declare mudMSGHDR_HIST Base=r22 ElementSize=4 Type=ud
+
+
+.declare mudMSGHDR_ENC_STATS Base=r24 ElementSize=4 Type=ud
+.declare muwMSGHDR_ENC_STATS Base=r24 ElementSize=2 Type=uw
+.declare mubMSGHDR_ENC_STATS Base=r24 ElementSize=1 Type=ub
+
+
+.declare mudMSGHDR_DN_OUT Base=r31.0 ElementSize=4 Type=ud
+.declare mdMSGHDR_DN_OUT Base=r31.0 ElementSize=4 Type=d
+.declare mubMSGHDR_DN_OUT Base=r31.0 ElementSize=1 Type=ub
+
+
+.declare mudMSGHDR_UVCOPY Base=r36 ElementSize=4 Type=ud
+.declare mdMSGHDR_UVCOPY Base=r36 ElementSize=4 Type=d
+.declare mudMSGHDR_UCOPY Base=r36 ElementSize=4 Type=ud
+.declare mudMSGHDR_VCOPY Base=r38 ElementSize=4 Type=ud
+
+
+.declare mudMSGHDR_DI_OUT1 Base=r18.0 ElementSize=4 Type=ud
+.declare mubMSGHDR_DI_OUT1 Base=r18.0 ElementSize=1 Type=ub
+
+
+.declare mudMSGHDR_DI_OUT2 Base=r23.0 ElementSize=4 Type=ud
+.declare mubMSGHDR_DI_OUT2 Base=r23.0 ElementSize=1 Type=ub
+
+//r45
+//Use r45 as message header, so no need to "mov" the data.
+
+.declare mudDN_Y_OUT Base=r45.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+// Message response (Denoised & DI-ed pixels & statistics); Use buffer 5
+.declare udDNDI_RESP Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare uwDNDI_RESP Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare ubDNDI_RESP Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+// Message response (UV Copy); Use buffer 5
+.declare udDNDI_UV_RESP Base=r58.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare ubDNDI_UV_RESP Base=r58.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+//Temp GRFs: For 42X to 422 Conversion
+.declare uwDNDI_UVCOPY_TEMP Base=r10.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw //8 GRFs
+.declare ubDNDI_UVCOPY_TEMP Base=r10.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub //8 GRFs
+//---------------------------------------------------------------------------
+// Message descriptors
+//---------------------------------------------------------------------------
+// Extended message descriptor
+ // Message descriptor for sampler read
+ // = 000 0010 (message len 2) 00000 (resp len - set later, 12 or 5 or 11)
+ // 1 (header present 1) 0 11 (SIMD32/64 mode)
+ // 1000 (message type) 0000 (DI state index)
+ // 00000000 (binding table index - set later)
+ // = 0x040b8000
+
+
+// Attention: The Message Length is The Number of GRFs with Data Only, without the Header
+
+
+//---------------------------------------------------------------------------
+// VDI Return Data format
+//---------------------------------------------------------------------------
+// Defines for DI enabled
+
+
+// Defines for DI disabled
+
+
+
+// FileName: DNDI_Command.asm
+// Author: Vivek Kumar
+// Description: Sends a message to the VDI to process one DN (16x8) or DNDI (16x4) block
+
+// Prepare the DNDI send command
+mov (8) mudMSGHDR_DNDI(0)<1> r0.0<8;8,1>:ud // message header (copy of r0 into r18)
+mov (1) mwMSGHDR_DNDI(1,4)<1> r7.0<0;1,0>:w { NoDDClr } // horizontal origin // Do we need to add offset here? -vK
+mov (1) mwMSGHDR_DNDI(1,12)<1> r7.1<0;1,0>:w { NoDDChk } // vertical origin // Can these 2 be combined? - vK
+// The request is sent from r18 (mudMSGHDR_DNDI); the response is written starting at udDNDI_RESP(0), i.e. r46
+send (8) udDNDI_RESP(0)<1> r18 0x2 0x45E8003:ud
+
+// On Gen6, with VDI walker, use the XY pair returned rather than the one programmed above
+// VDI_RETURNED_XY is ordered XY when the walker is enabled, and is the same as programmed when the walker is disabled
+mov (2) r7.0<1>:w uwDNDI_RESP(4,14)<2;2,1> // horizontal/vertical origin in W.14 and W.15
+
+
+
+// FileName: DN_Hist_Save.asm
+// Author: Vivek Kumar
+// Description: Saves DN history data to statistics surface
+
+// Write denoise history to memory
+mov (8) r27<1>:ud r0.0<8;8,1>:ud // message header
+
+
+ mov (2) mudMSGHDR_HIST(1)<1> udDNDI_RESP(4,0)<2;2,1> // Move denoise history to MRF (4x2)
+
+
+shr (2) r27.0<1>:ud r7.0<2;2,1>:w 2:w // X,Y origin / 4
+add (1) r27.0<1>:ud r27.0<0;1,0>:ud r1.12<0;1,0>:uw { NoDDClr } // Add pitch to X origin
+mov (1) r27.2<1>:ud 0x10003:ud { NoDDChk } // block width and height
+
+mov (8) mudMSGHDR_HIST(0)<1> r27.0<8;8,1>:ud
+send (8) null<1>:d r22 0x5 0x40A8021:ud
+
+
+
+// FileName: DNDI_Enc_Stats_Save.asm
+// Author: Vivek Kumar
+// Description: Saves Encoder Statistics data to statistics surface in case of DI enabled (for 16x4 block)
+
+// Write encoder statistics to memory
+//Currently enable this only on Gen6 validation
+mov (8) mudMSGHDR_ENC_STATS(1)<1> 0x0:ud // Init payload MRF
+mov (8) mudMSGHDR_ENC_STATS(0)<1> r0.0<8;8,1>:ud // message header
+
+shr (1) mudMSGHDR_ENC_STATS(0,0)<1> r7.0<0;1,0>:w 1:w { NoDDClr } //enable the flag after testing on si { NoDDClr } // X origin / 2
+mul (1) acc0.1<1>:ud r7.1<0;1,0>:w 3:w // Y origin * 3
+shr (1) mudMSGHDR_ENC_STATS(0,1)<1> acc0.1<0;1,0>:ud 2:w { NoDDClr, NoDDChk } //enable the flag after testing on si { NoDDClr, NoDDChk } // Y origin * 3/4
+mov (1) mudMSGHDR_ENC_STATS(0,2)<1> 0x50003:ud { NoDDChk } //enable the flag after testing on si { NoDDChk } // block width and height (8x3)
+add (2) mudMSGHDR_ENC_STATS(0,0)<1> mudMSGHDR_ENC_STATS(0,0)<2;2,1> r1.12<2;2,1>:uw // Add pitch to X,Y origin
+
+
+ //Data block for Encoder Statistics
+ //----------------------------------------------------
+ //| 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | Bytes
+ //----------------------------------------------------
+ //| BNE | X | X | X | X |
+ //----------------------------------------------------
+ //| X | SVCM | X |
+ //----------------------------------------------------
+ //| SHCM | STAD | X |
+ //----------------------------------------------------
+ //| X | X |
+ //----------------------------------------------------
+ //| X | SVCM | X |
+ //----------------------------------------------------
+ //| SHCM | STAD | X |
+ //----------------------------------------------------
+ mov (1) mubMSGHDR_ENC_STATS(1,0)<1> ubDNDI_RESP(4,8)<0;1,0> { NoDDClr } // Move encoder statistics to MRF
+ mov (1) muwMSGHDR_ENC_STATS(1,3)<1> uwDNDI_RESP(4,11)<0;1,0> { NoDDClr, NoDDChk } // Move encoder statistics to MRF
+ mov (2) muwMSGHDR_ENC_STATS(1,4)<1> uwDNDI_RESP(4,12)<2;2,1> { NoDDClr, NoDDChk } // Move encoder statistics to MRF
+ mov (1) muwMSGHDR_ENC_STATS(1,9)<1> uwDNDI_RESP(4,8)<0;1,0> { NoDDClr, NoDDChk } // Move encoder statistics to MRF
+ mov (2) muwMSGHDR_ENC_STATS(1,10)<1> uwDNDI_RESP(4,9)<2;2,1> { NoDDChk } // Move encoder statistics to MRF
+
+
+send (8) null<1>:d r24 0x5 0x40A8021:ud
+
+
+
+// FileName: DN_Load_UV_IMC3_16x8.asm
+// Author: Vivek Kumar
+// Description: Read UV for 16x8 block through DATAPORT
+
+
+
+// FileName: UVCopy_Load_16x8.asm
+// Author: Vivek Kumar
+// Description: Read UV for 16x8 block through DATAPORT
+
+//CHANGE : Read extra UV data to convert to 422. -rT
+//we are reading extra data in ALL cases irrespective of whether upsampling is reqd or not later on, to keep things simple.
+
+
+ add (2) r27.0<1>:d r7.0<2;2,1>:w r4.4<2;2,1>:w // Source Y Block origin
+ asr (2) r27.0<1>:d r27.0<2;2,1>:d 1:w { NoDDClr } // U/V block origin should be half of Y's
+ mov (1) r27.2<1>:ud 0x40007:ud { NoDDChk } // U/V block width and height (8x5)
+ mov (8) r36<1>:ud r27.0<8;8,1>:ud
+ mov (8) r38<1>:ud r27.0<8;8,1>:ud
+ send (8) udDNDI_UV_RESP(0)<1> r36 0x4 0x2290001:ud
+ send (8) udDNDI_UV_RESP(2)<1> r38 0x4 0x2290002:ud
+
+ //Update Header for Save
+ mov (1) mudMSGHDR_UCOPY(0,2)<1> 0x30007:ud // U block width and height (8x4)
+ mov (1) mudMSGHDR_VCOPY(0,2)<1> 0x30007:ud // V block width and height (8x4)
+
+
+
+// FileName: DN_Save_Y_16x8.asm
+// Author: Vivek Kumar
+// Description: Save one 16x8 blocks of Y channel of DN output for reference
+
+
+mov (8) mudDN_Y_OUT(0,0)<1> r0<8;8,1>:ud // message header (copy of r0 into r45)
+mov (2) mudDN_Y_OUT(0,0)<1> r7.0<2;2,1>:w { NoDDClr } // X and Y origin: words r7.0 and r7.1 widened into header dwords 0 and 1
+mov (1) mudDN_Y_OUT(0,2)<1> 0x7000F:ud { NoDDChk } // block width and height (16x8)
+// No payload copy here: per the note on the mudDN_Y_OUT declaration, r45 is the header and the response registers (r46 onwards) follow it
+//send out data through data port
+send (8) null<1>:d mudDN_Y_OUT 0x5 0xA0A8018:ud
+
+
+
+// FileName: DN_Save_UV_IMC3_16x8.asm
+// Author: Vivek Kumar
+// Description: Save UV for 16x8 block through DATAPORT
+
+
+
+// FileName: UVCopy_Save_16x8.asm
+// Author: Vivek Kumar
+// Description: Save UV for 16x8 block through DATAPORT
+
+
+//Reuse the header from Load component
+//Header is modified at the end of load - to be usable for save.
+
+
+ mov (8) mudMSGHDR_UCOPY(1)<1> udDNDI_UV_RESP(0)<8;8,1>
+ mov (8) mudMSGHDR_VCOPY(1)<1> udDNDI_UV_RESP(2)<8;8,1>
+ send (4) null<1>:d r36 0x5 0x40A8019:ud
+ send (4) null<1>:d r38 0x5 0x40A801A:ud
+
+
+
+// FileName: DN_Upsample_UV_IMC3_16x8.asm
+// Author: Tatiya, Rupesh
+// Description: Upconvert 420 UV to 422
+
+
+
+// FileName: UVCopy_Upsample_UV_16x8.asm
+// Author: Tatiya, Rupesh
+// Description: Convert 42X UV to 422 - to be used for IECP.
+
+
+ avg.sat (16) uwDNDI_UVCOPY_TEMP(0) ubDNDI_UV_RESP(0,0)<0;8,1> ubDNDI_UV_RESP(0,0)<8;8,1>
+ avg.sat (16) uwDNDI_UVCOPY_TEMP(4) ubDNDI_UV_RESP(2,0)<0;8,1> ubDNDI_UV_RESP(2,0)<8;8,1>
+ avg.sat (16) uwDNDI_UVCOPY_TEMP(1) ubDNDI_UV_RESP(0,8)<0;8,1> ubDNDI_UV_RESP(0,8)<8;8,1>
+ avg.sat (16) uwDNDI_UVCOPY_TEMP(5) ubDNDI_UV_RESP(2,8)<0;8,1> ubDNDI_UV_RESP(2,8)<8;8,1>
+ avg.sat (16) uwDNDI_UVCOPY_TEMP(2) ubDNDI_UV_RESP(0,16)<0;8,1> ubDNDI_UV_RESP(0,16)<8;8,1>
+ avg.sat (16) uwDNDI_UVCOPY_TEMP(6) ubDNDI_UV_RESP(2,16)<0;8,1> ubDNDI_UV_RESP(2,16)<8;8,1>
+ avg.sat (16) uwDNDI_UVCOPY_TEMP(3) ubDNDI_UV_RESP(0,24)<0;8,1> ubDNDI_UV_RESP(0,24)<8;8,1>
+ avg.sat (16) uwDNDI_UVCOPY_TEMP(7) ubDNDI_UV_RESP(2,24)<0;8,1> ubDNDI_UV_RESP(2,24)<8;8,1>
+
+ mov (16) ubDNDI_RESP(5, 1)<2> ubDNDI_UVCOPY_TEMP(0,0)<32;16,2> { NoDDClr }
+ mov (16) ubDNDI_RESP(5, 0)<2> ubDNDI_UVCOPY_TEMP(4,0)<32;16,2> { NoDDChk }
+ mov (16) ubDNDI_RESP(5, 33)<2> ubDNDI_UVCOPY_TEMP(1,0)<32;16,2> { NoDDClr }
+ mov (16) ubDNDI_RESP(5, 32)<2> ubDNDI_UVCOPY_TEMP(5,0)<32;16,2> { NoDDChk }
+ mov (16) ubDNDI_RESP(5, 65)<2> ubDNDI_UVCOPY_TEMP(2,0)<32;16,2> { NoDDClr }
+ mov (16) ubDNDI_RESP(5, 64)<2> ubDNDI_UVCOPY_TEMP(6,0)<32;16,2> { NoDDChk }
+ mov (16) ubDNDI_RESP(5, 97)<2> ubDNDI_UVCOPY_TEMP(3,0)<32;16,2> { NoDDClr }
+ mov (16) ubDNDI_RESP(5, 96)<2> ubDNDI_UVCOPY_TEMP(7,0)<32;16,2> { NoDDChk }
+
+
+
+// FileName: DN_Save_422CP_16x8.asm
+// Author: Vivek Kumar
+// Description: Save one 16x8 blocks of DN output to the color pipe in 4-2-2 format
+
+
+.declare mubMSGHDR_DN_OUT_2 Base=r36.0 ElementSize=1 Type=ub
+
+
+mov (8) mudMSGHDR_DN_OUT(0)<1> r0<8;8,1>:ud // message header
+shl (1) mdMSGHDR_DN_OUT(0,0)<1> r7.0<0;1,0>:w 1:w { NoDDClr } // X origin * 2 (422 output)
+mov (1) mdMSGHDR_DN_OUT(0,1)<1> r7.1<0;1,0>:w { NoDDClr, NoDDChk } // Y origin
+mov (1) mudMSGHDR_DN_OUT(0,2)<1> 0x7000F:ud { NoDDClr, NoDDChk } // block width and height (16x8)
+
+//M0.3 - 0 - CP Enable, 1 - Area of Interest, 3:2 Message Format(TBD), 4:3 - Ignored, 31:5 CP state pointer
+//Compose area-of-interest bit + color pipe state pointer
+or (1) mudMSGHDR_DN_OUT(0,3)<1> r2.4<0;1,0>:ud r7.26<0;1,0>:b { NoDDChk }
+
+// First 8 x 8 Block
+ mov (8) mubMSGHDR_DN_OUT(1)<2> ubDNDI_RESP(0,0)<8;8,1> { NoDDClr } // copy line of Y directly to memory as optimization
+ mov (8) mubMSGHDR_DN_OUT(1,16)<2> ubDNDI_RESP(0,16)<8;8,1> { NoDDClr, NoDDChk } // copy line of Y directly to memory as optimization
+ mov (8) mubMSGHDR_DN_OUT(2)<2> ubDNDI_RESP(0,32)<8;8,1> { NoDDClr } // copy line of Y directly to memory as optimization
+ mov (8) mubMSGHDR_DN_OUT(2,16)<2> ubDNDI_RESP(0,48)<8;8,1> { NoDDClr, NoDDChk } // copy line of Y directly to memory as optimization
+ mov (8) mubMSGHDR_DN_OUT(3)<2> ubDNDI_RESP(0,64)<8;8,1> { NoDDClr } // copy line of Y directly to memory as optimization
+ mov (8) mubMSGHDR_DN_OUT(3,16)<2> ubDNDI_RESP(0,80)<8;8,1> { NoDDClr, NoDDChk } // copy line of Y directly to memory as optimization
+ mov (8) mubMSGHDR_DN_OUT(4)<2> ubDNDI_RESP(0,96)<8;8,1> { NoDDClr } // copy line of Y directly to memory as optimization
+ mov (8) mubMSGHDR_DN_OUT(4,16)<2> ubDNDI_RESP(0,112)<8;8,1> { NoDDClr, NoDDChk } // copy line of Y directly to memory as optimization
+
+ mov (4) mubMSGHDR_DN_OUT(1,1)<4> ubDNDI_RESP(5,1)<8;4,2> { NoDDClr, NoDDChk } // copy line of U directly to memory as optimization
+ mov (4) mubMSGHDR_DN_OUT(1,17)<4> ubDNDI_RESP(5,17)<8;4,2> { NoDDClr, NoDDChk } // copy line of U directly to memory as optimization
+
+ mov (4) mubMSGHDR_DN_OUT(1,3)<4> ubDNDI_RESP(5,0)<8;4,2> { NoDDChk } // copy line of V directly to memory as optimization
+ mov (4) mubMSGHDR_DN_OUT(1,19)<4> ubDNDI_RESP(5,16)<8;4,2> { NoDDChk } // copy line of V directly to memory as optimization
+ mov (4) mubMSGHDR_DN_OUT(2,1)<4> ubDNDI_RESP(5,33)<8;4,2> { NoDDClr, NoDDChk } // copy line of U directly to memory as optimization
+ mov (4) mubMSGHDR_DN_OUT(2,17)<4> ubDNDI_RESP(5,49)<8;4,2> { NoDDClr, NoDDChk } // copy line of U directly to memory as optimization
+
+ mov (4) mubMSGHDR_DN_OUT(2,3)<4> ubDNDI_RESP(5,32)<8;4,2> { NoDDChk } // copy line of V directly to memory as optimization
+ mov (4) mubMSGHDR_DN_OUT(2,19)<4> ubDNDI_RESP(5,48)<8;4,2> { NoDDChk } // copy line of V directly to memory as optimization
+ mov (4) mubMSGHDR_DN_OUT(3,1)<4> ubDNDI_RESP(5,65)<8;4,2> { NoDDClr, NoDDChk } // copy line of U directly to memory as optimization
+ mov (4) mubMSGHDR_DN_OUT(3,17)<4> ubDNDI_RESP(5,81)<8;4,2> { NoDDClr, NoDDChk } // copy line of U directly to memory as optimization
+
+ mov (4) mubMSGHDR_DN_OUT(3,3)<4> ubDNDI_RESP(5,64)<8;4,2> { NoDDChk } // copy line of V directly to memory as optimization
+ mov (4) mubMSGHDR_DN_OUT(3,19)<4> ubDNDI_RESP(5,80)<8;4,2> { NoDDChk } // copy line of V directly to memory as optimization
+ mov (4) mubMSGHDR_DN_OUT(4,1)<4> ubDNDI_RESP(5,97)<8;4,2> { NoDDClr, NoDDChk } // copy line of U directly to memory as optimization
+ mov (4) mubMSGHDR_DN_OUT(4,17)<4> ubDNDI_RESP(5,113)<8;4,2> { NoDDClr, NoDDChk } // copy line of U directly to memory as optimization
+
+ mov (4) mubMSGHDR_DN_OUT(4,3)<4> ubDNDI_RESP(5,96)<8;4,2> { NoDDChk } // copy line of V directly to memory as optimization
+ mov (4) mubMSGHDR_DN_OUT(4,19)<4> ubDNDI_RESP(5,112)<8;4,2> { NoDDChk } // copy line of V directly to memory as optimization
+
+// Second 8 x 8 Block
+mov (8) r36.0<1>:ud r31.0<8;8,1>:ud
+add (1) r36.0<1>:ud r36.0<0;1,0>:w 0x10:w
+
+ mov (8) mubMSGHDR_DN_OUT_2(1)<2> ubDNDI_RESP(0,8)<8;8,1> { NoDDClr } // copy line of Y directly to memory as optimization
+ mov (8) mubMSGHDR_DN_OUT_2(1,16)<2> ubDNDI_RESP(0,24)<8;8,1> { NoDDClr, NoDDChk } // copy line of Y directly to memory as optimization
+ mov (8) mubMSGHDR_DN_OUT_2(2)<2> ubDNDI_RESP(0,40)<8;8,1> { NoDDClr } // copy line of Y directly to memory as optimization
+ mov (8) mubMSGHDR_DN_OUT_2(2,16)<2> ubDNDI_RESP(0,56)<8;8,1> { NoDDClr, NoDDChk } // copy line of Y directly to memory as optimization
+ mov (8) mubMSGHDR_DN_OUT_2(3)<2> ubDNDI_RESP(0,72)<8;8,1> { NoDDClr } // copy line of Y directly to memory as optimization
+ mov (8) mubMSGHDR_DN_OUT_2(3,16)<2> ubDNDI_RESP(0,88)<8;8,1> { NoDDClr, NoDDChk } // copy line of Y directly to memory as optimization
+ mov (8) mubMSGHDR_DN_OUT_2(4)<2> ubDNDI_RESP(0,104)<8;8,1> { NoDDClr } // copy line of Y directly to memory as optimization
+ mov (8) mubMSGHDR_DN_OUT_2(4,16)<2> ubDNDI_RESP(0,120)<8;8,1> { NoDDClr, NoDDChk } // copy line of Y directly to memory as optimization
+
+ mov (4) mubMSGHDR_DN_OUT_2(1,1)<4> ubDNDI_RESP(5,9)<8;4,2> { NoDDClr, NoDDChk } // copy line of U directly to memory as optimization
+ mov (4) mubMSGHDR_DN_OUT_2(1,17)<4> ubDNDI_RESP(5,25)<8;4,2> { NoDDClr, NoDDChk } // copy line of U directly to memory as optimization
+
+ mov (4) mubMSGHDR_DN_OUT_2(1,3)<4> ubDNDI_RESP(5,8)<8;4,2> { NoDDChk } // copy line of V directly to memory as optimization
+ mov (4) mubMSGHDR_DN_OUT_2(1,19)<4> ubDNDI_RESP(5,24)<8;4,2> { NoDDChk } // copy line of V directly to memory as optimization
+ mov (4) mubMSGHDR_DN_OUT_2(2,1)<4> ubDNDI_RESP(5,41)<8;4,2> { NoDDClr, NoDDChk } // copy line of U directly to memory as optimization
+ mov (4) mubMSGHDR_DN_OUT_2(2,17)<4> ubDNDI_RESP(5,57)<8;4,2> { NoDDClr, NoDDChk } // copy line of U directly to memory as optimization
+
+ mov (4) mubMSGHDR_DN_OUT_2(2,3)<4> ubDNDI_RESP(5,40)<8;4,2> { NoDDChk } // copy line of V directly to memory as optimization
+ mov (4) mubMSGHDR_DN_OUT_2(2,19)<4> ubDNDI_RESP(5,56)<8;4,2> { NoDDChk } // copy line of V directly to memory as optimization
+ mov (4) mubMSGHDR_DN_OUT_2(3,1)<4> ubDNDI_RESP(5,73)<8;4,2> { NoDDClr, NoDDChk } // copy line of U directly to memory as optimization
+ mov (4) mubMSGHDR_DN_OUT_2(3,17)<4> ubDNDI_RESP(5,89)<8;4,2> { NoDDClr, NoDDChk } // copy line of U directly to memory as optimization
+
+ mov (4) mubMSGHDR_DN_OUT_2(3,3)<4> ubDNDI_RESP(5,72)<8;4,2> { NoDDChk } // copy line of V directly to memory as optimization
+ mov (4) mubMSGHDR_DN_OUT_2(3,19)<4> ubDNDI_RESP(5,88)<8;4,2> { NoDDChk } // copy line of V directly to memory as optimization
+ mov (4) mubMSGHDR_DN_OUT_2(4,1)<4> ubDNDI_RESP(5,105)<8;4,2> { NoDDClr, NoDDChk } // copy line of U directly to memory as optimization
+ mov (4) mubMSGHDR_DN_OUT_2(4,17)<4> ubDNDI_RESP(5,121)<8;4,2> { NoDDClr, NoDDChk } // copy line of U directly to memory as optimization
+
+ mov (4) mubMSGHDR_DN_OUT_2(4,3)<4> ubDNDI_RESP(5,104)<8;4,2> { NoDDChk } // copy line of V directly to memory as optimization
+ mov (4) mubMSGHDR_DN_OUT_2(4,19)<4> ubDNDI_RESP(5,120)<8;4,2> { NoDDChk } // copy line of V directly to memory as optimization
+
+//send out data through data port
+send (8) null<1>:d r31.0 0x5 0xA0A801B:ud
+send (8) null<1>:d r36.0 0x5 0xA0A801B:ud
+
+
+
+//End of Thread message
+
+mov (8) r127<1>:ud r0.0<8;8,1>:ud
+ send (1) null<1>:d r127 0x27 0x02000010
+
+
+.end_code
+.end_kernel
diff --git a/src/shaders/post_processing/gen7/PL3_DN_PL3.g4a b/src/shaders/post_processing/gen7/PL3_DN_PL3.g4a
new file mode 100644
index 0000000..baeb2d7
--- /dev/null
+++ b/src/shaders/post_processing/gen7/PL3_DN_PL3.g4a
@@ -0,0 +1,425 @@
+/*
+ * Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// 44 // Total instruction count
+// 1 // Total kernel count
+
+.kernel PL3_DN_PL3
+.code
+
+
+
+// FileName: DN_PL_Core.asm
+// Author: Vivek Kumar
+// Description: Tasks for DN only case (16x8 block) for planar format
+
+
+
+// FileName: DN.asm
+// Author: Vivek Kumar
+// Description: Tasks for DN only case (16x8 block)
+
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16)
+.default_register_type :ub
+// Register counts declared to the assembler: 128 in total, 7 of them payload
+.reg_count_total 128
+.reg_count_payload 7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later)
+// 0000,0000,0000
+// 0001(Spawn a root thread),0001 (Root thread spawn thread)
+// = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later)
+// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+// 0000,0000 (Binding table index, added later)
+// = 0x0C098700 (the bit fields listed above; 0x02000011 is the thread-spawn descriptor)
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+ // Message Descriptors
+ // = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+ // 1 (header present 1) 0 1010 (media block write) 000000
+ // 00000000 (binding table index - set later)
+ // = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+ // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT
+
+
+.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub // unsigned byte view of r30 onwards
+.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw // unsigned word view of the same registers
+.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud // unsigned dword view of the same registers
+.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f // float view of the same registers
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation: five typed views that all alias the registers starting at r9.0
+// float view
+.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+// unsigned dword view
+.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+// unsigned word view
+.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+// unsigned byte view, contiguous
+.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+// unsigned byte view with a stride of 4 on both source (<32;8,4>) and destination (<4>)
+.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+
+
+// End of common.inc
+
+
+// FileName: DNDI.inc
+// Author: Vivek Kumar
+// Description: Include file for DN, DI and DNDI
+// Inputs: DI_ENABLE, DN_ENABLE, DN_PLANAR, DN_PACKED
+
+
+
+
+// End of common.inc
+
+
+//Interface:
+//Static Parameters:
+//r1
+
+
+//====================== Binding table (Explicit To DNDI)=========================================
+
+
+.declare mudMSGHDR_DNDI Base=r18 ElementSize=4 Type=ud
+.declare mdMSGHDR_DNDI Base=r18 ElementSize=4 Type=d
+.declare mwMSGHDR_DNDI Base=r18 ElementSize=2 Type=w
+
+// STMM message header at r20
+.declare mudMSGHDR_STMM Base=r20 ElementSize=4 Type=ud
+
+// Denoise-history message header at r22
+.declare mudMSGHDR_HIST Base=r22 ElementSize=4 Type=ud
+
+// Encoder-statistics message header at r24 (dword, word and byte views)
+.declare mudMSGHDR_ENC_STATS Base=r24 ElementSize=4 Type=ud
+.declare muwMSGHDR_ENC_STATS Base=r24 ElementSize=2 Type=uw
+.declare mubMSGHDR_ENC_STATS Base=r24 ElementSize=1 Type=ub
+
+// DN output message header at r31 (unsigned dword, signed dword and byte views)
+.declare mudMSGHDR_DN_OUT Base=r31.0 ElementSize=4 Type=ud
+.declare mdMSGHDR_DN_OUT Base=r31.0 ElementSize=4 Type=d
+.declare mubMSGHDR_DN_OUT Base=r31.0 ElementSize=1 Type=ub
+
+// UV copy message headers: UVCOPY and UCOPY both name r36, VCOPY names r38
+.declare mudMSGHDR_UVCOPY Base=r36 ElementSize=4 Type=ud
+.declare mdMSGHDR_UVCOPY Base=r36 ElementSize=4 Type=d
+.declare mudMSGHDR_UCOPY Base=r36 ElementSize=4 Type=ud
+.declare mudMSGHDR_VCOPY Base=r38 ElementSize=4 Type=ud
+
+// DI output header 1 shares r18 with mudMSGHDR_DNDI above
+.declare mudMSGHDR_DI_OUT1 Base=r18.0 ElementSize=4 Type=ud
+.declare mubMSGHDR_DI_OUT1 Base=r18.0 ElementSize=1 Type=ub
+
+// DI output header 2 at r23
+.declare mudMSGHDR_DI_OUT2 Base=r23.0 ElementSize=4 Type=ud
+.declare mubMSGHDR_DI_OUT2 Base=r23.0 ElementSize=1 Type=ub
+
+//r45
+//Use r45 as message header, so no need to "mov" the data: r45 sits directly in front of the response buffer declared at r46 below.
+
+.declare mudDN_Y_OUT Base=r45.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+// Message response (Denoised & DI-ed pixels & statistics); Use buffer 5. Dword, word and byte views of r46 onwards.
+.declare udDNDI_RESP Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare uwDNDI_RESP Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare ubDNDI_RESP Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+// Message response (UV Copy); Use buffer 5. Dword and byte views of r58 onwards.
+.declare udDNDI_UV_RESP Base=r58.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare ubDNDI_UV_RESP Base=r58.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+//Temp GRFs: For 42X to 422 Conversion (word and byte views of r10 onwards)
+.declare uwDNDI_UVCOPY_TEMP Base=r10.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw //8 GRFs
+.declare ubDNDI_UVCOPY_TEMP Base=r10.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub //8 GRFs
+//---------------------------------------------------------------------------
+// Message descriptors
+//---------------------------------------------------------------------------
+// Extended message descriptor
+ // Message descriptor for sampler read
+ // = 000 0010 (message len 2) 00000 (resp len - set later, 12 or 5 or 11)
+ // 1 (header present 1) 0 11 (SIMD32/64 mode)
+ // 1000 (message type) 0000 (DI state index)
+ // 00000000 (binding table index - set later)
+ // = 0x040b8000
+
+
+// Attention: The Message Length is The Number of GRFs with Data Only, without the Header
+
+
+//---------------------------------------------------------------------------
+// VDI Return Data format
+//---------------------------------------------------------------------------
+// Defines for DI enabled
+
+
+// Defines for DI disabled
+
+
+
+// FileName: DNDI_Command.asm
+// Author: Vivek Kumar
+// Description: Sends a message to the VDI to process one DN (16x8) or DNDI (16x4) block
+
+// Prepare the DNDI send command
+mov (8) mudMSGHDR_DNDI(0)<1> r0.0<8;8,1>:ud // message header (copy of r0 into r18)
+mov (1) mwMSGHDR_DNDI(1,4)<1> r7.0<0;1,0>:w { NoDDClr } // horizontal origin // Do we need to add offset here? -vK
+mov (1) mwMSGHDR_DNDI(1,12)<1> r7.1<0;1,0>:w { NoDDChk } // vertical origin // Can these 2 be combined? - vK
+// The request is sent from r18 (mudMSGHDR_DNDI); the response is written starting at udDNDI_RESP(0), i.e. r46
+send (8) udDNDI_RESP(0)<1> r18 0x2 0x45E8003:ud
+
+// On Gen6, with VDI walker, use the XY pair returned rather than the one programmed above
+// VDI_RETURNED_XY is ordered XY when the walker is enabled, and is the same as programmed when the walker is disabled
+mov (2) r7.0<1>:w uwDNDI_RESP(4,14)<2;2,1> // horizontal/vertical origin in W.14 and W.15
+
+
+
+// FileName: DN_Hist_Save.asm
+// Author: Vivek Kumar
+// Description: Saves DN history data to statistics surface
+
+// Write denoise history to memory
+mov (8) r27<1>:ud r0.0<8;8,1>:ud // message header, built in r27 and copied into r22 before the send
+
+// mudMSGHDR_HIST(1) is r23, the register that follows the header register r22
+ mov (2) mudMSGHDR_HIST(1)<1> udDNDI_RESP(4,0)<2;2,1> // Move denoise history to MRF (4x2)
+
+
+shr (2) r27.0<1>:ud r7.0<2;2,1>:w 2:w // X,Y origin / 4
+add (1) r27.0<1>:ud r27.0<0;1,0>:ud r1.12<0;1,0>:uw { NoDDClr } // Add pitch to X origin
+mov (1) r27.2<1>:ud 0x10003:ud { NoDDChk } // block width and height
+// Copy the finished header from r27 into r22, then send starting at r22
+mov (8) mudMSGHDR_HIST(0)<1> r27.0<8;8,1>:ud
+send (8) null<1>:d r22 0x5 0x40A8021:ud
+
+
+
+// FileName: DNDI_Enc_Stats_Save.asm
+// Author: Vivek Kumar
+// Description: Saves Encoder Statistics data to statistics surface in case of DI enabled (for 16x4 block)
+
+// Write encoder statistics to memory
+//Currently enable this only on Gen6 validation
+mov (8) mudMSGHDR_ENC_STATS(1)<1> 0x0:ud // Init payload MRF (r25, the register after the header in r24)
+mov (8) mudMSGHDR_ENC_STATS(0)<1> r0.0<8;8,1>:ud // message header
+
+shr (1) mudMSGHDR_ENC_STATS(0,0)<1> r7.0<0;1,0>:w 1:w { NoDDClr } //enable the flag after testing on si { NoDDClr } // X origin / 2
+mul (1) acc0.1<1>:ud r7.1<0;1,0>:w 3:w // Y origin * 3
+shr (1) mudMSGHDR_ENC_STATS(0,1)<1> acc0.1<0;1,0>:ud 2:w { NoDDClr, NoDDChk } //enable the flag after testing on si { NoDDClr, NoDDChk } // Y origin * 3/4
+mov (1) mudMSGHDR_ENC_STATS(0,2)<1> 0x50003:ud { NoDDChk } //enable the flag after testing on si { NoDDChk } // block width and height (8x3)
+add (2) mudMSGHDR_ENC_STATS(0,0)<1> mudMSGHDR_ENC_STATS(0,0)<2;2,1> r1.12<2;2,1>:uw // Add pitch to X,Y origin
+
+// NOTE(review): if 0x50003 uses the same (height-1)<<16 | (width-1) form as the other block-size words in this file (e.g. 0x7000F = 16x8), it reads as 4 wide x 6 high, not "8x3" - confirm
+ //Data block for Encoder Statistics
+ //----------------------------------------------------
+ //| 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | Bytes
+ //----------------------------------------------------
+ //| BNE | X | X | X | X |
+ //----------------------------------------------------
+ //| X | SVCM | X |
+ //----------------------------------------------------
+ //| SHCM | STAD | X |
+ //----------------------------------------------------
+ //| X | X |
+ //----------------------------------------------------
+ //| X | SVCM | X |
+ //----------------------------------------------------
+ //| SHCM | STAD | X |
+ //----------------------------------------------------
+ mov (1) mubMSGHDR_ENC_STATS(1,0)<1> ubDNDI_RESP(4,8)<0;1,0> { NoDDClr } // Move encoder statistics to MRF: response byte 8 -> payload byte 0
+ mov (1) muwMSGHDR_ENC_STATS(1,3)<1> uwDNDI_RESP(4,11)<0;1,0> { NoDDClr, NoDDChk } // Move encoder statistics to MRF: response word 11 -> payload word 3
+ mov (2) muwMSGHDR_ENC_STATS(1,4)<1> uwDNDI_RESP(4,12)<2;2,1> { NoDDClr, NoDDChk } // Move encoder statistics to MRF: response words 12-13 -> payload words 4-5
+ mov (1) muwMSGHDR_ENC_STATS(1,9)<1> uwDNDI_RESP(4,8)<0;1,0> { NoDDClr, NoDDChk } // Move encoder statistics to MRF: response word 8 -> payload word 9
+ mov (2) muwMSGHDR_ENC_STATS(1,10)<1> uwDNDI_RESP(4,9)<2;2,1> { NoDDChk } // Move encoder statistics to MRF: response words 9-10 -> payload words 10-11
+
+// Send the header (r24) and the payload register that follows it
+send (8) null<1>:d r24 0x5 0x40A8021:ud
+
+
+
+// FileName: DN_Load_UV_IMC3_16x8.asm
+// Author: Vivek Kumar
+// Description: Read UV for 16x8 block through DATAPORT
+
+
+
+// FileName: UVCopy_Load_16x8.asm
+// Author: Vivek Kumar
+// Description: Read UV for 16x8 block through DATAPORT
+
+//CHANGE : Read extra UV data to convert to 422. -rT
+//we are reading extra data in ALL cases irrespective of whether upsampling is reqd or not later on, to keep things simple.
+
+
+ add (2) r27.0<1>:d r7.0<2;2,1>:w r4.4<2;2,1>:w // Source Y Block origin; only dwords 0-2 of r27 are written in this section
+ asr (2) r27.0<1>:d r27.0<2;2,1>:d 1:w { NoDDClr } // U/V block origin should be half of Y's
+ mov (1) r27.2<1>:ud 0x40007:ud { NoDDChk } // U/V block width and height (8x5)
+ mov (8) r36<1>:ud r27.0<8;8,1>:ud // header copy for U (r36 is mudMSGHDR_UCOPY)
+ mov (8) r38<1>:ud r27.0<8;8,1>:ud // header copy for V (r38 is mudMSGHDR_VCOPY)
+ send (8) udDNDI_UV_RESP(0)<1> r36 0x4 0x2290001:ud // read using the r36 header; response starts at udDNDI_UV_RESP(0)
+ send (8) udDNDI_UV_RESP(2)<1> r38 0x4 0x2290002:ud // read using the r38 header; response starts at udDNDI_UV_RESP(2)
+
+ //Update Header for Save: only the block-size dword changes, the origin written above is kept
+ mov (1) mudMSGHDR_UCOPY(0,2)<1> 0x30007:ud // U block width and height (8x4)
+ mov (1) mudMSGHDR_VCOPY(0,2)<1> 0x30007:ud // V block width and height (8x4)
+
+
+
+// FileName: DN_Save_Y_16x8.asm
+// Author: Vivek Kumar
+// Description: Save one 16x8 blocks of Y channel of DN output for reference
+
+
+mov (8) mudDN_Y_OUT(0,0)<1> r0<8;8,1>:ud // message header (copy of r0 into r45)
+mov (2) mudDN_Y_OUT(0,0)<1> r7.0<2;2,1>:w { NoDDClr } // X and Y origin: words r7.0 and r7.1 widened into header dwords 0 and 1
+mov (1) mudDN_Y_OUT(0,2)<1> 0x7000F:ud { NoDDChk } // block width and height (16x8)
+// No payload copy here: per the note on the mudDN_Y_OUT declaration, r45 is the header and the response registers (r46 onwards) follow it
+//send out data through data port
+send (8) null<1>:d mudDN_Y_OUT 0x5 0xA0A8018:ud
+
+
+
+// FileName: DN_Save_UV_IMC3_16x8.asm
+// Author: Vivek Kumar
+// Description: Save UV for 16x8 block through DATAPORT
+
+
+
+// FileName: UVCopy_Save_16x8.asm
+// Author: Vivek Kumar
+// Description: Save UV for 16x8 block through DATAPORT
+
+
+//Reuse the headers in r36 (U) and r38 (V) that the Load component built
+//Header is modified at the end of load - to be usable for save (block size set to 0x30007, commented there as 8x4).
+
+// Each payload is the first 8 dwords of the matching load response, placed in the register after its header
+ mov (8) mudMSGHDR_UCOPY(1)<1> udDNDI_UV_RESP(0)<8;8,1>
+ mov (8) mudMSGHDR_VCOPY(1)<1> udDNDI_UV_RESP(2)<8;8,1>
+ send (4) null<1>:d r36 0x5 0x40A8019:ud
+ send (4) null<1>:d r38 0x5 0x40A801A:ud
+
+
+
+//End of Thread message
+// r127 receives a copy of r0 and is the source register of the final send
+mov (8) r127<1>:ud r0.0<8;8,1>:ud
+ send (1) null<1>:d r127 0x27 0x02000010
+
+
+.end_code
+.end_kernel
diff --git a/src/shaders/post_processing/gen7/PL_DI_422CP.g4a b/src/shaders/post_processing/gen7/PL_DI_422CP.g4a
new file mode 100644
index 0000000..87db22f
--- /dev/null
+++ b/src/shaders/post_processing/gen7/PL_DI_422CP.g4a
@@ -0,0 +1,461 @@
+/*
+ * Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// 87 // Total instruction count
+// 1 // Total kernel count
+
+.kernel PL_DI_422CP
+.code
+
+
+
+// FileName: DI.asm
+// Author: Vivek Kumar
+// Description: Tasks for DI only case (16x4 block)
+
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16)
+.default_register_type :ub
+
+.reg_count_total 128
+.reg_count_payload 7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later)
+// 0000,0000,0000
+// 0001(Spawn a root thread),0001 (Root thread spawn thread)
+// = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later)
+// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+// 0000,0000 (Binding table index, added later)
+// = 0x0C098700 (value given by the bit fields above; 0x02000011 is the thread spawner descriptor - TODO confirm)
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+ // Message Descriptors
+ // = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+ // 1 (header present 1) 0 1010 (media block write) 000000
+ // 00000000 (binding table index - set later)
+ // = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+ // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT
+
+
+.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+
+.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+
+.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+
+
+// End of common.inc
+
+
+// FileName: DNDI.inc
+// Author: Vivek Kumar
+// Description: Include file for DN, DI and DNDI
+// Inputs: DI_ENABLE, DN_ENABLE, DN_PLANAR, DN_PACKED
+
+
+
+
+// End of common.inc
+
+
+//Interface:
+//Static Parameters:
+//r1
+
+
+//====================== Binding table (Explicit To DNDI)=========================================
+
+
+.declare mudMSGHDR_DNDI Base=r18 ElementSize=4 Type=ud
+.declare mdMSGHDR_DNDI Base=r18 ElementSize=4 Type=d
+.declare mwMSGHDR_DNDI Base=r18 ElementSize=2 Type=w
+
+
+.declare mudMSGHDR_STMM Base=r20 ElementSize=4 Type=ud
+
+
+.declare mudMSGHDR_HIST Base=r22 ElementSize=4 Type=ud
+
+
+.declare mudMSGHDR_ENC_STATS Base=r24 ElementSize=4 Type=ud
+.declare muwMSGHDR_ENC_STATS Base=r24 ElementSize=2 Type=uw
+.declare mubMSGHDR_ENC_STATS Base=r24 ElementSize=1 Type=ub
+
+
+.declare mudMSGHDR_DN_OUT Base=r31.0 ElementSize=4 Type=ud
+.declare mdMSGHDR_DN_OUT Base=r31.0 ElementSize=4 Type=d
+.declare mubMSGHDR_DN_OUT Base=r31.0 ElementSize=1 Type=ub
+
+
+.declare mudMSGHDR_UVCOPY Base=r36 ElementSize=4 Type=ud
+.declare mdMSGHDR_UVCOPY Base=r36 ElementSize=4 Type=d
+.declare mudMSGHDR_UCOPY Base=r36 ElementSize=4 Type=ud
+.declare mudMSGHDR_VCOPY Base=r38 ElementSize=4 Type=ud
+
+
+.declare mudMSGHDR_DI_OUT1 Base=r18.0 ElementSize=4 Type=ud
+.declare mubMSGHDR_DI_OUT1 Base=r18.0 ElementSize=1 Type=ub
+
+
+.declare mudMSGHDR_DI_OUT2 Base=r23.0 ElementSize=4 Type=ud
+.declare mubMSGHDR_DI_OUT2 Base=r23.0 ElementSize=1 Type=ub
+
+//r45
+//Use r45 as message header, so no need to "mov" the data.
+
+.declare mudDN_Y_OUT Base=r45.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+// Message response (Denoised & DI-ed pixels & statistics); Use buffer 5
+.declare udDNDI_RESP Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare uwDNDI_RESP Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare ubDNDI_RESP Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+// Message response (UV Copy); Use buffer 5
+.declare udDNDI_UV_RESP Base=r58.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare ubDNDI_UV_RESP Base=r58.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+//Temp GRFs: For 42X to 422 Conversion
+.declare uwDNDI_UVCOPY_TEMP Base=r10.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw //8 GRFs
+.declare ubDNDI_UVCOPY_TEMP Base=r10.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub //8 GRFs
+//---------------------------------------------------------------------------
+// Message descriptors
+//---------------------------------------------------------------------------
+// Extended message descriptor
+ // Message descriptor for sampler read
+ // = 000 0010 (message len 2) 00000 (resp len - set later, 12 or 5 or 11)
+ // 1 (header present 1) 0 11 (SIMD32/64 mode)
+ // 1000 (message type) 0000 (DI state index)
+ // 00000000 (binding table index - set later)
+ // = 0x040b8000
+
+
+// Attention: The Message Length is The Number of GRFs with Data Only, without the Header
+
+
+//---------------------------------------------------------------------------
+// VDI Return Data format
+//---------------------------------------------------------------------------
+// Defines for DI enabled
+
+
+// Defines for DI disabled
+
+
+
+// FileName: DNDI_Command.asm
+// Author: Vivek Kumar
+// Description: Sends a message to the VDI to process one DN (16x8) or DNDI (16x4) block
+// The request header is built in r18 (mudMSGHDR_DNDI); the response is written to udDNDI_RESP (r46 onwards).
+// Prepare the DNDI send command
+mov (8) mudMSGHDR_DNDI(0)<1> r0.0<8;8,1>:ud // message header
+mov (1) mwMSGHDR_DNDI(1,4)<1> r7.0<0;1,0>:w { NoDDClr } // horizontal origin // Do we need to add offset here? -vK
+mov (1) mwMSGHDR_DNDI(1,12)<1> r7.1<0;1,0>:w { NoDDChk } // vertical origin // Can these 2 be combined? - vK
+// Per the sampler descriptor layout documented above, 0x4AE8003 encodes message length 2, response length 10, binding table index 3.
+send (8) udDNDI_RESP(0)<1> r18 0x2 0x4AE8003:ud
+
+// On Gen6, with VDI walker, use the XY pair returned rather than the one programmed above
+// VDI_RETURNED_XY is ordered X,Y when the walker is enabled, and is the same as programmed when the walker is disabled
+mov (2) r7.0<1>:w uwDNDI_RESP(9,14)<2;2,1> // horizontal/vertical origin in W.14 and W.15 of response GRF 9; overwrites r7.0/r7.1
+
+
+
+// FileName: DI_STMM_Save.asm
+// Author: Vivek Kumar
+// Description: Saves DI STMM Data to statistics surface in case of DI enabled (for 16x4 block)
+// Message layout: header in r20 (mudMSGHDR_STMM), one payload GRF in r21.
+// Write STMM to memory
+mov (8) mudMSGHDR_STMM(0)<1> r0.0<8;8,1>:ud // message header
+mov (8) mudMSGHDR_STMM(1)<1> udDNDI_RESP(8,0) // Copy STMM data (DNDI response GRF 8) into the payload register r21
+
+shr (1) mudMSGHDR_STMM(0,0)<1> r7.0<0;1,0>:w 1:w { NoDDClr } // X origin / 2
+mov (1) mudMSGHDR_STMM(0,1)<1> r7.1<0;1,0>:w { NoDDClr, NoDDChk } // Y origin
+mov (1) mudMSGHDR_STMM(0,2)<1> 0x30007:ud { NoDDChk } // block width and height (8x4), encoded as ((height-1) << 16) | (width-1)
+// Per the media write descriptor layout documented above, 0x40A8021 encodes message length 2 and binding table index 0x21.
+send (8) null<1>:d r20 0x5 0x40A8021:ud
+
+
+
+// FileName: DNDI_Enc_Stats_Save.asm
+// Author: Vivek Kumar
+// Description: Saves Encoder Statistics data to statistics surface in case of DI enabled (for 16x4 block)
+// Message layout: header in r24 (mudMSGHDR_ENC_STATS), one payload GRF in r25.
+// Write encoder statistics to memory
+//Currently enable this only on Gen6 validation
+mov (8) mudMSGHDR_ENC_STATS(1)<1> 0x0:ud // Zero the payload register (r25); unused bytes of the data block stay 0
+mov (8) mudMSGHDR_ENC_STATS(0)<1> r0.0<8;8,1>:ud // message header
+
+shr (1) mudMSGHDR_ENC_STATS(0,0)<1> r7.0<0;1,0>:w 1:w { NoDDClr } // X origin / 2 (legacy note: enable the flag after testing on si)
+mul (1) acc0.1<1>:ud r7.1<0;1,0>:w 3:w // Y origin * 3
+shr (1) mudMSGHDR_ENC_STATS(0,1)<1> acc0.1<0;1,0>:ud 2:w { NoDDClr, NoDDChk } // Y origin * 3/4 (legacy note: enable the flag after testing on si)
+mov (1) mudMSGHDR_ENC_STATS(0,2)<1> 0x20007:ud { NoDDChk } // block width and height (8x3) (legacy note: enable the flag after testing on si)
+add (2) mudMSGHDR_ENC_STATS(0,0)<1> mudMSGHDR_ENC_STATS(0,0)<2;2,1> r1.12<2;2,1>:uw // Add the word pair r1.12/r1.13 to the X,Y origin (called "pitch" in the original note)
+
+// The payload is an 8-byte x 3-row block (payload dwords 0..5), filled from DNDI response GRF 9 as laid out below.
+ //Data block for Encoder Statistics
+ //----------------------------------------------------
+ //| 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | Bytes
+ //----------------------------------------------------
+ //| BNE | MCNT | FCNT | TCNT | X | X | X | X |
+ //----------------------------------------------------
+ //| DcTpT | SVCM | DcBpT | DcTpB |
+ //----------------------------------------------------
+ //| SHCM | STAD | DcTcB | DcBpB |
+ //----------------------------------------------------
+ mov (1) mudMSGHDR_ENC_STATS(1,0)<1> udDNDI_RESP(9,1)<0;1,0> { NoDDClr } // payload dword 0 <- response dword 1
+ mov (2) mudMSGHDR_ENC_STATS(1,3)<2> udDNDI_RESP(9,3)<2;2,1> { NoDDClr, NoDDChk } // payload dwords 3 and 5 <- response dwords 3 and 4
+ mov (2) mudMSGHDR_ENC_STATS(1,2)<2> udDNDI_RESP(9,5)<2;2,1> { NoDDChk } // payload dwords 2 and 4 <- response dwords 5 and 6
+
+// Same descriptor as the STMM save (0x40A8021: message length 2, binding table index 0x21 per the media write layout above).
+send (8) null<1>:d r24 0x5 0x40A8021:ud
+
+
+
+// FileName: DI_Save_422CP_16x4.asm
+// Author: Vivek Kumar
+// Description: Save two 16x4 blocks of DI output in 422 format to Color Pipe (IECP)
+// Four messages are built, each a header GRF followed by two payload GRFs:
+// r18 and r21 carry the 2nd field, r24 and r27 carry the 1st field (r27 first serves as the header template).
+.declare mubMSGHDR_DI_OUT1_1 Base=r18.0 ElementSize=1 Type=ub
+
+
+.declare mubMSGHDR_DI_OUT1_2 Base=r21.0 ElementSize=1 Type=ub
+
+
+.declare mubMSGHDR_DI_OUT2_1 Base=r24.0 ElementSize=1 Type=ub
+
+
+.declare mubMSGHDR_DI_OUT2_2 Base=r27.0 ElementSize=1 Type=ub
+
+// Build the template header in r27 from r0, then patch origin, block size and color pipe control.
+mov (8) r27.0<1>:ud r0.0<8;8,1>:ud
+shl (1) r27.0<1>:ud r7.0<0;1,0>:w 1:w { NoDDClr } // Horizontal block origin needs to be doubled (each packed output pixel takes two bytes)
+mov (1) r27.1<1>:ud r7.1<0;1,0>:w { NoDDClr, NoDDChk } // Vertical block origin
+mov (1) r27.2<1>:ud 0x3000F:ud { NoDDClr, NoDDChk } // Block width and height (16x4), encoded as ((height-1) << 16) | (width-1)
+
+//M0.3 - 0 - CP Enable, 1 - Area of Interest, 3:2 Message Format(TBD), 4:3 - Ignored, 31:5 CP state pointer
+//Compose area-of-interest bit + color pipe state pointer
+or (1) r27.3<1>:ud r2.4<0;1,0>:ud r7.26<0;1,0>:b { NoDDChk }
+
+//prepare the message headers
+mov (8) r18.0<1>:ud r27<8;8,1>:ud
+mov (8) r24.0<1>:ud r27<8;8,1>:ud
+
+// Each 16-byte output row holds 8 pixels: Y at even bytes (dst stride 2), U at byte 1 and V at byte 3 of every 4-byte group (dst stride 4).
+// Pack 2nd field Y; First 8x4 block
+ mov (8) mubMSGHDR_DI_OUT1_1(1)<2> ubDNDI_RESP(0,0)<8;8,1> { NoDDClr } // copy line of Y directly to memory as optimization
+ mov (8) mubMSGHDR_DI_OUT1_1(1,16)<2> ubDNDI_RESP(0,16)<8;8,1> { NoDDClr, NoDDChk } // copy line of Y directly to memory as optimization
+ mov (8) mubMSGHDR_DI_OUT1_1(2)<2> ubDNDI_RESP(0,32)<8;8,1> { NoDDClr } // copy line of Y directly to memory as optimization
+ mov (8) mubMSGHDR_DI_OUT1_1(2,16)<2> ubDNDI_RESP(0,48)<8;8,1> { NoDDClr, NoDDChk } // copy line of Y directly to memory as optimization
+
+// Pack 2nd field U, V; First 8x4 block
+ mov (4) mubMSGHDR_DI_OUT1_1(1,1)<4> ubDNDI_RESP(2,1)<8;4,2> { NoDDClr, NoDDChk } // copy line of U directly to memory as optimization
+ mov (4) mubMSGHDR_DI_OUT1_1(1,17)<4> ubDNDI_RESP(2,17)<8;4,2> { NoDDClr, NoDDChk } // copy line of U directly to memory as optimization
+
+ mov (4) mubMSGHDR_DI_OUT1_1(1,3)<4> ubDNDI_RESP(2,0)<8;4,2> { NoDDChk } // copy line of V directly to memory as optimization
+ mov (4) mubMSGHDR_DI_OUT1_1(1,19)<4> ubDNDI_RESP(2,16)<8;4,2> { NoDDChk } // copy line of V directly to memory as optimization
+ mov (4) mubMSGHDR_DI_OUT1_1(2,1)<4> ubDNDI_RESP(2,33)<8;4,2> { NoDDClr, NoDDChk } // copy line of U directly to memory as optimization
+ mov (4) mubMSGHDR_DI_OUT1_1(2,17)<4> ubDNDI_RESP(2,49)<8;4,2> { NoDDClr, NoDDChk } // copy line of U directly to memory as optimization
+
+ mov (4) mubMSGHDR_DI_OUT1_1(2,3)<4> ubDNDI_RESP(2,32)<8;4,2> { NoDDChk } // copy line of V directly to memory as optimization
+ mov (4) mubMSGHDR_DI_OUT1_1(2,19)<4> ubDNDI_RESP(2,48)<8;4,2> { NoDDChk } // copy line of V directly to memory as optimization
+
+
+// Pack 2nd field Y; Second 8x4 block
+mov (8) r21.0<1>:ud r18.0<8;8,1>:ud // clone the first block's header
+add (1) r21.0<1>:ud r21.0<0;1,0>:w 0x10:w // advance the horizontal origin by 16 bytes (the width of one packed block)
+
+ mov (8) mubMSGHDR_DI_OUT1_2(1)<2> ubDNDI_RESP(0,8)<8;8,1> { NoDDClr } // copy line of Y directly to memory as optimization
+ mov (8) mubMSGHDR_DI_OUT1_2(1,16)<2> ubDNDI_RESP(0,24)<8;8,1> { NoDDClr, NoDDChk } // copy line of Y directly to memory as optimization
+ mov (8) mubMSGHDR_DI_OUT1_2(2)<2> ubDNDI_RESP(0,40)<8;8,1> { NoDDClr } // copy line of Y directly to memory as optimization
+ mov (8) mubMSGHDR_DI_OUT1_2(2,16)<2> ubDNDI_RESP(0,56)<8;8,1> { NoDDClr, NoDDChk } // copy line of Y directly to memory as optimization
+
+// Pack 2nd field U, V; Second 8x4 block
+ mov (4) mubMSGHDR_DI_OUT1_2(1,1)<4> ubDNDI_RESP(2,9)<8;4,2> { NoDDClr, NoDDChk } // copy line of U directly to memory as optimization
+ mov (4) mubMSGHDR_DI_OUT1_2(1,17)<4> ubDNDI_RESP(2,25)<8;4,2> { NoDDClr, NoDDChk } // copy line of U directly to memory as optimization
+
+ mov (4) mubMSGHDR_DI_OUT1_2(1,3)<4> ubDNDI_RESP(2,8)<8;4,2> { NoDDChk } // copy line of V directly to memory as optimization
+ mov (4) mubMSGHDR_DI_OUT1_2(1,19)<4> ubDNDI_RESP(2,24)<8;4,2> { NoDDChk } // copy line of V directly to memory as optimization
+ mov (4) mubMSGHDR_DI_OUT1_2(2,1)<4> ubDNDI_RESP(2,41)<8;4,2> { NoDDClr, NoDDChk } // copy line of U directly to memory as optimization
+ mov (4) mubMSGHDR_DI_OUT1_2(2,17)<4> ubDNDI_RESP(2,57)<8;4,2> { NoDDClr, NoDDChk } // copy line of U directly to memory as optimization
+
+ mov (4) mubMSGHDR_DI_OUT1_2(2,3)<4> ubDNDI_RESP(2,40)<8;4,2> { NoDDChk } // copy line of V directly to memory as optimization
+ mov (4) mubMSGHDR_DI_OUT1_2(2,19)<4> ubDNDI_RESP(2,56)<8;4,2> { NoDDChk } // copy line of V directly to memory as optimization
+// Write both 2nd-field blocks (0x60A801B: message length 3, binding table index 0x1B per the media write layout above).
+send (8) null<1>:d r18.0 0x5 0x60A801B:ud
+send (8) null<1>:d r21.0 0x5 0x60A801B:ud
+
+// Pack 1st field Y; 1st 8x4 block
+ mov (8) mubMSGHDR_DI_OUT2_1(1)<2> ubDNDI_RESP(4,0)<8;8,1> { NoDDClr } // copy line of Y directly to memory as optimization
+ mov (8) mubMSGHDR_DI_OUT2_1(1,16)<2> ubDNDI_RESP(4,16)<8;8,1> { NoDDClr, NoDDChk } // copy line of Y directly to memory as optimization
+ mov (8) mubMSGHDR_DI_OUT2_1(2)<2> ubDNDI_RESP(4,32)<8;8,1> { NoDDClr } // copy line of Y directly to memory as optimization
+ mov (8) mubMSGHDR_DI_OUT2_1(2,16)<2> ubDNDI_RESP(4,48)<8;8,1> { NoDDClr, NoDDChk } // copy line of Y directly to memory as optimization
+
+// Pack 1st field U,V; 1st 8x4 block
+ mov (4) mubMSGHDR_DI_OUT2_1(1,1)<4> ubDNDI_RESP(6,1)<8;4,2> { NoDDClr, NoDDChk } // copy line of U directly to memory as optimization
+ mov (4) mubMSGHDR_DI_OUT2_1(1,17)<4> ubDNDI_RESP(6,17)<8;4,2> { NoDDClr, NoDDChk } // copy line of U directly to memory as optimization
+
+ mov (4) mubMSGHDR_DI_OUT2_1(1,3)<4> ubDNDI_RESP(6,0)<8;4,2> { NoDDChk } // copy line of V directly to memory as optimization
+ mov (4) mubMSGHDR_DI_OUT2_1(1,19)<4> ubDNDI_RESP(6,16)<8;4,2> { NoDDChk } // copy line of V directly to memory as optimization
+ mov (4) mubMSGHDR_DI_OUT2_1(2,1)<4> ubDNDI_RESP(6,33)<8;4,2> { NoDDClr, NoDDChk } // copy line of U directly to memory as optimization
+ mov (4) mubMSGHDR_DI_OUT2_1(2,17)<4> ubDNDI_RESP(6,49)<8;4,2> { NoDDClr, NoDDChk } // copy line of U directly to memory as optimization
+
+ mov (4) mubMSGHDR_DI_OUT2_1(2,3)<4> ubDNDI_RESP(6,32)<8;4,2> { NoDDChk } // copy line of V directly to memory as optimization
+ mov (4) mubMSGHDR_DI_OUT2_1(2,19)<4> ubDNDI_RESP(6,48)<8;4,2> { NoDDChk } // copy line of V directly to memory as optimization
+
+// Pack 1st field Y; 2nd 8x4 block
+mov (8) r27.0<1>:ud r24.0<8;8,1>:ud // reuse r27 (the former template) as this block's header, cloned from r24
+add (1) r27.0<1>:ud r27.0<0;1,0>:w 0x10:w // advance the horizontal origin by 16 bytes (the width of one packed block)
+
+ mov (8) mubMSGHDR_DI_OUT2_2(1)<2> ubDNDI_RESP(4,8)<8;8,1> { NoDDClr } // copy line of Y directly to memory as optimization
+ mov (8) mubMSGHDR_DI_OUT2_2(1,16)<2> ubDNDI_RESP(4,24)<8;8,1> { NoDDClr, NoDDChk } // copy line of Y directly to memory as optimization
+ mov (8) mubMSGHDR_DI_OUT2_2(2)<2> ubDNDI_RESP(4,40)<8;8,1> { NoDDClr } // copy line of Y directly to memory as optimization
+ mov (8) mubMSGHDR_DI_OUT2_2(2,16)<2> ubDNDI_RESP(4,56)<8;8,1> { NoDDClr, NoDDChk } // copy line of Y directly to memory as optimization
+
+// Pack 1st field U, V; 2nd 8x4 block
+ mov (4) mubMSGHDR_DI_OUT2_2(1,1)<4> ubDNDI_RESP(6,9)<8;4,2> { NoDDClr, NoDDChk } // copy line of U directly to memory as optimization
+ mov (4) mubMSGHDR_DI_OUT2_2(1,17)<4> ubDNDI_RESP(6,25)<8;4,2> { NoDDClr, NoDDChk } // copy line of U directly to memory as optimization
+
+ mov (4) mubMSGHDR_DI_OUT2_2(1,3)<4> ubDNDI_RESP(6,8)<8;4,2> { NoDDChk } // copy line of V directly to memory as optimization
+ mov (4) mubMSGHDR_DI_OUT2_2(1,19)<4> ubDNDI_RESP(6,24)<8;4,2> { NoDDChk } // copy line of V directly to memory as optimization
+ mov (4) mubMSGHDR_DI_OUT2_2(2,1)<4> ubDNDI_RESP(6,41)<8;4,2> { NoDDClr, NoDDChk } // copy line of U directly to memory as optimization
+ mov (4) mubMSGHDR_DI_OUT2_2(2,17)<4> ubDNDI_RESP(6,57)<8;4,2> { NoDDClr, NoDDChk } // copy line of U directly to memory as optimization
+
+ mov (4) mubMSGHDR_DI_OUT2_2(2,3)<4> ubDNDI_RESP(6,40)<8;4,2> { NoDDChk } // copy line of V directly to memory as optimization
+ mov (4) mubMSGHDR_DI_OUT2_2(2,19)<4> ubDNDI_RESP(6,56)<8;4,2> { NoDDChk } // copy line of V directly to memory as optimization
+// Write both 1st-field blocks (0x60A801E: message length 3, binding table index 0x1E per the media write layout above).
+send (8) null<1>:d r24.0 0x5 0x60A801E:ud
+send (8) null<1>:d r27.0 0x5 0x60A801E:ud
+
+
+
+//End of Thread message
+// r0 is copied to r127 and sent as the final message; 0x27 presumably selects the thread spawner with the end-of-thread bit set - TODO confirm.
+mov (8) r127<1>:ud r0.0<8;8,1>:ud
+ send (1) null<1>:d r127 0x27 0x02000010
+
+
+.end_code
+.end_kernel
diff --git a/src/shaders/post_processing/gen7/PL_DI_PA.g4a b/src/shaders/post_processing/gen7/PL_DI_PA.g4a
new file mode 100644
index 0000000..6f56e0d
--- /dev/null
+++ b/src/shaders/post_processing/gen7/PL_DI_PA.g4a
@@ -0,0 +1,399 @@
+/*
+ * Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// 57 // Total instruction count
+// 1 // Total kernel count
+
+.kernel PL_DI_PA
+.code
+
+
+
+// FileName: DI.asm
+// Author: Vivek Kumar
+// Description: Tasks for DI only case (16x4 block)
+
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16)
+.default_register_type :ub
+
+.reg_count_total 128
+.reg_count_payload 7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later)
+// 0000,0000,0000
+// 0001(Spawn a root thread),0001 (Root thread spawn thread)
+// = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later)
+// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+// 0000,0000 (Binding table index, added later)
+// = 0x0C098700 (value given by the bit fields above; 0x02000011 is the thread spawner descriptor - TODO confirm)
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+ // Message Descriptors
+ // = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+ // 1 (header present 1) 0 1010 (media block write) 000000
+ // 00000000 (binding table index - set later)
+ // = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+ // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT
+
+
+.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+
+.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+
+.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+
+
+// End of common.inc
+
+
+// FileName: DNDI.inc
+// Author: Vivek Kumar
+// Description: Include file for DN, DI and DNDI
+// Inputs: DI_ENABLE, DN_ENABLE, DN_PLANAR, DN_PACKED
+
+
+
+
+// End of common.inc
+
+
+//Interface:
+//Static Parameters:
+//r1
+
+
+//====================== Binding table (Explicit To DNDI)=========================================
+
+
+.declare mudMSGHDR_DNDI Base=r18 ElementSize=4 Type=ud
+.declare mdMSGHDR_DNDI Base=r18 ElementSize=4 Type=d
+.declare mwMSGHDR_DNDI Base=r18 ElementSize=2 Type=w
+
+
+.declare mudMSGHDR_STMM Base=r20 ElementSize=4 Type=ud
+
+
+.declare mudMSGHDR_HIST Base=r22 ElementSize=4 Type=ud
+
+
+.declare mudMSGHDR_ENC_STATS Base=r24 ElementSize=4 Type=ud
+.declare muwMSGHDR_ENC_STATS Base=r24 ElementSize=2 Type=uw
+.declare mubMSGHDR_ENC_STATS Base=r24 ElementSize=1 Type=ub
+
+
+.declare mudMSGHDR_DN_OUT Base=r31.0 ElementSize=4 Type=ud
+.declare mdMSGHDR_DN_OUT Base=r31.0 ElementSize=4 Type=d
+.declare mubMSGHDR_DN_OUT Base=r31.0 ElementSize=1 Type=ub
+
+
+.declare mudMSGHDR_UVCOPY Base=r36 ElementSize=4 Type=ud
+.declare mdMSGHDR_UVCOPY Base=r36 ElementSize=4 Type=d
+.declare mudMSGHDR_UCOPY Base=r36 ElementSize=4 Type=ud
+.declare mudMSGHDR_VCOPY Base=r38 ElementSize=4 Type=ud
+
+
+.declare mudMSGHDR_DI_OUT1 Base=r18.0 ElementSize=4 Type=ud
+.declare mubMSGHDR_DI_OUT1 Base=r18.0 ElementSize=1 Type=ub
+
+
+.declare mudMSGHDR_DI_OUT2 Base=r23.0 ElementSize=4 Type=ud
+.declare mubMSGHDR_DI_OUT2 Base=r23.0 ElementSize=1 Type=ub
+
+//r45
+//Use r45 as message header, so no need to "mov" the data.
+
+.declare mudDN_Y_OUT Base=r45.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+// Message response (Denoised & DI-ed pixels & statistics); Use buffer 5
+.declare udDNDI_RESP Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare uwDNDI_RESP Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare ubDNDI_RESP Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+// Message response (UV Copy); Use buffer 5
+.declare udDNDI_UV_RESP Base=r58.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare ubDNDI_UV_RESP Base=r58.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+//Temp GRFs: For 42X to 422 Conversion
+.declare uwDNDI_UVCOPY_TEMP Base=r10.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw //8 GRFs
+.declare ubDNDI_UVCOPY_TEMP Base=r10.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub //8 GRFs
+//---------------------------------------------------------------------------
+// Message descriptors
+//---------------------------------------------------------------------------
+// Extended message descriptor
+ // Message descriptor for sampler read
+ // = 000 0010 (message len 2) 00000 (resp len - set later, 12 or 5 or 11)
+ // 1 (header present 1) 0 11 (SIMD32/64 mode)
+ // 1000 (message type) 0000 (DI state index)
+ // 00000000 (binding table index - set later)
+ // = 0x040b8000
+
+
+// Attention: The Message Length is The Number of GRFs with Data Only, without the Header
+
+
+//---------------------------------------------------------------------------
+// VDI Return Data format
+//---------------------------------------------------------------------------
+// Defines for DI enabled
+
+
+// Defines for DI disabled
+
+
+
+// FileName: DNDI_Command.asm
+// Author: Vivek Kumar
+// Description: Sends a message to the VDI to process one DN (16x8) or DNDI (16x4) block
+// The request header is built in r18 (mudMSGHDR_DNDI); the response is written to udDNDI_RESP (r46 onwards).
+// Prepare the DNDI send command
+mov (8) mudMSGHDR_DNDI(0)<1> r0.0<8;8,1>:ud // message header
+mov (1) mwMSGHDR_DNDI(1,4)<1> r7.0<0;1,0>:w { NoDDClr } // horizontal origin // Do we need to add offset here? -vK
+mov (1) mwMSGHDR_DNDI(1,12)<1> r7.1<0;1,0>:w { NoDDChk } // vertical origin // Can these 2 be combined? - vK
+// Per the sampler descriptor layout documented above, 0x4AE8003 encodes message length 2, response length 10, binding table index 3.
+send (8) udDNDI_RESP(0)<1> r18 0x2 0x4AE8003:ud
+
+// On Gen6, with VDI walker, use the XY pair returned rather than the one programmed above
+// VDI_RETURNED_XY is ordered X,Y when the walker is enabled, and is the same as programmed when the walker is disabled
+mov (2) r7.0<1>:w uwDNDI_RESP(9,14)<2;2,1> // horizontal/vertical origin in W.14 and W.15 of response GRF 9; overwrites r7.0/r7.1
+
+
+
+// FileName: DI_STMM_Save.asm
+// Author: Vivek Kumar
+// Description: Saves DI STMM Data to statistics surface in case of DI enabled (for 16x4 block)
+// Message layout: header in r20 (mudMSGHDR_STMM), one payload GRF in r21.
+// Write STMM to memory
+mov (8) mudMSGHDR_STMM(0)<1> r0.0<8;8,1>:ud // message header
+mov (8) mudMSGHDR_STMM(1)<1> udDNDI_RESP(8,0) // Copy STMM data (DNDI response GRF 8) into the payload register r21
+
+shr (1) mudMSGHDR_STMM(0,0)<1> r7.0<0;1,0>:w 1:w { NoDDClr } // X origin / 2
+mov (1) mudMSGHDR_STMM(0,1)<1> r7.1<0;1,0>:w { NoDDClr, NoDDChk } // Y origin
+mov (1) mudMSGHDR_STMM(0,2)<1> 0x30007:ud { NoDDChk } // block width and height (8x4), encoded as ((height-1) << 16) | (width-1)
+// Per the media write descriptor layout documented above, 0x40A8021 encodes message length 2 and binding table index 0x21.
+send (8) null<1>:d r20 0x5 0x40A8021:ud
+
+
+
+// FileName: DNDI_Enc_Stats_Save.asm
+// Author: Vivek Kumar
+// Description: Saves Encoder Statistics data to statistics surface in case of DI enabled (for 16x4 block)
+// Message layout: header in r24 (mudMSGHDR_ENC_STATS), one payload GRF in r25.
+// Write encoder statistics to memory
+//Currently enable this only on Gen6 validation
+mov (8) mudMSGHDR_ENC_STATS(1)<1> 0x0:ud // Zero the payload register (r25); unused bytes of the data block stay 0
+mov (8) mudMSGHDR_ENC_STATS(0)<1> r0.0<8;8,1>:ud // message header
+
+shr (1) mudMSGHDR_ENC_STATS(0,0)<1> r7.0<0;1,0>:w 1:w { NoDDClr } // X origin / 2 (legacy note: enable the flag after testing on si)
+mul (1) acc0.1<1>:ud r7.1<0;1,0>:w 3:w // Y origin * 3
+shr (1) mudMSGHDR_ENC_STATS(0,1)<1> acc0.1<0;1,0>:ud 2:w { NoDDClr, NoDDChk } // Y origin * 3/4 (legacy note: enable the flag after testing on si)
+mov (1) mudMSGHDR_ENC_STATS(0,2)<1> 0x20007:ud { NoDDChk } // block width and height (8x3) (legacy note: enable the flag after testing on si)
+add (2) mudMSGHDR_ENC_STATS(0,0)<1> mudMSGHDR_ENC_STATS(0,0)<2;2,1> r1.12<2;2,1>:uw // Add the word pair r1.12/r1.13 to the X,Y origin (called "pitch" in the original note)
+
+// The payload is an 8-byte x 3-row block (payload dwords 0..5), filled from DNDI response GRF 9 as laid out below.
+ //Data block for Encoder Statistics
+ //----------------------------------------------------
+ //| 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | Bytes
+ //----------------------------------------------------
+ //| BNE | MCNT | FCNT | TCNT | X | X | X | X |
+ //----------------------------------------------------
+ //| DcTpT | SVCM | DcBpT | DcTpB |
+ //----------------------------------------------------
+ //| SHCM | STAD | DcTcB | DcBpB |
+ //----------------------------------------------------
+ mov (1) mudMSGHDR_ENC_STATS(1,0)<1> udDNDI_RESP(9,1)<0;1,0> { NoDDClr } // payload dword 0 <- response dword 1
+ mov (2) mudMSGHDR_ENC_STATS(1,3)<2> udDNDI_RESP(9,3)<2;2,1> { NoDDClr, NoDDChk } // payload dwords 3 and 5 <- response dwords 3 and 4
+ mov (2) mudMSGHDR_ENC_STATS(1,2)<2> udDNDI_RESP(9,5)<2;2,1> { NoDDChk } // payload dwords 2 and 4 <- response dwords 5 and 6
+
+// Same descriptor as the STMM save (0x40A8021: message length 2, binding table index 0x21 per the media write layout above).
+send (8) null<1>:d r24 0x5 0x40A8021:ud
+
+
+
+// FileName: DI_Save_PA_16x4.asm
+// Author: Vivek Kumar
+// Description: Save two 16x4 blocks of DI output in Packed format
+// Two messages are built: header r18 and header r23, each followed by four payload GRFs written through address registers.
+// a0.4/a0.5/a0.6 address the Y/U/V byte lanes; a0.7 is also loaded (from r2.31) but not used in this section.
+add (4) a0.4<1>:uw r2.28<4;4,1>:ub 608:w // a0.4..a0.7 = r2.28..r2.31 + 608; 608 = 19*32, presumably the byte address of r19 (original note: "starts at m20") - TODO confirm
+
+mov (8) r27.0<1>:ud r0.0<8;8,1>:ud
+shl (1) r27.0<1>:d r7.0<0;1,0>:w 1:w { NoDDClr } // Horizontal block origin needs to be doubled (each packed output pixel takes two bytes)
+mov (1) r27.1<1>:d r7.1<0;1,0>:w { NoDDClr, NoDDChk } // Vertical block origin
+mov (1) r27.2<1>:ud 0x3001F:ud { NoDDChk } // Block width and height (32x4), encoded as ((height-1) << 16) | (width-1)
+
+//prepare the message headers
+mov (8) r18.0<1>:ud r27<8;8,1>:ud
+mov (8) r23.0<1>:ud r27<8;8,1>:ud
+// Each output row is 32 bytes: 16 Y bytes at dst stride 2, 8 U and 8 V bytes at dst stride 4; rows are 32 bytes apart (offsets 0/32/64/96).
+// Pack 2nd field Y
+ mov (16) r[a0.4, 0]<2> ubDNDI_RESP(0,0) { NoDDClr }
+ mov (16) r[a0.4, 32]<2> ubDNDI_RESP(0,16) { NoDDClr }
+ mov (16) r[a0.4, 64]<2> ubDNDI_RESP(0,32) { NoDDClr }
+ mov (16) r[a0.4, 96]<2> ubDNDI_RESP(0,48) { NoDDClr }
+// Pack 2nd field U
+ mov (8) r[a0.5, 0]<4> ubDNDI_RESP(2,1)<16;8,2> { NoDDClr, NoDDChk } //U pixels
+ mov (8) r[a0.5, 32]<4> ubDNDI_RESP(2,17)<16;8,2> { NoDDClr, NoDDChk } //U pixels
+ mov (8) r[a0.5, 64]<4> ubDNDI_RESP(2,33)<16;8,2> { NoDDClr, NoDDChk } //U pixels
+ mov (8) r[a0.5, 96]<4> ubDNDI_RESP(2,49)<16;8,2> { NoDDClr, NoDDChk } //U pixels
+// Pack 2nd field V
+ mov (8) r[a0.6, 0]<4> ubDNDI_RESP(2,0)<16;8,2> { NoDDChk } //V pixels
+ mov (8) r[a0.6, 32]<4> ubDNDI_RESP(2,16)<16;8,2> { NoDDChk } //V pixels
+ mov (8) r[a0.6, 64]<4> ubDNDI_RESP(2,32)<16;8,2> { NoDDChk } //V pixels
+ mov (8) r[a0.6, 96]<4> ubDNDI_RESP(2,48)<16;8,2> { NoDDChk } //V pixels
+// The 1st-field rows start 160 bytes (five GRFs) after the 2nd-field rows, i.e. in the payload that follows the r23 header.
+// Pack 1st field Y
+ mov (16) r[a0.4, 160]<2> ubDNDI_RESP(4,0) { NoDDClr }
+ mov (16) r[a0.4, 192]<2> ubDNDI_RESP(4,16) { NoDDClr }
+ mov (16) r[a0.4, 224]<2> ubDNDI_RESP(4,32) { NoDDClr }
+ mov (16) r[a0.4, 256]<2> ubDNDI_RESP(4,48) { NoDDClr }
+// Pack 1st field U
+ mov (8) r[a0.5, 160]<4> ubDNDI_RESP(6,1)<16;8,2> { NoDDClr, NoDDChk } //U pixels
+ mov (8) r[a0.5, 192]<4> ubDNDI_RESP(6,17)<16;8,2> { NoDDClr, NoDDChk } //U pixels
+ mov (8) r[a0.5, 224]<4> ubDNDI_RESP(6,33)<16;8,2> { NoDDClr, NoDDChk } //U pixels
+ mov (8) r[a0.5, 256]<4> ubDNDI_RESP(6,49)<16;8,2> { NoDDClr, NoDDChk } //U pixels
+// Pack 1st field V
+ mov (8) r[a0.6, 160]<4> ubDNDI_RESP(6,0)<16;8,2> { NoDDChk } //V pixels
+ mov (8) r[a0.6, 192]<4> ubDNDI_RESP(6,16)<16;8,2> { NoDDChk } //V pixels
+ mov (8) r[a0.6, 224]<4> ubDNDI_RESP(6,32)<16;8,2> { NoDDChk } //V pixels
+ mov (8) r[a0.6, 256]<4> ubDNDI_RESP(6,48)<16;8,2> { NoDDChk } //V pixels
+// Both sends use message length 5 (header + 4 payload GRFs); binding table index 0x1B, then 0x1E, per the media write layout above.
+//save the previous frame
+send (8) null<1>:d r18.0 0x5 0xA0A801B:ud
+
+//save the current frame
+send (8) null<1>:d r23.0 0x5 0xA0A801E:ud
+
+
+
+//End of Thread message
+// r0 is copied to r127 and sent as the final message; 0x27 presumably selects the thread spawner with the end-of-thread bit set - TODO confirm.
+mov (8) r127<1>:ud r0.0<8;8,1>:ud
+ send (1) null<1>:d r127 0x27 0x02000010
+
+
+.end_code
+.end_kernel
diff --git a/src/shaders/post_processing/gen7/Save_AVS_NV12.g4a b/src/shaders/post_processing/gen7/Save_AVS_NV12.g4a
new file mode 100644
index 0000000..dafe9b1
--- /dev/null
+++ b/src/shaders/post_processing/gen7/Save_AVS_NV12.g4a
@@ -0,0 +1,621 @@
+/*
+ * Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// 131 // Total instruction count
+// 1 // Total kernel count
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16)
+.default_register_type :ub
+
+.reg_count_total 128
+.reg_count_payload 7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later)
+// 0000,0000,0000
+// 0001(Spawn a root thread),0001 (Root thread spawn thread)
+// = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later)
+// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+// 0000,0000 (Binding table index, added later)
+// = 0x02000011
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+ // Message Descriptors
+ // = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+ // 1 (header present 1) 0 1010 (media block write) 000000
+ // 00000000 (binding table index - set later)
+ // = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+ // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT
+
+
+.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+
+.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+
+.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+
+
+// End of common.inc
+
+
+// Module name: Save_AVS_NV12.asm
+//
+// Save NV12 420 frame data block of size 16x16
+//
+// To save 16x16 block (16x16 bytes of Y and 16x8 bytes of interleaved UV), we need 2 send instructions, of size 16x16 and 16x8 respectively.
+// ---------------
+// | 16x16 |
+// | Y |
+// ---------------
+// | 16x8 UV |
+// ---------------
+
+//-----------------------------------------------------------------
+//The layout of data is as follows:
+//mMSGHDR0 : Y data header (16x16)
+//mubMSGPAYLOAD0 : Y data payload (8 GRFs)
+//mMSGHDR1 : UV data header (16x8)
+//mubMSGPAYLOAD1 : UV data payload (4 GRFs)
+//------------------------------------------------------------------
+
+
+
+// Module name: Save.inc
+
+
+
+
+// Description: Includes all definitions explicit to Fast Composite.
+
+
+
+
+// End of common.inc
+
+
+//========== GRF partition ==========
+ // r0 header : r0 (1 GRF)
+ // Static parameters : r1 - r6 (6 GRFS)
+ // Inline parameters : r7 - r8 (2 GRFs)
+ // MSGSRC : r27 (1 GRF)
+//===================================
+
+//Interface:
+//========== Static Parameters (Explicit To Fast Composite) ==========
+//r1
+//CSC Set 0
+
+
+.declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud
+
+//Constant alpha
+
+
+//r2
+
+
+// Gen7 AVS WA
+
+
+// WiDi Definitions
+
+
+//Colorfill
+
+
+ // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise.
+
+.declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub
+
+//r3
+//Normalised Ratio of Horizontal step size with main video for all layers
+
+
+ //Normalised Ratio of Horizontal step size with main video for all layers becomes
+ //Normalised Horizontal step size for all layers in VP_Setup.asm
+
+
+//r4
+//Normalised Vertical step size for all layers
+
+
+//r5
+//Normalised Vertical Frame Origin for all layers
+
+
+//r6
+//Normalised Horizontal Frame Origin for all layers
+
+
+//========== Inline Parameters (Explicit To Fast Composite) ==========
+
+
+//Main video Step X
+
+
+//====================== Binding table (Explicit To Fast Composite)=========================================
+
+
+//Used by Interlaced Scaling Kernels
+
+
+//========== Sampler State Table Index (Explicit To Fast Composite)==========
+//Sampler Index for AVS/IEF messages
+
+
+//Sampler Index for SIMD16 sampler messages
+
+
+//=============================================================================
+
+.declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+
+.declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+.declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_4 Base=r28.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_5 Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+.declare ubBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+.declare ub4BUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+
+//Pointer to mask reg
+
+
+//r18
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+.declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF
+
+//r19
+
+
+.declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF
+
+
+//r20
+
+.declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF
+
+//r21
+
+.declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF
+
+//r22
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as
+//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT
+
+//r23
+
+
+//Lumakey
+
+
+//r24
+
+
+//r25
+
+
+//r26
+
+
+//defines to generate LABELS during compile time.
+
+
+//Msg payload buffers; up to 4 full-size messages can be written
+
+
+.declare mudMSGPAYLOAD0 Base=r29.0 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare mudMSGPAYLOAD1 Base=r38.0 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare mudMSGPAYLOAD2 Base=r47.0 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare mudMSGPAYLOAD3 Base=r56.0 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+
+.declare muwMSGPAYLOAD0 Base=r29.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare muwMSGPAYLOAD1 Base=r38.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare muwMSGPAYLOAD2 Base=r47.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare muwMSGPAYLOAD3 Base=r56.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+
+.declare mubMSGPAYLOAD0 Base=r29.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare mubMSGPAYLOAD1 Base=r38.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare mubMSGPAYLOAD2 Base=r47.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare mubMSGPAYLOAD3 Base=r56.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare mubMSGPAYLOAD4 Base=r32.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare mubMSGPAYLOAD5 Base=r41.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare mubMSGPAYLOAD6 Base=r50.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare mubMSGPAYLOAD7 Base=r59.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+
+
+ // the r17 register (nTEMP0) is originally defined from "Common.inc"
+ // instead of re-defining a nTEMP0 here, we use "SAVE_RGB" suffix for its naming
+
+ .declare uwTemp0 Base=r17.0 ElementSize=2 Type=uw
+
+
+//_SAVE_INC_
+
+
+ // At the save module we have all 8 address sub-registers available.
+ // So we will use PING-PONG type of scheme to save the data using
+ // pointers pBUF_CHNL_TOP_8x4 and pBUF_CHNL_BOT_8x4. This will help
+ // reduce dependency. - rT
+ // (a0.0-a0.3 and a0.4-a0.7 are loaded below as two independent pointer sets.)
+ //wBUFF_CHNL_PTR points to either buffer 0 or buffer 4.
+ //Add appropriate offsets to get pointers for all buffers (1,2,3 or 5).
+ //Offsets are zero for buffer 0 and buffer 4.
+ add (4) a0.0:uw r22.0<4;4,1>:w 0:uw // a0.0-a0.3 = the four words r22.0-r22.3, unchanged
+ add (4) a0.4:uw r22.0<4;4,1>:w 512:uw // a0.4-a0.7 = the same four words + 512 bytes (presumably the next 16-GRF buffer)
+ // r28 and r37 are the header registers used by the two sends at the end of this module.
+ //Set up header for Y,U and V data
+ mov (8) r28<1>:ud r27<8;8,1>:ud // Y header starts as a copy of r27 (MSGSRC)
+ mov (8) r37<1>:ud r27<8;8,1>:ud // UV header starts as a copy of r27 (MSGSRC)
+ // Header dwords 0-1 take the block origin from r7.0/r7.1; dword 2 takes the block size.
+ mov (2) r28.0<1>:d r7.0<2;2,1>:w { NoDDClr } //ORI Y (LUMA) = ORI
+ mov (1) r37.0<1>:d r7.0<0;1,0>:w { NoDDClr } //H ORI (CHROMA) = H ORI
+ shr (1) r37.1<1>:d r7.1<0;1,0>:w 1:w { NoDDClr, NoDDChk } //V ORI (CHROMA) = V ORI/2
+ // Size dword: low half = width-1 (0xF), high half = height-1 (0xF for Y, 0x7 for UV).
+ mov (1) r28.2<1>:ud 0xF000F:ud { NoDDChk } // Y Block width and height (16x16)
+ mov (1) r37.2<1>:ud 0x7000F:ud { NoDDChk } // UV Block width and height(16x8)
+
+// Unscramble, and pack data directly to MRFs
+
+// Data 16x16 block is divided as -
+// ---------
+// | 0 |
+// ---------
+// | 1 |
+// ---------
+// | 2 |
+// ---------
+// | 3 |
+// ---------
+// All sub-blocks are of size 16x4
+// 0: ubBUFFER_0
+// 1: ubBUFFER_1, ubBUFFER_0+16
+// 2: ubBUFFER_2
+// 3: ubBUFFER_3, ubBUFFER_2+16
+
+ //Y Rounding 16x4 top part
+ add.sat (16) r[a0.1,0]<1>:uw r[a0.1,0]<16;16,1>:uw 0x0080:uw // add 0x80 to each 16-bit sample in place, saturating
+ add.sat (16) r[a0.1,32]<1>:uw r[a0.1,32]<16;16,1>:uw 0x0080:uw // the four rows sit 32 bytes apart
+ add.sat (16) r[a0.1,64]<1>:uw r[a0.1,64]<16;16,1>:uw 0x0080:uw
+ add.sat (16) r[a0.1,96]<1>:uw r[a0.1,96]<16;16,1>:uw 0x0080:uw
+ // The 0x80 bias gives round-to-nearest because the payload moves later keep only byte 1 of each word.
+ // U Averaging and Rounding, 8x2 top part
+ shr (8) uwBUFFER_5(0,0)<2> r[a0.2,0]<16;8,2>:uw 1:w // halve every other word of row 0 into scratch (uwBUFFER_5)
+ shr (8) uwBUFFER_5(1,0)<2> r[a0.2,32]<16;8,2>:uw 1:w // row 1
+ shr (8) uwBUFFER_5(2,0)<2> r[a0.2,64]<16;8,2>:uw 1:w // row 2
+ shr (8) uwBUFFER_5(3,0)<2> r[a0.2,96]<16;8,2>:uw 1:w // row 3
+ // Sum the halved rows pairwise: row0+row1 and row2+row3 are the two averaged chroma rows.
+ add (8) uwBUFFER_5(0,0)<2> uwBUFFER_5(0,0)<16;8,2> uwBUFFER_5(1,0)<16;8,2>
+ add.sat (8) r[a0.2,0]<2>:uw uwBUFFER_5(0,0)<16;8,2> 0x0080:uw // round, write back over row 0 of the source
+
+ add (8) uwBUFFER_5(2,0)<2> uwBUFFER_5(2,0)<16;8,2> uwBUFFER_5(3,0)<16;8,2>
+ add.sat (8) r[a0.2,64]<2>:uw uwBUFFER_5(2,0)<16;8,2> 0x0080:uw // round, write back over row 2 of the source
+
+ // V Averaging and Rounding, 8x2 top part
+ shr (8) uwBUFFER_5(4,0)<2> r[a0.0,0]<16;8,2>:uw 1:w // same scheme as U, using scratch rows 4-7
+ shr (8) uwBUFFER_5(5,0)<2> r[a0.0,32]<16;8,2>:uw 1:w
+ shr (8) uwBUFFER_5(6,0)<2> r[a0.0,64]<16;8,2>:uw 1:w
+ shr (8) uwBUFFER_5(7,0)<2> r[a0.0,96]<16;8,2>:uw 1:w
+
+ add (8) uwBUFFER_5(4,0)<2> uwBUFFER_5(4,0)<16;8,2> uwBUFFER_5(5,0)<16;8,2>
+ add.sat (8) r[a0.0,0]<2>:uw uwBUFFER_5(4,0)<16;8,2> 0x0080:uw
+
+ add (8) uwBUFFER_5(6,0)<2> uwBUFFER_5(6,0)<16;8,2> uwBUFFER_5(7,0)<16;8,2>
+ add.sat (8) r[a0.0,64]<2>:uw uwBUFFER_5(6,0)<16;8,2> 0x0080:uw
+ // The a0.0-a0.3 set is finished with this buffer; re-aim it while the a0.4-a0.7 set is processed.
+ add (4) a0.0:uw r22.0<4;4,1>:w 1024:uw //Update Buffer 2 pointers (r22 words + 1024 bytes)
+
+ //Y Rounding, 16x4 bottom part
+ add.sat (16) r[a0.5,0]<1>:uw r[a0.5,0]<16;16,1>:uw 0x0080:uw // same rounding, via the second pointer set
+ add.sat (16) r[a0.5,32]<1>:uw r[a0.5,32]<16;16,1>:uw 0x0080:uw
+ add.sat (16) r[a0.5,64]<1>:uw r[a0.5,64]<16;16,1>:uw 0x0080:uw
+ add.sat (16) r[a0.5,96]<1>:uw r[a0.5,96]<16;16,1>:uw 0x0080:uw
+
+ // U Averaging and Rounding, 8x2 bottom part
+ shr (8) uwBUFFER_5(0,0)<2> r[a0.6,0]<16;8,2>:uw 1:w // scratch rows 0-3 are reused here
+ shr (8) uwBUFFER_5(1,0)<2> r[a0.6,32]<16;8,2>:uw 1:w
+ shr (8) uwBUFFER_5(2,0)<2> r[a0.6,64]<16;8,2>:uw 1:w
+ shr (8) uwBUFFER_5(3,0)<2> r[a0.6,96]<16;8,2>:uw 1:w
+
+ add (8) uwBUFFER_5(0,0)<2> uwBUFFER_5(0,0)<16;8,2> uwBUFFER_5(1,0)<16;8,2>
+ add.sat (8) r[a0.6,0]<2>:uw uwBUFFER_5(0,0)<16;8,2> 0x0080:uw
+
+ add (8) uwBUFFER_5(2,0)<2> uwBUFFER_5(2,0)<16;8,2> uwBUFFER_5(3,0)<16;8,2>
+ add.sat (8) r[a0.6,64]<2>:uw uwBUFFER_5(2,0)<16;8,2> 0x0080:uw
+
+ // V Averaging and Rounding, 8x2 bottom part
+ shr (8) uwBUFFER_5(4,0)<2> r[a0.4,0]<16;8,2>:uw 1:w // scratch rows 4-7 are reused here
+ shr (8) uwBUFFER_5(5,0)<2> r[a0.4,32]<16;8,2>:uw 1:w
+ shr (8) uwBUFFER_5(6,0)<2> r[a0.4,64]<16;8,2>:uw 1:w
+ shr (8) uwBUFFER_5(7,0)<2> r[a0.4,96]<16;8,2>:uw 1:w
+
+ add (8) uwBUFFER_5(4,0)<2> uwBUFFER_5(4,0)<16;8,2> uwBUFFER_5(5,0)<16;8,2>
+ add.sat (8) r[a0.4,0]<2>:uw uwBUFFER_5(4,0)<16;8,2> 0x0080:uw
+
+ add (8) uwBUFFER_5(6,0)<2> uwBUFFER_5(6,0)<16;8,2> uwBUFFER_5(7,0)<16;8,2>
+ add.sat (8) r[a0.4,64]<2>:uw uwBUFFER_5(6,0)<16;8,2> 0x0080:uw
+
+ add (4) a0.4:uw r22.0<4;4,1>:w 1536:uw //Update Buffer 3 pointers (r22 words + 1536 bytes)
+ //Y Rounding 16x4 top part (second pass: a0.0-a0.3 now carry the +1024 offset)
+ add.sat (16) r[a0.1,0]<1>:uw r[a0.1,0]<16;16,1>:uw 0x0080:uw // add 0x80 to each 16-bit sample in place, saturating
+ add.sat (16) r[a0.1,32]<1>:uw r[a0.1,32]<16;16,1>:uw 0x0080:uw
+ add.sat (16) r[a0.1,64]<1>:uw r[a0.1,64]<16;16,1>:uw 0x0080:uw
+ add.sat (16) r[a0.1,96]<1>:uw r[a0.1,96]<16;16,1>:uw 0x0080:uw
+
+ // U Averaging and Rounding, 8x2 top part
+ shr (8) uwBUFFER_5(0,0)<2> r[a0.2,0]<16;8,2>:uw 1:w // halve every other word of each row into scratch rows 0-3
+ shr (8) uwBUFFER_5(1,0)<2> r[a0.2,32]<16;8,2>:uw 1:w
+ shr (8) uwBUFFER_5(2,0)<2> r[a0.2,64]<16;8,2>:uw 1:w
+ shr (8) uwBUFFER_5(3,0)<2> r[a0.2,96]<16;8,2>:uw 1:w
+ // Pairwise row sums of the halved values give the averages; 0x80 is then added for rounding.
+ add (8) uwBUFFER_5(0,0)<2> uwBUFFER_5(0,0)<16;8,2> uwBUFFER_5(1,0)<16;8,2>
+ add.sat (8) r[a0.2,0]<2>:uw uwBUFFER_5(0,0)<16;8,2> 0x0080:uw
+
+ add (8) uwBUFFER_5(2,0)<2> uwBUFFER_5(2,0)<16;8,2> uwBUFFER_5(3,0)<16;8,2>
+ add.sat (8) r[a0.2,64]<2>:uw uwBUFFER_5(2,0)<16;8,2> 0x0080:uw
+
+ // V Averaging and Rounding, 8x2 top part
+ shr (8) uwBUFFER_5(4,0)<2> r[a0.0,0]<16;8,2>:uw 1:w // same scheme, scratch rows 4-7
+ shr (8) uwBUFFER_5(5,0)<2> r[a0.0,32]<16;8,2>:uw 1:w
+ shr (8) uwBUFFER_5(6,0)<2> r[a0.0,64]<16;8,2>:uw 1:w
+ shr (8) uwBUFFER_5(7,0)<2> r[a0.0,96]<16;8,2>:uw 1:w
+
+ add (8) uwBUFFER_5(4,0)<2> uwBUFFER_5(4,0)<16;8,2> uwBUFFER_5(5,0)<16;8,2>
+ add.sat (8) r[a0.0,0]<2>:uw uwBUFFER_5(4,0)<16;8,2> 0x0080:uw
+
+ add (8) uwBUFFER_5(6,0)<2> uwBUFFER_5(6,0)<16;8,2> uwBUFFER_5(7,0)<16;8,2>
+ add.sat (8) r[a0.0,64]<2>:uw uwBUFFER_5(6,0)<16;8,2> 0x0080:uw
+ // NOTE(review): a0.0-a0.3 were already loaded with +1024 before this pass; this reload appears redundant.
+ add (4) a0.0:uw r22.0<4;4,1>:w 1024:uw //Update Buffer 2 pointers
+
+ //Y Rounding, 16x4 bottom part (second pass: a0.4-a0.7 now carry the +1536 offset)
+ add.sat (16) r[a0.5,0]<1>:uw r[a0.5,0]<16;16,1>:uw 0x0080:uw
+ add.sat (16) r[a0.5,32]<1>:uw r[a0.5,32]<16;16,1>:uw 0x0080:uw
+ add.sat (16) r[a0.5,64]<1>:uw r[a0.5,64]<16;16,1>:uw 0x0080:uw
+ add.sat (16) r[a0.5,96]<1>:uw r[a0.5,96]<16;16,1>:uw 0x0080:uw
+
+ // U Averaging and Rounding, 8x2 bottom part
+ shr (8) uwBUFFER_5(0,0)<2> r[a0.6,0]<16;8,2>:uw 1:w // scratch rows 0-3 are reused here
+ shr (8) uwBUFFER_5(1,0)<2> r[a0.6,32]<16;8,2>:uw 1:w
+ shr (8) uwBUFFER_5(2,0)<2> r[a0.6,64]<16;8,2>:uw 1:w
+ shr (8) uwBUFFER_5(3,0)<2> r[a0.6,96]<16;8,2>:uw 1:w
+
+ add (8) uwBUFFER_5(0,0)<2> uwBUFFER_5(0,0)<16;8,2> uwBUFFER_5(1,0)<16;8,2>
+ add.sat (8) r[a0.6,0]<2>:uw uwBUFFER_5(0,0)<16;8,2> 0x0080:uw
+
+ add (8) uwBUFFER_5(2,0)<2> uwBUFFER_5(2,0)<16;8,2> uwBUFFER_5(3,0)<16;8,2>
+ add.sat (8) r[a0.6,64]<2>:uw uwBUFFER_5(2,0)<16;8,2> 0x0080:uw
+
+ // V Averaging and Rounding, 8x2 bottom part
+ shr (8) uwBUFFER_5(4,0)<2> r[a0.4,0]<16;8,2>:uw 1:w // scratch rows 4-7 are reused here
+ shr (8) uwBUFFER_5(5,0)<2> r[a0.4,32]<16;8,2>:uw 1:w
+ shr (8) uwBUFFER_5(6,0)<2> r[a0.4,64]<16;8,2>:uw 1:w
+ shr (8) uwBUFFER_5(7,0)<2> r[a0.4,96]<16;8,2>:uw 1:w
+
+ add (8) uwBUFFER_5(4,0)<2> uwBUFFER_5(4,0)<16;8,2> uwBUFFER_5(5,0)<16;8,2>
+ add.sat (8) r[a0.4,0]<2>:uw uwBUFFER_5(4,0)<16;8,2> 0x0080:uw
+
+ add (8) uwBUFFER_5(6,0)<2> uwBUFFER_5(6,0)<16;8,2> uwBUFFER_5(7,0)<16;8,2>
+ add.sat (8) r[a0.4,64]<2>:uw uwBUFFER_5(6,0)<16;8,2> 0x0080:uw
+ // NOTE(review): the next load of a0.4-a0.7 is overwritten by the restore below before any use.
+ add (4) a0.4:uw r22.0<4;4,1>:w 1536:uw //Update Buffer 3 pointers
+ // restore pBUF_CHNL_TOP_8x4 and pBUF_CHNL_BOT_8x4 registers
+ add (4) a0.0:uw r22.0<4;4,1>:w 0:uw // a0.0-a0.3 back to the r22 words + 0
+ add (4) a0.4:uw r22.0<4;4,1>:w 512:uw // a0.4-a0.7 back to the r22 words + 512
+
+//Buffer 0
+//Move Y to msg payload
+ mov (16) mubMSGPAYLOAD0(0,0)<1> r[a0.1, 1]<32;16,2>:ub { NoDDClr } // byte 1 of each of 16 words -> first 16 bytes of payload GRF 0
+ mov (16) mubMSGPAYLOAD0(0,16)<1> r[a0.1, 33]<32;16,2>:ub { NoDDChk } // next source row -> second 16 bytes of the same GRF
+ mov (16) mubMSGPAYLOAD0(1,0)<1> r[a0.1, 65]<32;16,2>:ub { NoDDClr }
+ mov (16) mubMSGPAYLOAD0(1,16)<1> r[a0.1, 97]<32;16,2>:ub { NoDDChk }
+
+//Move U to msg payload
+ mov (8) mubMSGPAYLOAD1(0,0)<2> r[a0.2, 1]<32;8,4>:ub { NoDDClr } // byte 1 of every other word -> even bytes of the UV row
+ mov (8) mubMSGPAYLOAD1(0,16)<2> r[a0.2, 65]<32;8,4>:ub { NoDDClr, NoDDChk } // second averaged row (source offset 64)
+
+//Move V to msg payload
+ mov (8) mubMSGPAYLOAD1(0,1)<2> r[a0.0, 1]<32;8,4>:ub { NoDDClr, NoDDChk } // odd bytes, so U and V end up interleaved
+ mov (8) mubMSGPAYLOAD1(0,17)<2> r[a0.0, 65]<32;8,4>:ub { NoDDChk }
+
+ add (4) a0.0:uw r22.0<4;4,1>:w 1024:uw //Update Buffer 2 pointers
+
+//Buffer 1 (same Y / U / V packing, read through a0.4-a0.7)
+ mov (16) mubMSGPAYLOAD0(2,0)<1> r[a0.5, 1]<32;16,2>:ub { NoDDClr }
+ mov (16) mubMSGPAYLOAD0(2,16)<1> r[a0.5, 33]<32;16,2>:ub { NoDDChk }
+ mov (16) mubMSGPAYLOAD0(3,0)<1> r[a0.5, 65]<32;16,2>:ub { NoDDClr }
+ mov (16) mubMSGPAYLOAD0(3,16)<1> r[a0.5, 97]<32;16,2>:ub { NoDDChk }
+
+ mov (8) mubMSGPAYLOAD1(1,0)<2> r[a0.6, 1]<32;8,4>:ub { NoDDClr } // U -> even bytes
+ mov (8) mubMSGPAYLOAD1(1,16)<2> r[a0.6, 65]<32;8,4>:ub { NoDDClr, NoDDChk }
+
+ mov (8) mubMSGPAYLOAD1(1,1)<2> r[a0.4, 1]<32;8,4>:ub { NoDDClr, NoDDChk } // V -> odd bytes
+ mov (8) mubMSGPAYLOAD1(1,17)<2> r[a0.4, 65]<32;8,4>:ub { NoDDChk }
+
+ add (4) a0.4:uw r22.0<4;4,1>:w 1536:uw //Update Buffer 3 pointers
+
+//Buffer 2 (a0.0-a0.3 were re-aimed at +1024 above)
+ mov (16) mubMSGPAYLOAD0(4,0)<1> r[a0.1, 1]<32;16,2>:ub { NoDDClr }
+ mov (16) mubMSGPAYLOAD0(4,16)<1> r[a0.1, 33]<32;16,2>:ub { NoDDChk }
+ mov (16) mubMSGPAYLOAD0(5,0)<1> r[a0.1, 65]<32;16,2>:ub { NoDDClr }
+ mov (16) mubMSGPAYLOAD0(5,16)<1> r[a0.1, 97]<32;16,2>:ub { NoDDChk }
+
+ mov (8) mubMSGPAYLOAD1(2,0)<2> r[a0.2, 1]<32;8,4>:ub { NoDDClr } // U -> even bytes
+ mov (8) mubMSGPAYLOAD1(2,16)<2> r[a0.2, 65]<32;8,4>:ub { NoDDClr, NoDDChk }
+
+ mov (8) mubMSGPAYLOAD1(2,1)<2> r[a0.0, 1]<32;8,4>:ub { NoDDClr, NoDDChk } // V -> odd bytes
+ mov (8) mubMSGPAYLOAD1(2,17)<2> r[a0.0, 65]<32;8,4>:ub { NoDDChk }
+
+//Buffer 3 (a0.4-a0.7 were re-aimed at +1536 above)
+ mov (16) mubMSGPAYLOAD0(6,0)<1> r[a0.5, 1]<32;16,2>:ub { NoDDClr }
+ mov (16) mubMSGPAYLOAD0(6,16)<1> r[a0.5, 33]<32;16,2>:ub { NoDDChk }
+ mov (16) mubMSGPAYLOAD0(7,0)<1> r[a0.5, 65]<32;16,2>:ub { NoDDClr }
+ mov (16) mubMSGPAYLOAD0(7,16)<1> r[a0.5, 97]<32;16,2>:ub { NoDDChk }
+
+ mov (8) mubMSGPAYLOAD1(3,0)<2> r[a0.6, 1]<32;8,4>:ub { NoDDClr } // U -> even bytes
+ mov (8) mubMSGPAYLOAD1(3,16)<2> r[a0.6, 65]<32;8,4>:ub { NoDDClr, NoDDChk }
+
+ mov (8) mubMSGPAYLOAD1(3,1)<2> r[a0.4, 1]<32;8,4>:ub { NoDDClr, NoDDChk } // V -> odd bytes
+ mov (8) mubMSGPAYLOAD1(3,17)<2> r[a0.4, 65]<32;8,4>:ub { NoDDChk }
+//===========================================================================
+// Issue the two writes; descriptor fields follow the "dataport media write" layout described in the common.inc comments.
+send (1) null<1>:d r28 0x5 0x120A8018:ud // Y: message length 9 (header r28 + 8 payload GRFs), binding table index 0x18
+send (1) null<1>:d r37 0x5 0xA0A8019:ud // UV: message length 5 (header r37 + 4 payload GRFs), binding table index 0x19
diff --git a/src/shaders/post_processing/gen7/Save_AVS_PA.g4a b/src/shaders/post_processing/gen7/Save_AVS_PA.g4a
new file mode 100644
index 0000000..42f87c3
--- /dev/null
+++ b/src/shaders/post_processing/gen7/Save_AVS_PA.g4a
@@ -0,0 +1,625 @@
+/*
+ * Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// 174 // Total instruction count
+// 1 // Total kernel count
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16)
+.default_register_type :ub
+
+.reg_count_total 128
+.reg_count_payload 7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later)
+// 0000,0000,0000
+// 0001(Spawn a root thread),0001 (Root thread spawn thread)
+// = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later)
+// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+// 0000,0000 (Binding table index, added later)
+// = 0x02000011
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+ // Message Descriptors
+ // = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+ // 1 (header present 1) 0 1010 (media block write) 000000
+ // 00000000 (binding table index - set later)
+ // = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+ // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT
+
+
+.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+
+.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+
+.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+
+
+// End of common.inc
+
+
+// Module name: Save_AVS_PA.asm
+//
+// Save PA 422 frame data block of size 16x16
+//
+// To save 16x16 block (32x16 bytes of YUYV) we need 2 send instructions of size 16x16 each.
+// -------------------------------
+// | 16x16 | 16x16 |
+// | YUYV | YUYV |
+// -------------------------------
+// these 2 sends are replaced by 8 32x2 sends to improve performance
+
+
+
+// Module name: Save.inc
+
+
+
+
+// Description: Includes all definitions explicit to Fast Composite.
+
+
+
+
+// End of common.inc
+
+
+//========== GRF partition ==========
+ // r0 header : r0 (1 GRF)
+ // Static parameters : r1 - r6 (6 GRFS)
+ // Inline parameters : r7 - r8 (2 GRFs)
+ // MSGSRC : r27 (1 GRF)
+//===================================
+
+//Interface:
+//========== Static Parameters (Explicit To Fast Composite) ==========
+//r1
+//CSC Set 0
+
+
+.declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud
+
+//Constant alpha
+
+
+//r2
+
+
+// Gen7 AVS WA
+
+
+// WiDi Definitions
+
+
+//Colorfill
+
+
+ // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise.
+
+.declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub
+
+//r3
+//Normalised Ratio of Horizontal step size with main video for all layers
+
+
+ //Normalised Ratio of Horizontal step size with main video for all layers becomes
+ //Normalised Horizontal step size for all layers in VP_Setup.asm
+
+
+//r4
+//Normalised Vertical step size for all layers
+
+
+//r5
+//Normalised Vertical Frame Origin for all layers
+
+
+//r6
+//Normalised Horizontal Frame Origin for all layers
+
+
+//========== Inline Parameters (Explicit To Fast Composite) ==========
+
+
+//Main video Step X
+
+
+//====================== Binding table (Explicit To Fast Composite)=========================================
+
+
+//Used by Interlaced Scaling Kernels
+
+
+//========== Sampler State Table Index (Explicit To Fast Composite)==========
+//Sampler Index for AVS/IEF messages
+
+
+//Sampler Index for SIMD16 sampler messages
+
+
+//=============================================================================
+
+.declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+
+.declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+.declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_4 Base=r28.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_5 Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+.declare ubBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+.declare ub4BUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+
+//Pointer to mask reg
+
+
+//r18
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+.declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF
+
+//r19
+
+
+.declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF
+
+
+//r20
+
+.declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF
+
+//r21
+
+.declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF
+
+//r22
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as
+//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT
+
+//r23
+
+
+//Lumakey
+
+
+//r24
+
+
+//r25
+
+
+//r26
+
+
+//defines to generate LABELS during compile time.
+
+
+//Msg payload buffers; up to 4 full-size messages can be written
+
+
+.declare mudMSGPAYLOAD0 Base=r29.0 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare mudMSGPAYLOAD1 Base=r38.0 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare mudMSGPAYLOAD2 Base=r47.0 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare mudMSGPAYLOAD3 Base=r56.0 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+
+.declare muwMSGPAYLOAD0 Base=r29.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare muwMSGPAYLOAD1 Base=r38.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare muwMSGPAYLOAD2 Base=r47.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare muwMSGPAYLOAD3 Base=r56.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+
+.declare mubMSGPAYLOAD0 Base=r29.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare mubMSGPAYLOAD1 Base=r38.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare mubMSGPAYLOAD2 Base=r47.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare mubMSGPAYLOAD3 Base=r56.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare mubMSGPAYLOAD4 Base=r32.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare mubMSGPAYLOAD5 Base=r41.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare mubMSGPAYLOAD6 Base=r50.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare mubMSGPAYLOAD7 Base=r59.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+
+
+ // the r17 register (nTEMP0) is originally defined from "Common.inc"
+ // instead of re-defining a nTEMP0 here, we use "SAVE_RGB" suffix for its naming
+
+ .declare uwTemp0 Base=r17.0 ElementSize=2 Type=uw
+
+
+//_SAVE_INC_
+
+
+ //wBUFF_CHNL_PTR points to buffer 0.
+ //Add appropriate offsets to get pointers for all buffers (1,2,3).
+ //Offset is zero for buffer 0.
+ add (4) a0.0:uw r22.0<4;4,1>:w 0:uw // a0.0-a0.3 = r22.0-r22.3 + 0: source channel pointers for buffer 0
+
+ //Set DEST pointers according to output packing i.e. YUYV, YVYU, UYVY, VYUY
+ add (4) a0.4<1>:w r2.28<4;4,1>:ub 928:uw // a0.4-a0.7 = 928 + bytes r2.28-r2.31; 928 = 29*32, presumably the byte address of r29 (first payload GRF) - confirm
+
+ shl (1) r27.0<1>:d r7.0<0;1,0>:w 1:w { NoDDClr } // Horizontal block origin = r7.0 * 2
+ mov (1) r27.1<1>:d r7.1<0;1,0>:w { NoDDClr, NoDDChk } // Vertical block origin = r7.1 (1st quadrant)
+ mov (1) r27.2<1>:ud 0x1001F:ud { NoDDChk } // Block width and height (32x2), stored as (width-1) in the low word and (height-1) in the high word
+
+// Rounding: add 0x0080 (saturating) in place to the 16-bit samples of buffer 0, so that byte 1 of each word, which is what gets packed later, is rounded
+ // left: byte offsets 0/32/64/96 from each channel pointer
+ add.sat (4) r[a0.0, 0]<2>:uw r[a0.0, 0]<8;4,2>:uw 0x0080:uw // a0.0 channel: every other word (stride 2)
+ add.sat (4) r[a0.0,32]<2>:uw r[a0.0, 32]<8;4,2>:uw 0x0080:uw
+ add.sat (4) r[a0.0,64]<2>:uw r[a0.0, 64]<8;4,2>:uw 0x0080:uw
+ add.sat (4) r[a0.0,96]<2>:uw r[a0.0, 96]<8;4,2>:uw 0x0080:uw
+
+ add.sat (8) r[a0.1, 0]<1>:uw r[a0.1, 0]<8;8,1>:uw 0x0080:uw // a0.1 channel: all 8 consecutive words
+ add.sat (8) r[a0.1,32]<1>:uw r[a0.1, 32]<8;8,1>:uw 0x0080:uw
+ add.sat (8) r[a0.1,64]<1>:uw r[a0.1, 64]<8;8,1>:uw 0x0080:uw
+ add.sat (8) r[a0.1,96]<1>:uw r[a0.1, 96]<8;8,1>:uw 0x0080:uw
+
+ add.sat (4) r[a0.2, 0]<2>:uw r[a0.2, 0]<8;4,2>:uw 0x0080:uw // a0.2 channel: every other word (stride 2)
+ add.sat (4) r[a0.2,32]<2>:uw r[a0.2, 32]<8;4,2>:uw 0x0080:uw
+ add.sat (4) r[a0.2,64]<2>:uw r[a0.2, 64]<8;4,2>:uw 0x0080:uw
+ add.sat (4) r[a0.2,96]<2>:uw r[a0.2, 96]<8;4,2>:uw 0x0080:uw
+
+ // right: same four rows, 16 bytes further on (offsets 16/48/80/112)
+ add.sat (4) r[a0.0,16]<2>:uw r[a0.0, 16]<8;4,2>:uw 0x0080:uw
+ add.sat (4) r[a0.0,48]<2>:uw r[a0.0, 48]<8;4,2>:uw 0x0080:uw
+ add.sat (4) r[a0.0,80]<2>:uw r[a0.0, 80]<8;4,2>:uw 0x0080:uw
+ add.sat (4) r[a0.0,112]<2>:uw r[a0.0, 112]<8;4,2>:uw 0x0080:uw
+
+ add.sat (8) r[a0.1, 16]<1>:uw r[a0.1, 16]<8;8,1>:uw 0x0080:uw
+ add.sat (8) r[a0.1,48]<1>:uw r[a0.1, 48]<8;8,1>:uw 0x0080:uw
+ add.sat (8) r[a0.1,80]<1>:uw r[a0.1, 80]<8;8,1>:uw 0x0080:uw
+ add.sat (8) r[a0.1,112]<1>:uw r[a0.1, 112]<8;8,1>:uw 0x0080:uw
+
+ add.sat (4) r[a0.2, 16]<2>:uw r[a0.2, 16]<8;4,2>:uw 0x0080:uw
+ add.sat (4) r[a0.2,48]<2>:uw r[a0.2, 48]<8;4,2>:uw 0x0080:uw
+ add.sat (4) r[a0.2,80]<2>:uw r[a0.2, 80]<8;4,2>:uw 0x0080:uw
+ add.sat (4) r[a0.2,112]<2>:uw r[a0.2, 112]<8;4,2>:uw 0x0080:uw
+
+ add (4) a0.0:uw r22.0<4;4,1>:w 512:uw // a0.0-a0.3 = channel pointers + 512 bytes (16 GRFs): buffer 1
+ // left: rounding of buffer 1, byte offsets 0/32/64/96 from each channel pointer
+ add.sat (4) r[a0.0, 0]<2>:uw r[a0.0, 0]<8;4,2>:uw 0x0080:uw
+ add.sat (4) r[a0.0,32]<2>:uw r[a0.0, 32]<8;4,2>:uw 0x0080:uw
+ add.sat (4) r[a0.0,64]<2>:uw r[a0.0, 64]<8;4,2>:uw 0x0080:uw
+ add.sat (4) r[a0.0,96]<2>:uw r[a0.0, 96]<8;4,2>:uw 0x0080:uw
+
+ add.sat (8) r[a0.1, 0]<1>:uw r[a0.1, 0]<8;8,1>:uw 0x0080:uw
+ add.sat (8) r[a0.1,32]<1>:uw r[a0.1, 32]<8;8,1>:uw 0x0080:uw
+ add.sat (8) r[a0.1,64]<1>:uw r[a0.1, 64]<8;8,1>:uw 0x0080:uw
+ add.sat (8) r[a0.1,96]<1>:uw r[a0.1, 96]<8;8,1>:uw 0x0080:uw
+
+ add.sat (4) r[a0.2, 0]<2>:uw r[a0.2, 0]<8;4,2>:uw 0x0080:uw
+ add.sat (4) r[a0.2,32]<2>:uw r[a0.2, 32]<8;4,2>:uw 0x0080:uw
+ add.sat (4) r[a0.2,64]<2>:uw r[a0.2, 64]<8;4,2>:uw 0x0080:uw
+ add.sat (4) r[a0.2,96]<2>:uw r[a0.2, 96]<8;4,2>:uw 0x0080:uw
+
+ // right: same four rows, 16 bytes further on (offsets 16/48/80/112)
+ add.sat (4) r[a0.0,16]<2>:uw r[a0.0, 16]<8;4,2>:uw 0x0080:uw
+ add.sat (4) r[a0.0,48]<2>:uw r[a0.0, 48]<8;4,2>:uw 0x0080:uw
+ add.sat (4) r[a0.0,80]<2>:uw r[a0.0, 80]<8;4,2>:uw 0x0080:uw
+ add.sat (4) r[a0.0,112]<2>:uw r[a0.0, 112]<8;4,2>:uw 0x0080:uw
+
+ add.sat (8) r[a0.1, 16]<1>:uw r[a0.1, 16]<8;8,1>:uw 0x0080:uw
+ add.sat (8) r[a0.1,48]<1>:uw r[a0.1, 48]<8;8,1>:uw 0x0080:uw
+ add.sat (8) r[a0.1,80]<1>:uw r[a0.1, 80]<8;8,1>:uw 0x0080:uw
+ add.sat (8) r[a0.1,112]<1>:uw r[a0.1, 112]<8;8,1>:uw 0x0080:uw
+
+ add.sat (4) r[a0.2, 16]<2>:uw r[a0.2, 16]<8;4,2>:uw 0x0080:uw
+ add.sat (4) r[a0.2,48]<2>:uw r[a0.2, 48]<8;4,2>:uw 0x0080:uw
+ add.sat (4) r[a0.2,80]<2>:uw r[a0.2, 80]<8;4,2>:uw 0x0080:uw
+ add.sat (4) r[a0.2,112]<2>:uw r[a0.2, 112]<8;4,2>:uw 0x0080:uw
+
+ add (4) a0.0:uw r22.0<4;4,1>:w 1024:uw // a0.0-a0.3 = channel pointers + 1024 bytes (32 GRFs): buffer 2
+ // left: rounding of buffer 2, byte offsets 0/32/64/96 from each channel pointer
+ add.sat (4) r[a0.0, 0]<2>:uw r[a0.0, 0]<8;4,2>:uw 0x0080:uw
+ add.sat (4) r[a0.0,32]<2>:uw r[a0.0, 32]<8;4,2>:uw 0x0080:uw
+ add.sat (4) r[a0.0,64]<2>:uw r[a0.0, 64]<8;4,2>:uw 0x0080:uw
+ add.sat (4) r[a0.0,96]<2>:uw r[a0.0, 96]<8;4,2>:uw 0x0080:uw
+
+ add.sat (8) r[a0.1, 0]<1>:uw r[a0.1, 0]<8;8,1>:uw 0x0080:uw
+ add.sat (8) r[a0.1,32]<1>:uw r[a0.1, 32]<8;8,1>:uw 0x0080:uw
+ add.sat (8) r[a0.1,64]<1>:uw r[a0.1, 64]<8;8,1>:uw 0x0080:uw
+ add.sat (8) r[a0.1,96]<1>:uw r[a0.1, 96]<8;8,1>:uw 0x0080:uw
+
+ add.sat (4) r[a0.2, 0]<2>:uw r[a0.2, 0]<8;4,2>:uw 0x0080:uw
+ add.sat (4) r[a0.2,32]<2>:uw r[a0.2, 32]<8;4,2>:uw 0x0080:uw
+ add.sat (4) r[a0.2,64]<2>:uw r[a0.2, 64]<8;4,2>:uw 0x0080:uw
+ add.sat (4) r[a0.2,96]<2>:uw r[a0.2, 96]<8;4,2>:uw 0x0080:uw
+
+ // right: same four rows, 16 bytes further on (offsets 16/48/80/112)
+ add.sat (4) r[a0.0,16]<2>:uw r[a0.0, 16]<8;4,2>:uw 0x0080:uw
+ add.sat (4) r[a0.0,48]<2>:uw r[a0.0, 48]<8;4,2>:uw 0x0080:uw
+ add.sat (4) r[a0.0,80]<2>:uw r[a0.0, 80]<8;4,2>:uw 0x0080:uw
+ add.sat (4) r[a0.0,112]<2>:uw r[a0.0, 112]<8;4,2>:uw 0x0080:uw
+
+ add.sat (8) r[a0.1, 16]<1>:uw r[a0.1, 16]<8;8,1>:uw 0x0080:uw
+ add.sat (8) r[a0.1,48]<1>:uw r[a0.1, 48]<8;8,1>:uw 0x0080:uw
+ add.sat (8) r[a0.1,80]<1>:uw r[a0.1, 80]<8;8,1>:uw 0x0080:uw
+ add.sat (8) r[a0.1,112]<1>:uw r[a0.1, 112]<8;8,1>:uw 0x0080:uw
+
+ add.sat (4) r[a0.2, 16]<2>:uw r[a0.2, 16]<8;4,2>:uw 0x0080:uw
+ add.sat (4) r[a0.2,48]<2>:uw r[a0.2, 48]<8;4,2>:uw 0x0080:uw
+ add.sat (4) r[a0.2,80]<2>:uw r[a0.2, 80]<8;4,2>:uw 0x0080:uw
+ add.sat (4) r[a0.2,112]<2>:uw r[a0.2, 112]<8;4,2>:uw 0x0080:uw
+
+ add (4) a0.0:uw r22.0<4;4,1>:w 1536:uw // a0.0-a0.3 = channel pointers + 1536 bytes (48 GRFs): buffer 3
+ // left: rounding of buffer 3, byte offsets 0/32/64/96 from each channel pointer
+ add.sat (4) r[a0.0, 0]<2>:uw r[a0.0, 0]<8;4,2>:uw 0x0080:uw
+ add.sat (4) r[a0.0,32]<2>:uw r[a0.0, 32]<8;4,2>:uw 0x0080:uw
+ add.sat (4) r[a0.0,64]<2>:uw r[a0.0, 64]<8;4,2>:uw 0x0080:uw
+ add.sat (4) r[a0.0,96]<2>:uw r[a0.0, 96]<8;4,2>:uw 0x0080:uw
+
+ add.sat (8) r[a0.1, 0]<1>:uw r[a0.1, 0]<8;8,1>:uw 0x0080:uw
+ add.sat (8) r[a0.1,32]<1>:uw r[a0.1, 32]<8;8,1>:uw 0x0080:uw
+ add.sat (8) r[a0.1,64]<1>:uw r[a0.1, 64]<8;8,1>:uw 0x0080:uw
+ add.sat (8) r[a0.1,96]<1>:uw r[a0.1, 96]<8;8,1>:uw 0x0080:uw
+
+ add.sat (4) r[a0.2, 0]<2>:uw r[a0.2, 0]<8;4,2>:uw 0x0080:uw
+ add.sat (4) r[a0.2,32]<2>:uw r[a0.2, 32]<8;4,2>:uw 0x0080:uw
+ add.sat (4) r[a0.2,64]<2>:uw r[a0.2, 64]<8;4,2>:uw 0x0080:uw
+ add.sat (4) r[a0.2,96]<2>:uw r[a0.2, 96]<8;4,2>:uw 0x0080:uw
+
+ // right: same four rows, 16 bytes further on (offsets 16/48/80/112)
+ add.sat (4) r[a0.0,16]<2>:uw r[a0.0, 16]<8;4,2>:uw 0x0080:uw
+ add.sat (4) r[a0.0,48]<2>:uw r[a0.0, 48]<8;4,2>:uw 0x0080:uw
+ add.sat (4) r[a0.0,80]<2>:uw r[a0.0, 80]<8;4,2>:uw 0x0080:uw
+ add.sat (4) r[a0.0,112]<2>:uw r[a0.0, 112]<8;4,2>:uw 0x0080:uw
+
+ add.sat (8) r[a0.1, 16]<1>:uw r[a0.1, 16]<8;8,1>:uw 0x0080:uw
+ add.sat (8) r[a0.1,48]<1>:uw r[a0.1, 48]<8;8,1>:uw 0x0080:uw
+ add.sat (8) r[a0.1,80]<1>:uw r[a0.1, 80]<8;8,1>:uw 0x0080:uw
+ add.sat (8) r[a0.1,112]<1>:uw r[a0.1, 112]<8;8,1>:uw 0x0080:uw
+
+ add.sat (4) r[a0.2, 16]<2>:uw r[a0.2, 16]<8;4,2>:uw 0x0080:uw
+ add.sat (4) r[a0.2,48]<2>:uw r[a0.2, 48]<8;4,2>:uw 0x0080:uw
+ add.sat (4) r[a0.2,80]<2>:uw r[a0.2, 80]<8;4,2>:uw 0x0080:uw
+ add.sat (4) r[a0.2,112]<2>:uw r[a0.2, 112]<8;4,2>:uw 0x0080:uw
+
+ add (4) a0.0:uw r22.0<4;4,1>:w 2048:uw // NOTE(review): a0.0-a0.3 are overwritten by the next instruction before being read; this add looks redundant
+ // restore pointer: a0.0-a0.3 back to the buffer 0 channel pointers for the packing passes
+ add (4) a0.0:uw r22.0<4;4,1>:w 0:uw
+
+ mov (8) r28<1>:ud r27<8;8,1>:ud // header of the first message = copy of r27 (origin and block size)
+ mov (8) r37<1>:ud r27<8;8,1>:ud // header of the second message = copy of r27
+ add (1) r37.1<1>:d r27.1<0;1,0>:d 2:d // second message: vertical origin + 2 (the next 2-row block)
+
+ // Pack byte 1 of each rounded word from buffer 0 into the payload at the a0.4-a0.6 destinations (dest offsets 0 and 32)
+ mov (8) r[a0.6, 0]<4>:ub r[a0.0, 1]<32;8,4>:ub { NoDDClr }
+ mov (8) r[a0.6, 32]<4>:ub r[a0.0,33]<32;8,4>:ub { NoDDClr }
+ mov (16) r[a0.4, 0]<2>:ub r[a0.1, 1]<32;16,2>:ub { NoDDClr, NoDDChk }
+ mov (16) r[a0.4, 32]<2>:ub r[a0.1,33]<32;16,2>:ub { NoDDClr, NoDDChk }
+ mov (8) r[a0.5, 0]<4>:ub r[a0.2, 1]<32;8,4>:ub { NoDDChk }
+ mov (8) r[a0.5, 32]<4>:ub r[a0.2,33]<32;8,4>:ub { NoDDChk }
+ // Same packing for source rows at offsets 65/97; dest offsets 288/320 are 9 GRFs (9*32 bytes) further on, presumably the second message's payload - confirm
+ mov (8) r[a0.6, 288]<4>:ub r[a0.0,65]<32;8,4>:ub { NoDDClr }
+ mov (8) r[a0.6, 320]<4>:ub r[a0.0,97]<32;8,4>:ub { NoDDClr }
+ mov (16) r[a0.4,288]<2>:ub r[a0.1,65]<32;16,2>:ub { NoDDClr, NoDDChk }
+ mov (16) r[a0.4,320]<2>:ub r[a0.1,97]<32;16,2>:ub { NoDDClr, NoDDChk }
+ mov (8) r[a0.5,288]<4>:ub r[a0.2,65]<32;8,4>:ub { NoDDChk }
+ mov (8) r[a0.5,320]<4>:ub r[a0.2,97]<32;8,4>:ub { NoDDChk }
+
+ send (1) null<1>:d r28 0x5 0x60A8018:ud // descriptor: message length 3 (bits 28:25), low byte 0x18; header in r28
+ send (1) null<1>:d r37 0x5 0x60A8018:ud // same descriptor; header in r37 (vertical origin + 2)
+
+ // advance source pointers to buffer 1 (+512 bytes)
+ add (4) a0.0:uw r22.0<4;4,1>:w 512:uw
+
+ add (1) r28.1<1>:d r27.1<0;1,0>:d 4:d // first message: vertical origin + 4
+ add (1) r37.1<1>:d r27.1<0;1,0>:d 6:d // second message: vertical origin + 6
+
+ // Pack byte 1 of each rounded word from buffer 1; same pattern as for buffer 0
+ mov (8) r[a0.6, 0]<4>:ub r[a0.0, 1]<32;8,4>:ub { NoDDClr }
+ mov (8) r[a0.6, 32]<4>:ub r[a0.0,33]<32;8,4>:ub { NoDDClr }
+ mov (16) r[a0.4, 0]<2>:ub r[a0.1, 1]<32;16,2>:ub { NoDDClr, NoDDChk }
+ mov (16) r[a0.4, 32]<2>:ub r[a0.1,33]<32;16,2>:ub { NoDDClr, NoDDChk }
+ mov (8) r[a0.5, 0]<4>:ub r[a0.2, 1]<32;8,4>:ub { NoDDChk }
+ mov (8) r[a0.5, 32]<4>:ub r[a0.2,33]<32;8,4>:ub { NoDDChk }
+
+ mov (8) r[a0.6, 288]<4>:ub r[a0.0,65]<32;8,4>:ub { NoDDClr }
+ mov (8) r[a0.6, 320]<4>:ub r[a0.0,97]<32;8,4>:ub { NoDDClr }
+ mov (16) r[a0.4,288]<2>:ub r[a0.1,65]<32;16,2>:ub { NoDDClr, NoDDChk }
+ mov (16) r[a0.4,320]<2>:ub r[a0.1,97]<32;16,2>:ub { NoDDClr, NoDDChk }
+ mov (8) r[a0.5,288]<4>:ub r[a0.2,65]<32;8,4>:ub { NoDDChk }
+ mov (8) r[a0.5,320]<4>:ub r[a0.2,97]<32;8,4>:ub { NoDDChk }
+
+ send (1) null<1>:d r28 0x5 0x60A8018:ud // descriptor: message length 3, low byte 0x18; header in r28 (vertical origin + 4)
+ send (1) null<1>:d r37 0x5 0x60A8018:ud // same descriptor; header in r37 (vertical origin + 6)
+
+ // advance source pointers to buffer 2 (+1024 bytes)
+ add (4) a0.0:uw r22.0<4;4,1>:w 1024:uw
+
+ add (1) r28.1<1>:d r27.1<0;1,0>:d 8:d // first message: vertical origin + 8
+ add (1) r37.1<1>:d r27.1<0;1,0>:d 10:d // second message: vertical origin + 10
+
+ // Pack byte 1 of each rounded word from buffer 2; same pattern as for buffer 0
+ mov (8) r[a0.6, 0]<4>:ub r[a0.0, 1]<32;8,4>:ub { NoDDClr }
+ mov (8) r[a0.6, 32]<4>:ub r[a0.0,33]<32;8,4>:ub { NoDDClr }
+ mov (16) r[a0.4, 0]<2>:ub r[a0.1, 1]<32;16,2>:ub { NoDDClr, NoDDChk }
+ mov (16) r[a0.4, 32]<2>:ub r[a0.1,33]<32;16,2>:ub { NoDDClr, NoDDChk }
+ mov (8) r[a0.5, 0]<4>:ub r[a0.2, 1]<32;8,4>:ub { NoDDChk }
+ mov (8) r[a0.5, 32]<4>:ub r[a0.2,33]<32;8,4>:ub { NoDDChk }
+
+ mov (8) r[a0.6, 288]<4>:ub r[a0.0,65]<32;8,4>:ub { NoDDClr }
+ mov (8) r[a0.6, 320]<4>:ub r[a0.0,97]<32;8,4>:ub { NoDDClr }
+ mov (16) r[a0.4,288]<2>:ub r[a0.1,65]<32;16,2>:ub { NoDDClr, NoDDChk }
+ mov (16) r[a0.4,320]<2>:ub r[a0.1,97]<32;16,2>:ub { NoDDClr, NoDDChk }
+ mov (8) r[a0.5,288]<4>:ub r[a0.2,65]<32;8,4>:ub { NoDDChk }
+ mov (8) r[a0.5,320]<4>:ub r[a0.2,97]<32;8,4>:ub { NoDDChk }
+
+ send (1) null<1>:d r28 0x5 0x60A8018:ud // descriptor: message length 3, low byte 0x18; header in r28 (vertical origin + 8)
+ send (1) null<1>:d r37 0x5 0x60A8018:ud // same descriptor; header in r37 (vertical origin + 10)
+
+ // advance source pointers to buffer 3 (+1536 bytes)
+ add (4) a0.0:uw r22.0<4;4,1>:w 1536:uw
+
+ add (1) r28.1<1>:d r27.1<0;1,0>:d 12:d // first message: vertical origin + 12
+ add (1) r37.1<1>:d r27.1<0;1,0>:d 14:d // second message: vertical origin + 14
+ // Pack byte 1 of each rounded word from buffer 3; same pattern as for buffer 0
+ mov (8) r[a0.6, 0]<4>:ub r[a0.0, 1]<32;8,4>:ub { NoDDClr }
+ mov (8) r[a0.6, 32]<4>:ub r[a0.0,33]<32;8,4>:ub { NoDDClr }
+ mov (16) r[a0.4, 0]<2>:ub r[a0.1, 1]<32;16,2>:ub { NoDDClr, NoDDChk }
+ mov (16) r[a0.4, 32]<2>:ub r[a0.1,33]<32;16,2>:ub { NoDDClr, NoDDChk }
+ mov (8) r[a0.5, 0]<4>:ub r[a0.2, 1]<32;8,4>:ub { NoDDChk }
+ mov (8) r[a0.5, 32]<4>:ub r[a0.2,33]<32;8,4>:ub { NoDDChk }
+
+ mov (8) r[a0.6, 288]<4>:ub r[a0.0,65]<32;8,4>:ub { NoDDClr }
+ mov (8) r[a0.6, 320]<4>:ub r[a0.0,97]<32;8,4>:ub { NoDDClr }
+ mov (16) r[a0.4,288]<2>:ub r[a0.1,65]<32;16,2>:ub { NoDDClr, NoDDChk }
+ mov (16) r[a0.4,320]<2>:ub r[a0.1,97]<32;16,2>:ub { NoDDClr, NoDDChk }
+ mov (8) r[a0.5,288]<4>:ub r[a0.2,65]<32;8,4>:ub { NoDDChk }
+ mov (8) r[a0.5,320]<4>:ub r[a0.2,97]<32;8,4>:ub { NoDDChk }
+
+ send (1) null<1>:d r28 0x5 0x60A8018:ud // descriptor: message length 3, low byte 0x18; header in r28 (vertical origin + 12)
+ send (1) null<1>:d r37 0x5 0x60A8018:ud // same descriptor; header in r37 (vertical origin + 14)
+
diff --git a/src/shaders/post_processing/gen7/Save_AVS_PL3.g4a b/src/shaders/post_processing/gen7/Save_AVS_PL3.g4a
new file mode 100644
index 0000000..cecb5be
--- /dev/null
+++ b/src/shaders/post_processing/gen7/Save_AVS_PL3.g4a
@@ -0,0 +1,564 @@
+/*
+ * Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// 84 // Total instruction count
+// 1 // Total kernel count
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16)
+.default_register_type :ub
+
+.reg_count_total 128
+.reg_count_payload 7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later)
+// 0000,0000,0000
+// 0001(Spawn a root thread),0001 (Root thread spawn thread)
+// = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later)
+// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+// 0000,0000 (Binding table index, added later)
+// = 0x0C098700
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+ // Message Descriptors
+ // = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+ // 1 (header present 1) 0 1010 (media block write) 000000
+ // 00000000 (binding table index - set later)
+ // = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+ // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT
+
+
+.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+
+.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+
+.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+
+
+// End of common.inc
+
+
+// Module name: Save_AVS_PL3.asm
+//
+// Save PL3 420 frame data block of size 16x16
+//
+// To save 16x16 block (16x16 byte of Y and 8x8 byte of U and V each) we need 3 send instructions with one of size 16x16 and two of size 8x8.
+// -----------------
+// | 16x16 Y |
+// | |
+// -----------------
+// | 8x8 U |
+// ---------
+// | 8x8 V |
+// ---------
+
+//-----------------------------------------------------------------
+//The layout of data is as follows:
+//mMSGHDR0 : Y data header (16x16)
+//mubMSGPAYLOAD0 : Y data payload (8 GRFs)
+//mMSGHDR1 : U data header (8x8)
+//mubMSGPAYLOAD1 : U data payload (2 GRFs)
+//mMSGHDR2 : V data header (8x8)
+//mubMSGPAYLOAD2 : V data payload (2 GRFs)
+//------------------------------------------------------------------
+
+
+
+// Module name: Save.inc
+
+
+
+
+// Description: Includes all definitions explicit to Fast Composite.
+
+
+
+
+// End of common.inc
+
+
+//========== GRF partition ==========
+ // r0 header : r0 (1 GRF)
+ // Static parameters : r1 - r6 (6 GRFS)
+ // Inline parameters : r7 - r8 (2 GRFs)
+ // MSGSRC : r27 (1 GRF)
+//===================================
+
+//Interface:
+//========== Static Parameters (Explicit To Fast Composite) ==========
+//r1
+//CSC Set 0
+
+
+.declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud
+
+//Constant alpha
+
+
+//r2
+
+
+// Gen7 AVS WA
+
+
+// WiDi Definitions
+
+
+//Colorfill
+
+
+ // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise.
+
+.declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub
+
+//r3
+//Normalised Ratio of Horizontal step size with main video for all layers
+
+
+ //Normalised Ratio of Horizontal step size with main video for all layers becomes
+ //Normalised Horizontal step size for all layers in VP_Setup.asm
+
+
+//r4
+//Normalised Vertical step size for all layers
+
+
+//r5
+//Normalised Vertical Frame Origin for all layers
+
+
+//r6
+//Normalised Horizontal Frame Origin for all layers
+
+
+//========== Inline Parameters (Explicit To Fast Composite) ==========
+
+
+//Main video Step X
+
+
+//====================== Binding table (Explicit To Fast Composite)=========================================
+
+
+//Used by Interlaced Scaling Kernels
+
+
+//========== Sampler State Table Index (Explicit To Fast Composite)==========
+//Sampler Index for AVS/IEF messages
+
+
+//Sampler Index for SIMD16 sampler messages
+
+
+//=============================================================================
+
+.declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f // float views; buffers 0-3 start 16 GRFs apart (r64, r80, r96, r112)
+.declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f // buffers 4 and 5 start at r28 and r46
+.declare fBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+
+.declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud // unsigned dword views of the same six base registers
+.declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+.declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw // unsigned word views of the same six base registers
+.declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_4 Base=r28.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_5 Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+.declare ubBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub // unsigned byte views, contiguous (stride 1)
+.declare ubBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+.declare ub4BUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub // unsigned byte views with stride 4 (one byte out of every dword)
+.declare ub4BUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+
+//Pointer to mask reg
+
+
+//r18
+
+
+//Always keep Channel Pointers and Offsets in the same GRF, so that we can use
+// the NODDCLR, NODDCHK flags. -rT
+
+
+.declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF
+
+//r19
+
+
+.declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF
+
+
+//r20
+
+.declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF
+
+//r21
+
+.declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF
+
+//r22
+
+
+//Always keep Channel Pointers and Offsets in the same GRF, so that we can use
+// the NODDCLR, NODDCHK flags. -rT
+
+
+//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as
+//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT
+
+//r23
+
+
+//Lumakey
+
+
+//r24
+
+
+//r25
+
+
+//r26
+
+
+//defines to generate LABELS during compile time.
+
+
+//Msg payload buffers; up to 4 full-size messages can be written
+// Payloads 0-3 start 9 GRFs apart (r29, r38, r47, r56) and each has dword, word and byte views.
+
+.declare mudMSGPAYLOAD0 Base=r29.0 ElementSize=4 SrcRegion=<8;8,1> Type=ud // dword views
+.declare mudMSGPAYLOAD1 Base=r38.0 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare mudMSGPAYLOAD2 Base=r47.0 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare mudMSGPAYLOAD3 Base=r56.0 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+
+.declare muwMSGPAYLOAD0 Base=r29.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // word views of the same registers
+.declare muwMSGPAYLOAD1 Base=r38.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare muwMSGPAYLOAD2 Base=r47.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare muwMSGPAYLOAD3 Base=r56.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+
+.declare mubMSGPAYLOAD0 Base=r29.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub // byte views of the same registers
+.declare mubMSGPAYLOAD1 Base=r38.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare mubMSGPAYLOAD2 Base=r47.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare mubMSGPAYLOAD3 Base=r56.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare mubMSGPAYLOAD4 Base=r32.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub // byte-only views starting 3 GRFs into payloads 0-3 (r32, r41, r50, r59)
+.declare mubMSGPAYLOAD5 Base=r41.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare mubMSGPAYLOAD6 Base=r50.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare mubMSGPAYLOAD7 Base=r59.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+
+
+ // the r17 register (nTEMP0) is originally defined from "Common.inc"
+ // instead of re-defining a nTEMP0 here, we use "SAVE_RGB" suffix for its naming
+
+ .declare uwTemp0 Base=r17.0 ElementSize=2 Type=uw
+
+
+//_SAVE_INC_
+
+
+ // At the save module we have all 8 address sub-registers available.
+ // So we will use a PING-PONG type of scheme to save the data using
+ // pointers pBUF_CHNL_TOP_8x4 and pBUF_CHNL_BOT_8x4. This will help
+ // reduce dependency. - rT
+
+ //wBUFF_CHNL_PTR points to either buffer 0 or buffer 4.
+ //Add appropriate offsets to get pointers for all buffers (1,2,3 or 5).
+ //Offsets are zero for buffer 0 and buffer 4.
+ add (4) a0.0:uw r22.0<4;4,1>:w 0:uw // TOP set: a0.0-a0.3 = channel pointers for the first buffer
+ add (4) a0.4:uw r22.0<4;4,1>:w 512:uw // BOT set: a0.4-a0.7 = same pointers + 512 bytes (the next buffer)
+
+ //Set up header for Y,U and V data (r28 = Y, r37 = U, r46 = V), each starting as a copy of r27
+ mov (8) r28<1>:ud r27<8;8,1>:ud
+ mov (8) r37<1>:ud r27<8;8,1>:ud
+ mov (8) r46<1>:ud r27<8;8,1>:ud
+
+ mov (2) r28.0<1>:d r7.0<2;2,1>:w { NoDDClr } //ORI Y (LUMA) = ORI
+ shr (2) r37.0<1>:d r7.0<2;2,1>:w 1:w { NoDDClr } //H/V ORI U = H/V ORI/2
+ shr (2) r46.0<1>:d r7.0<2;2,1>:w 1:w { NoDDClr } //H/V ORI V = H/V ORI/2
+
+ mov (1) r28.2<1>:ud 0xF000F:ud { NoDDChk } // Y Block width and height (16x16), stored as size-1 per 16-bit half
+ mov (1)r37.2<1>:ud 0x70007:ud { NoDDChk } // U Block width and height (8x8)
+ mov (1)r46.2<1>:ud 0x70007:ud { NoDDChk } // V Block width and height (8x8)
+
+// Unscramble, and pack data directly to MRFs
+
+// Data 16x16 block is divided as -
+// ---------
+// | 0 |
+// ---------
+// | 1 |
+// ---------
+// | 2 |
+// ---------
+// | 3 |
+// ---------
+// All sub-blocks are of size 16x4
+// 0: ubBUFFER_0
+// 1: ubBUFFER_1, ubBUFFER_0+16
+// 2: ubBUFFER_2
+// 3: ubBUFFER_3, ubBUFFER_2+16
+
+ //Y Rounding, first: add 0x0080 (saturating) in place to all 16 words of four rows, via the TOP pointer a0.1
+ add.sat (16) r[a0.1,0]<1>:uw r[a0.1,0]<16;16,1>:uw 0x0080:uw
+ add.sat (16) r[a0.1,32]<1>:uw r[a0.1,32]<16;16,1>:uw 0x0080:uw
+ add.sat (16) r[a0.1,64]<1>:uw r[a0.1,64]<16;16,1>:uw 0x0080:uw
+ add.sat (16) r[a0.1,96]<1>:uw r[a0.1,96]<16;16,1>:uw 0x0080:uw
+
+ // U rounding: every other word of the rows at offsets 0 and 64, via a0.2
+ add.sat (8) r[a0.2,0]<2>:uw r[a0.2,0]<16;8,2>:uw 0x0080:uw
+ add.sat (8) r[a0.2,64]<2>:uw r[a0.2,64]<16;8,2>:uw 0x0080:uw
+
+ // V rounding: every other word of the rows at offsets 0 and 64, via a0.0
+ add.sat (8) r[a0.0,0]<2>:uw r[a0.0,0]<16;8,2>:uw 0x0080:uw
+ add.sat (8) r[a0.0,64]<2>:uw r[a0.0,64]<16;8,2>:uw 0x0080:uw
+
+ add (4) a0.0:uw r22.0<4;4,1>:w 1024:uw //Update Buffer 2 pointers (TOP set, used by the third rounding pass)
+
+ //Y Rounding, second: same as the first pass but through the BOT pointers (a0.4-a0.7, channel pointers + 512)
+ add.sat (16) r[a0.5,0]<1>:uw r[a0.5,0]<16;16,1>:uw 0x0080:uw
+ add.sat (16) r[a0.5,32]<1>:uw r[a0.5,32]<16;16,1>:uw 0x0080:uw
+ add.sat (16) r[a0.5,64]<1>:uw r[a0.5,64]<16;16,1>:uw 0x0080:uw
+ add.sat (16) r[a0.5,96]<1>:uw r[a0.5,96]<16;16,1>:uw 0x0080:uw
+
+ // U rounding (BOT pointer a0.6)
+ add.sat (8) r[a0.6,0]<2>:uw r[a0.6,0]<16;8,2>:uw 0x0080:uw
+ add.sat (8) r[a0.6,64]<2>:uw r[a0.6,64]<16;8,2>:uw 0x0080:uw
+
+ // V rounding (BOT pointer a0.4)
+ add.sat (8) r[a0.4,0]<2>:uw r[a0.4,0]<16;8,2>:uw 0x0080:uw
+ add.sat (8) r[a0.4,64]<2>:uw r[a0.4,64]<16;8,2>:uw 0x0080:uw
+
+ add (4) a0.4:uw r22.0<4;4,1>:w 1536:uw //Update Buffer 3 pointers (BOT set, used by the fourth rounding pass)
+
+ //Y Rounding, third: TOP pointers now address channel pointers + 1024 (buffer 2)
+ add.sat (16) r[a0.1,0]<1>:uw r[a0.1,0]<16;16,1>:uw 0x0080:uw
+ add.sat (16) r[a0.1,32]<1>:uw r[a0.1,32]<16;16,1>:uw 0x0080:uw
+ add.sat (16) r[a0.1,64]<1>:uw r[a0.1,64]<16;16,1>:uw 0x0080:uw
+ add.sat (16) r[a0.1,96]<1>:uw r[a0.1,96]<16;16,1>:uw 0x0080:uw
+
+ // U rounding (TOP pointer a0.2)
+ add.sat (8) r[a0.2,0]<2>:uw r[a0.2,0]<16;8,2>:uw 0x0080:uw
+ add.sat (8) r[a0.2,64]<2>:uw r[a0.2,64]<16;8,2>:uw 0x0080:uw
+
+ // V rounding (TOP pointer a0.0)
+ add.sat (8) r[a0.0,0]<2>:uw r[a0.0,0]<16;8,2>:uw 0x0080:uw
+ add.sat (8) r[a0.0,64]<2>:uw r[a0.0,64]<16;8,2>:uw 0x0080:uw
+
+
+ //Y Rounding, fourth: BOT pointers now address channel pointers + 1536 (buffer 3)
+ add.sat (16) r[a0.5,0]<1>:uw r[a0.5,0]<16;16,1>:uw 0x0080:uw
+ add.sat (16) r[a0.5,32]<1>:uw r[a0.5,32]<16;16,1>:uw 0x0080:uw
+ add.sat (16) r[a0.5,64]<1>:uw r[a0.5,64]<16;16,1>:uw 0x0080:uw
+ add.sat (16) r[a0.5,96]<1>:uw r[a0.5,96]<16;16,1>:uw 0x0080:uw
+
+ // U rounding (BOT pointer a0.6)
+ add.sat (8) r[a0.6,0]<2>:uw r[a0.6,0]<16;8,2>:uw 0x0080:uw
+ add.sat (8) r[a0.6,64]<2>:uw r[a0.6,64]<16;8,2>:uw 0x0080:uw
+
+ // V rounding (BOT pointer a0.4)
+ add.sat (8) r[a0.4,0]<2>:uw r[a0.4,0]<16;8,2>:uw 0x0080:uw
+ add.sat (8) r[a0.4,64]<2>:uw r[a0.4,64]<16;8,2>:uw 0x0080:uw
+
+ // restore the TOP and BOT pointers to offsets 0 and 512 for the packing passes
+ add (4) a0.0:uw r22.0<4;4,1>:w 0:uw
+ add (4) a0.4:uw r22.0<4;4,1>:w 512:uw
+
+//Buffer 0 (TOP pointers, offset 0)
+//Move Y to msg payload: byte 1 of each rounded word, 16 bytes per row, four rows into payload GRFs 0-1
+ mov (16) mubMSGPAYLOAD0(0,0)<1> r[a0.1, 1]<32;16,2>:ub { NoDDClr }
+ mov (16) mubMSGPAYLOAD0(0,16)<1> r[a0.1, 33]<32;16,2>:ub { NoDDChk }
+ mov (16) mubMSGPAYLOAD0(1,0)<1> r[a0.1, 65]<32;16,2>:ub { NoDDClr }
+ mov (16) mubMSGPAYLOAD0(1,16)<1> r[a0.1, 97]<32;16,2>:ub { NoDDChk }
+
+//Move U to msg payload: byte 1 of every other word, 8 bytes per row, two rows into bytes 0-15 of U payload GRF 0
+ mov (8) mubMSGPAYLOAD1(0,0)<1> r[a0.2, 1]<32;8,4>:ub { NoDDClr }
+ mov (8) mubMSGPAYLOAD1(0,8)<1> r[a0.2, 65]<32;8,4>:ub { NoDDClr, NoDDChk }
+
+//Move V to msg payload: same pattern as U, into bytes 0-15 of V payload GRF 0
+ mov (8) mubMSGPAYLOAD2(0,0)<1> r[a0.0, 1]<32;8,4>:ub { NoDDClr }
+ mov (8) mubMSGPAYLOAD2(0,8)<1> r[a0.0, 65]<32;8,4>:ub { NoDDClr, NoDDChk }
+
+ add (4) a0.0:uw r22.0<4;4,1>:w 1024:uw //Update Buffer 2 pointers (TOP set)
+
+//Buffer 1 (BOT pointers, offset 512): Y into payload GRFs 2-3
+ mov (16) mubMSGPAYLOAD0(2,0)<1> r[a0.5, 1]<32;16,2>:ub { NoDDClr }
+ mov (16) mubMSGPAYLOAD0(2,16)<1> r[a0.5, 33]<32;16,2>:ub { NoDDChk }
+ mov (16) mubMSGPAYLOAD0(3,0)<1> r[a0.5, 65]<32;16,2>:ub { NoDDClr }
+ mov (16) mubMSGPAYLOAD0(3,16)<1> r[a0.5, 97]<32;16,2>:ub { NoDDChk }
+ // U: bytes 16-31 of U payload GRF 0; the final NoDDChk-only write completes that GRF
+ mov (8) mubMSGPAYLOAD1(0,16)<1> r[a0.6, 1]<32;8,4>:ub { NoDDClr, NoDDChk }
+ mov (8) mubMSGPAYLOAD1(0,24)<1> r[a0.6, 65]<32;8,4>:ub { NoDDChk }
+ // V: bytes 16-31 of V payload GRF 0
+ mov (8) mubMSGPAYLOAD2(0,16)<1> r[a0.4, 1]<32;8,4>:ub { NoDDClr, NoDDChk }
+ mov (8) mubMSGPAYLOAD2(0,24)<1> r[a0.4, 65]<32;8,4>:ub { NoDDChk }
+
+ add (4) a0.4:uw r22.0<4;4,1>:w 1536:uw //Update Buffer 3 pointers (BOT set)
+
+//Buffer 2 (TOP pointers, offset 1024): Y into payload GRFs 4-5
+ mov (16) mubMSGPAYLOAD0(4,0)<1> r[a0.1, 1]<32;16,2>:ub { NoDDClr }
+ mov (16) mubMSGPAYLOAD0(4,16)<1> r[a0.1, 33]<32;16,2>:ub { NoDDChk }
+ mov (16) mubMSGPAYLOAD0(5,0)<1> r[a0.1, 65]<32;16,2>:ub { NoDDClr }
+ mov (16) mubMSGPAYLOAD0(5,16)<1> r[a0.1, 97]<32;16,2>:ub { NoDDChk }
+ // U: bytes 0-15 of U payload GRF 1
+ mov (8) mubMSGPAYLOAD1(1,0)<1> r[a0.2, 1]<32;8,4>:ub { NoDDClr }
+ mov (8) mubMSGPAYLOAD1(1,8)<1> r[a0.2, 65]<32;8,4>:ub { NoDDClr, NoDDChk }
+ // V: bytes 0-15 of V payload GRF 1
+ mov (8) mubMSGPAYLOAD2(1,0)<1> r[a0.0, 1]<32;8,4>:ub { NoDDClr }
+ mov (8) mubMSGPAYLOAD2(1,8)<1> r[a0.0, 65]<32;8,4>:ub { NoDDClr, NoDDChk }
+
+//Buffer 3 (BOT pointers, offset 1536): Y into payload GRFs 6-7
+ mov (16) mubMSGPAYLOAD0(6,0)<1> r[a0.5, 1]<32;16,2>:ub { NoDDClr }
+ mov (16) mubMSGPAYLOAD0(6,16)<1> r[a0.5, 33]<32;16,2>:ub { NoDDChk }
+ mov (16) mubMSGPAYLOAD0(7,0)<1> r[a0.5, 65]<32;16,2>:ub { NoDDClr }
+ mov (16) mubMSGPAYLOAD0(7,16)<1> r[a0.5, 97]<32;16,2>:ub { NoDDChk }
+ // U: bytes 16-31 of U payload GRF 1
+ mov (8) mubMSGPAYLOAD1(1,16)<1> r[a0.6, 1]<32;8,4>:ub { NoDDClr, NoDDChk }
+ mov (8) mubMSGPAYLOAD1(1,24)<1> r[a0.6, 65]<32;8,4>:ub { NoDDChk }
+ // V: bytes 16-31 of V payload GRF 1
+ mov (8) mubMSGPAYLOAD2(1,16)<1> r[a0.4, 1]<32;8,4>:ub { NoDDClr, NoDDChk }
+ mov (8) mubMSGPAYLOAD2(1,24)<1> r[a0.4, 65]<32;8,4>:ub { NoDDChk }
+
+//===========================================================================
+// Media block writes: descriptor bits 28:25 = message length, low byte = binding table index
+send (1) null<1>:d r28 0x5 0x120A8018:ud // Y: header r28 + 8 payload GRFs (length 9), binding table index 0x18
+send (1) null<1>:d r37 0x5 0x60A8019:ud // U: header r37 + 2 payload GRFs (length 3), binding table index 0x19
+send (1) null<1>:d r46 0x5 0x60A801A:ud // V: header r46 + 2 payload GRFs (length 3), binding table index 0x1A
diff --git a/src/shaders/post_processing/gen7/Save_AVS_RGB.g4a b/src/shaders/post_processing/gen7/Save_AVS_RGB.g4a
new file mode 100644
index 0000000..92bddf8
--- /dev/null
+++ b/src/shaders/post_processing/gen7/Save_AVS_RGB.g4a
@@ -0,0 +1,668 @@
+/*
+ * Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// 198 // Total instruction count
+// 1 // Total kernel count
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16)
+.default_register_type :ub
+
+.reg_count_total 128
+.reg_count_payload 7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later)
+// 0000,0000,0000
+// 0001(Spawn a root thread),0001 (Root thread spawn thread)
+// = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later)
+// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+// 0000,0000 (Binding table index, added later)
+// = 0x0C098700 (from the bit fields above; 0x02000011 is the thread-spawner value)
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+ // Message Descriptors
+ // = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+ // 1 (header present 1) 0 1010 (media block write) 000000
+ // 00000000 (binding table index - set later)
+ // = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+ // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT
+
+
+.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+
+.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+
+.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+
+
+// End of common.inc
+
+
+// Module name: Save_AVS_RGB.asm
+//
+// Save packed ARGB 444 frame data block of size 16x16
+//
+// To save 16x16 block (64x16 byte layout for ARGB8888) we need 4 send instructions with 16x16 in each
+// -----------------
+// | 0 | 1 | 2 | 3 |
+// -----------------
+// the 4 16x16 block send has been replaced by 16 32x2 sends to get better performance
+
+
+
+// Module name: Save.inc
+
+
+
+
+// Description: Includes all definitions explicit to Fast Composite.
+
+
+
+
+// End of common.inc
+
+
+//========== GRF partition ==========
+ // r0 header : r0 (1 GRF)
+ // Static parameters : r1 - r6 (6 GRFS)
+ // Inline parameters : r7 - r8 (2 GRFs)
+ // MSGSRC : r27 (1 GRF)
+//===================================
+
+//Interface:
+//========== Static Parameters (Explicit To Fast Composite) ==========
+//r1
+//CSC Set 0
+
+
+.declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud
+
+//Constant alpha
+
+
+//r2
+
+
+// Gen7 AVS WA
+
+
+// WiDi Definitions
+
+
+//Colorfill
+
+
+ // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise.
+
+.declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub
+
+//r3
+//Normalised Ratio of Horizontal step size with main video for all layers
+
+
+ //Normalised Ratio of Horizontal step size with main video for all layers becomes
+ //Normalised Horizontal step size for all layers in VP_Setup.asm
+
+
+//r4
+//Normalised Vertical step size for all layers
+
+
+//r5
+//Normalised Vertical Frame Origin for all layers
+
+
+//r6
+//Normalised Horizontal Frame Origin for all layers
+
+
+//========== Inline Parameters (Explicit To Fast Composite) ==========
+
+
+//Main video Step X
+
+
+//====================== Binding table (Explicit To Fast Composite)=========================================
+
+
+//Used by Interlaced Scaling Kernels
+
+
+//========== Sampler State Table Index (Explicit To Fast Composite)==========
+//Sampler Index for AVS/IEF messages
+
+
+//Sampler Index for SIMD16 sampler messages
+
+
+//=============================================================================
+
+.declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+
+.declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+.declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_4 Base=r28.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_5 Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+.declare ubBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+.declare ub4BUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+
+//Pointer to mask reg
+
+
+//r18
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+.declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF
+
+//r19
+
+
+.declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF
+
+
+//r20
+
+.declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF
+
+//r21
+
+.declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF
+
+//r22
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as
+//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT
+
+//r23
+
+
+//Lumakey
+
+
+//r24
+
+
+//r25
+
+
+//r26
+
+
+//defines to generate LABELS during compile time.
+
+
+//Msg payload buffers; up to 4 full-size messages can be written
+
+
+.declare mudMSGPAYLOAD0 Base=r29.0 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare mudMSGPAYLOAD1 Base=r38.0 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare mudMSGPAYLOAD2 Base=r47.0 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare mudMSGPAYLOAD3 Base=r56.0 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+
+.declare muwMSGPAYLOAD0 Base=r29.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare muwMSGPAYLOAD1 Base=r38.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare muwMSGPAYLOAD2 Base=r47.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare muwMSGPAYLOAD3 Base=r56.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+
+.declare mubMSGPAYLOAD0 Base=r29.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare mubMSGPAYLOAD1 Base=r38.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare mubMSGPAYLOAD2 Base=r47.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare mubMSGPAYLOAD3 Base=r56.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare mubMSGPAYLOAD4 Base=r32.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare mubMSGPAYLOAD5 Base=r41.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare mubMSGPAYLOAD6 Base=r50.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare mubMSGPAYLOAD7 Base=r59.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+
+
+ // the r17 register (nTEMP0) is originally defined from "Common.inc"
+ // instead of re-defining a nTEMP0 here, we use "SAVE_RGB" suffix for its naming
+
+ .declare uwTemp0 Base=r17.0 ElementSize=2 Type=uw
+
+
+//_SAVE_INC_
+
+
+// At the save module we have all 8 address sub-registers available.
+// So we will use PING-PONG type of scheme to save the data using
+// pointers pBUF_CHNL_TOP_8x4 and pBUF_CHNL_BOT_8x4. This will help
+// reduce dependency. - rT
+
+// channel switching based on bit 0 of uWRGB_BGR_CH_SWITCH
+// Setup stage: latch the swap flag, load the channel pointers into a0.0-a0.7 and build the message headers.
+ // if channel swap?
+ and.nz.f0.0 null<1>:w r2.3<0;1,0>:uw 0x01:w // f0.0 set when bit 0 of r2.3:uw is non-zero; null dst discards the AND result
+
+//wBUFF_CHNL_PTR points to either buffer 0 or buffer 4.
+//Add appropriate offsets to get pointers for all buffers (1,2,3 or 5).
+//Offsets are zero for buffer 0 and buffer 4.
+ add (4) a0.0:uw r22.0<4;4,1>:w 0:uw // a0.0-a0.3 = r22.0-r22.3 (offset 0 here)
+
+ // pointer swap: exchange a0.0 and a0.2 through uwTemp0 (r17.0), only when f0.0 is set
+ (f0.0) mov (1) uwTemp0<1> a0.0:uw
+ (f0.0) mov (1) a0.0:uw a0.2:uw
+ (f0.0) mov (1) a0.2:uw uwTemp0<0;1,0>
+
+ shl (1) r27.0<1>:d r7.0<0;1,0>:w 2:w { NoDDClr } // H. block origin need to be quadrupled
+ mov (1) r27.1<1>:d r7.1<0;1,0>:w { NoDDClr, NoDDChk } // Block origin (1st quadrant)
+ mov (1) r27.2<1>:ud 0x1001F:ud { NoDDChk } // Block width and height (32x2)
+ // a0.4-a0.7 = a0.0-a0.3 (post-swap) + {r22.8, r22.9, r22.8, r22.9}; the <0;2,1> region repeats the word pair
+ add (4) a0.4:uw a0.0<4;4,1>:w r22.8<0;2,1>:w
+ // Replicate the header template r27 into the eight message header registers used by the sends further down
+ mov (8) r28<1>:ud r27<8;8,1>:ud
+ mov (8) r37<1>:ud r27<8;8,1>:ud
+ mov (8) r46<1>:ud r27<8;8,1>:ud
+ mov (8) r55<1>:ud r27<8;8,1>:ud
+
+ mov (8) r31<1>:ud r27<8;8,1>:ud
+ mov (8) r40<1>:ud r27<8;8,1>:ud
+ mov (8) r49<1>:ud r27<8;8,1>:ud
+ mov (8) r58<1>:ud r27<8;8,1>:ud
+
+
+//for BUFFER 0 (notation: x = r27.0, y = r27.1 of the header template; r28 keeps the unmodified template (x, y))
+ add (1) r37.1<1>:d r27.1<0;1,0>:d 2:d // r37 -> (x, y+2)
+ add (1) r46.0<1>:d r27.0<0;1,0>:d 32:d // r46 -> (x+32, y)
+ add (1) r55.0<1>:d r27.0<0;1,0>:d 32:d // r55 -> (x+32, ...
+ add (1) r55.1<1>:d r27.1<0;1,0>:d 2:d // ... y+2)
+// NOTE(review): +32 / +2 match the 32x2 block size in r27.2 (0x1001F); presumably byte-column and row steps -- confirm against the block write header layout
+// for BUFFER 1
+ add (1) r31.1<1>:d r27.1<0;1,0>:d 4:d // r31 -> (x, y+4)
+ add (1) r40.1<1>:d r27.1<0;1,0>:d 6:d // r40 -> (x, y+6)
+ add (1) r49.0<1>:d r27.0<0;1,0>:d 32:d // r49 -> (x+32, ...
+ add (1) r49.1<1>:d r27.1<0;1,0>:d 4:d // ... y+4)
+ add (1) r58.0<1>:d r27.0<0;1,0>:d 32:d // r58 -> (x+32, ...
+ add (1) r58.1<1>:d r27.1<0;1,0>:d 6:d // ... y+6)
+ // write Buf_0 to 1st quarter of four horizontal output blocks
+ // Each group of four movs fills one payload register: dst byte lanes 0/1/2 (stride 4) come from three channel pointers, lane 3 from r2.31.
+// Please note the scattered order of NODDCLR, NODDCHK flags. Since the sub-registers
+// of destination reg are not updated at one place and hence even flags are scattered. -rT
+ // Each indirect source reads 8 bytes at stride 2 starting at the given byte offset (1/17/33/49), i.e. odd bytes only.
+ mov (8) mubMSGPAYLOAD0(0, 0)<4> r[a0.2, 1]<16;8,2> { NoDDClr } // lane 0 <- data at pointer a0.2
+ mov (8) mubMSGPAYLOAD0(0, 1)<4> r[a0.1, 1]<16;8,2> { NoDDClr, NoDDChk } // lane 1 <- data at pointer a0.1
+ mov (8) mubMSGPAYLOAD0(0, 2)<4> r[a0.0, 1]<16;8,2> { NoDDClr, NoDDChk } // lane 2 <- data at pointer a0.0
+ mov (8) mubMSGPAYLOAD0(0, 3)<4> r2.31:ub { NoDDChk } // lane 3 <- byte r2.31 (presumably the alpha value -- confirm)
+
+ mov (8) mubMSGPAYLOAD0(1, 0)<4> r[a0.2, 33]<16;8,2> { NoDDClr }
+ mov (8) mubMSGPAYLOAD0(1, 1)<4> r[a0.1, 33]<16;8,2> { NoDDClr, NoDDChk }
+ mov (8) mubMSGPAYLOAD0(1, 2)<4> r[a0.0, 33]<16;8,2> { NoDDClr, NoDDChk }
+ mov (8) mubMSGPAYLOAD0(1, 3)<4> r2.31:ub { NoDDChk }
+ // PAYLOAD1: same source offsets (1/33) but through the second pointer set a0.4-a0.6
+ mov (8) mubMSGPAYLOAD1(0, 0)<4> r[a0.6, 1]<16;8,2> { NoDDClr }
+ mov (8) mubMSGPAYLOAD1(0, 1)<4> r[a0.5, 1]<16;8,2> { NoDDClr, NoDDChk }
+ mov (8) mubMSGPAYLOAD1(0, 2)<4> r[a0.4, 1]<16;8,2> { NoDDClr, NoDDChk }
+ mov (8) mubMSGPAYLOAD1(0, 3)<4> r2.31:ub { NoDDChk }
+
+ mov (8) mubMSGPAYLOAD1(1, 0)<4> r[a0.6, 33]<16;8,2> { NoDDClr }
+ mov (8) mubMSGPAYLOAD1(1, 1)<4> r[a0.5, 33]<16;8,2> { NoDDClr, NoDDChk }
+ mov (8) mubMSGPAYLOAD1(1, 2)<4> r[a0.4, 33]<16;8,2> { NoDDClr, NoDDChk }
+ mov (8) mubMSGPAYLOAD1(1, 3)<4> r2.31:ub { NoDDChk }
+ // PAYLOAD2: pointers a0.0-a0.2 again, source offsets 17/49 (the following 16 source bytes)
+ mov (8) mubMSGPAYLOAD2(0, 0)<4> r[a0.2, 17]<16;8,2> { NoDDClr }
+ mov (8) mubMSGPAYLOAD2(0, 1)<4> r[a0.1, 17]<16;8,2> { NoDDClr, NoDDChk }
+ mov (8) mubMSGPAYLOAD2(0, 2)<4> r[a0.0, 17]<16;8,2> { NoDDClr, NoDDChk }
+ mov (8) mubMSGPAYLOAD2(0, 3)<4> r2.31:ub { NoDDChk }
+
+ mov (8) mubMSGPAYLOAD2(1, 0)<4> r[a0.2, 49]<16;8,2> { NoDDClr }
+ mov (8) mubMSGPAYLOAD2(1, 1)<4> r[a0.1, 49]<16;8,2> { NoDDClr, NoDDChk }
+ mov (8) mubMSGPAYLOAD2(1, 2)<4> r[a0.0, 49]<16;8,2> { NoDDClr, NoDDChk }
+ mov (8) mubMSGPAYLOAD2(1, 3)<4> r2.31:ub { NoDDChk }
+ // PAYLOAD3: pointers a0.4-a0.6, source offsets 17/49
+ mov (8) mubMSGPAYLOAD3(0, 0)<4> r[a0.6, 17]<16;8,2> { NoDDClr }
+ mov (8) mubMSGPAYLOAD3(0, 1)<4> r[a0.5, 17]<16;8,2> { NoDDClr, NoDDChk }
+ mov (8) mubMSGPAYLOAD3(0, 2)<4> r[a0.4, 17]<16;8,2> { NoDDClr, NoDDChk }
+ mov (8) mubMSGPAYLOAD3(0, 3)<4> r2.31:ub { NoDDChk }
+
+ mov (8) mubMSGPAYLOAD3(1, 0)<4> r[a0.6, 49]<16;8,2> { NoDDClr }
+ mov (8) mubMSGPAYLOAD3(1, 1)<4> r[a0.5, 49]<16;8,2> { NoDDClr, NoDDChk }
+ mov (8) mubMSGPAYLOAD3(1, 2)<4> r[a0.4, 49]<16;8,2> { NoDDClr, NoDDChk }
+ mov (8) mubMSGPAYLOAD3(1, 3)<4> r2.31:ub { NoDDChk }
+
+ // write Buf_1 to 2nd quarter of four horizontal output blocks
+ add (4) a0.0:uw r22.0<4;4,1>:w 512:uw // a0.0-a0.3 = r22.0-r22.3 + 512 (16 GRFs, the r64 -> r80 spacing of the BUFFER_0/BUFFER_1 declares)
+
+ // pointer swap: exchange a0.0 and a0.2 through uwTemp0 when f0.0 (channel-swap flag) is set
+ (f0.0) mov (1) uwTemp0<1> a0.0:uw
+ (f0.0) mov (1) a0.0:uw a0.2:uw
+ (f0.0) mov (1) a0.2:uw uwTemp0<0;1,0>
+ // second pointer set: a0.4-a0.7 = a0.0-a0.3 (post-swap) + the word pair r22.8/r22.9
+ add (4) a0.4:uw a0.0<4;4,1>:w r22.8<0;2,1>:w
+ // Same interleave as for Buf_0, but into PAYLOAD4-7: lanes 0/1/2 from the channel pointers, lane 3 from r2.31
+ mov (8) mubMSGPAYLOAD4(0, 0)<4> r[a0.2, 1]<16;8,2> { NoDDClr }
+ mov (8) mubMSGPAYLOAD4(0, 1)<4> r[a0.1, 1]<16;8,2> { NoDDClr, NoDDChk }
+ mov (8) mubMSGPAYLOAD4(0, 2)<4> r[a0.0, 1]<16;8,2> { NoDDClr, NoDDChk }
+ mov (8) mubMSGPAYLOAD4(0, 3)<4> r2.31:ub { NoDDChk }
+
+ mov (8) mubMSGPAYLOAD4(1, 0)<4> r[a0.2, 33]<16;8,2> { NoDDClr }
+ mov (8) mubMSGPAYLOAD4(1, 1)<4> r[a0.1, 33]<16;8,2> { NoDDClr, NoDDChk }
+ mov (8) mubMSGPAYLOAD4(1, 2)<4> r[a0.0, 33]<16;8,2> { NoDDClr, NoDDChk }
+ mov (8) mubMSGPAYLOAD4(1, 3)<4> r2.31:ub { NoDDChk }
+ // PAYLOAD5: source offsets 1/33 through pointers a0.4-a0.6
+ mov (8) mubMSGPAYLOAD5(0, 0)<4> r[a0.6, 1]<16;8,2> { NoDDClr }
+ mov (8) mubMSGPAYLOAD5(0, 1)<4> r[a0.5, 1]<16;8,2> { NoDDClr, NoDDChk }
+ mov (8) mubMSGPAYLOAD5(0, 2)<4> r[a0.4, 1]<16;8,2> { NoDDClr, NoDDChk }
+ mov (8) mubMSGPAYLOAD5(0, 3)<4> r2.31:ub { NoDDChk }
+
+ mov (8) mubMSGPAYLOAD5(1, 0)<4> r[a0.6, 33]<16;8,2> { NoDDClr }
+ mov (8) mubMSGPAYLOAD5(1, 1)<4> r[a0.5, 33]<16;8,2> { NoDDClr, NoDDChk }
+ mov (8) mubMSGPAYLOAD5(1, 2)<4> r[a0.4, 33]<16;8,2> { NoDDClr, NoDDChk }
+ mov (8) mubMSGPAYLOAD5(1, 3)<4> r2.31:ub { NoDDChk }
+ // PAYLOAD6: pointers a0.0-a0.2, source offsets 17/49
+ mov (8) mubMSGPAYLOAD6(0, 0)<4> r[a0.2, 17]<16;8,2> { NoDDClr }
+ mov (8) mubMSGPAYLOAD6(0, 1)<4> r[a0.1, 17]<16;8,2> { NoDDClr, NoDDChk }
+ mov (8) mubMSGPAYLOAD6(0, 2)<4> r[a0.0, 17]<16;8,2> { NoDDClr, NoDDChk }
+ mov (8) mubMSGPAYLOAD6(0, 3)<4> r2.31:ub { NoDDChk }
+
+ mov (8) mubMSGPAYLOAD6(1, 0)<4> r[a0.2, 49]<16;8,2> { NoDDClr }
+ mov (8) mubMSGPAYLOAD6(1, 1)<4> r[a0.1, 49]<16;8,2> { NoDDClr, NoDDChk }
+ mov (8) mubMSGPAYLOAD6(1, 2)<4> r[a0.0, 49]<16;8,2> { NoDDClr, NoDDChk }
+ mov (8) mubMSGPAYLOAD6(1, 3)<4> r2.31:ub { NoDDChk }
+ // PAYLOAD7: pointers a0.4-a0.6, source offsets 17/49
+ mov (8) mubMSGPAYLOAD7(0, 0)<4> r[a0.6, 17]<16;8,2> { NoDDClr }
+ mov (8) mubMSGPAYLOAD7(0, 1)<4> r[a0.5, 17]<16;8,2> { NoDDClr, NoDDChk }
+ mov (8) mubMSGPAYLOAD7(0, 2)<4> r[a0.4, 17]<16;8,2> { NoDDClr, NoDDChk }
+ mov (8) mubMSGPAYLOAD7(0, 3)<4> r2.31:ub { NoDDChk }
+
+ mov (8) mubMSGPAYLOAD7(1, 0)<4> r[a0.6, 49]<16;8,2> { NoDDClr }
+ mov (8) mubMSGPAYLOAD7(1, 1)<4> r[a0.5, 49]<16;8,2> { NoDDClr, NoDDChk }
+ mov (8) mubMSGPAYLOAD7(1, 2)<4> r[a0.4, 49]<16;8,2> { NoDDClr, NoDDChk }
+ mov (8) mubMSGPAYLOAD7(1, 3)<4> r2.31:ub { NoDDChk }
+
+ // send buffer 0 and buffer 1: eight block writes, one per header register; no response is kept (null destination)
+ send (1) null<1>:d r28 0x5 0x60A8018:ud // header r28, payload PAYLOAD0 (base r29)
+ send (1) null<1>:d r37 0x5 0x60A8018:ud // header r37, payload PAYLOAD1 (base r38)
+ send (1) null<1>:d r46 0x5 0x60A8018:ud // header r46, payload PAYLOAD2 (base r47)
+ send (1) null<1>:d r55 0x5 0x60A8018:ud // header r55, payload PAYLOAD3 (base r56)
+ // 0x60A8018 = media-block-write template 0x020A8000 (see the descriptor notes earlier in this file) with message length 3 and binding table index 0x18
+ send (1) null<1>:d r31 0x5 0x60A8018:ud // header r31, payload PAYLOAD4 (base r32)
+ send (1) null<1>:d r40 0x5 0x60A8018:ud // header r40, payload PAYLOAD5 (base r41)
+ send (1) null<1>:d r49 0x5 0x60A8018:ud // header r49, payload PAYLOAD6 (base r50)
+ send (1) null<1>:d r58 0x5 0x60A8018:ud // header r58, payload PAYLOAD7 (base r59)
+
+//==========
+//prepare headers: re-point the eight header registers for the next two buffers, again relative to template r27 (x = r27.0, y = r27.1)
+//for BUFFER 2
+ add (1) r28.1<1>:d r27.1<0;1,0>:d 8:d // r28 -> (x, y+8)
+ add (1) r37.1<1>:d r27.1<0;1,0>:d 10:d // r37 -> (x, y+10)
+ add (1) r46.0<1>:d r27.0<0;1,0>:d 32:d // r46 -> (x+32, ...
+ add (1) r46.1<1>:d r27.1<0;1,0>:d 8:d // ... y+8)
+ add (1) r55.0<1>:d r27.0<0;1,0>:d 32:d // r55 -> (x+32, ...
+ add (1) r55.1<1>:d r27.1<0;1,0>:d 10:d // ... y+10)
+// for BUFFER 3
+ add (1) r31.1<1>:d r27.1<0;1,0>:d 12:d // r31 -> (x, y+12)
+ add (1) r40.1<1>:d r27.1<0;1,0>:d 14:d // r40 -> (x, y+14)
+ add (1) r49.0<1>:d r27.0<0;1,0>:d 32:d // r49 -> (x+32, ...
+ add (1) r49.1<1>:d r27.1<0;1,0>:d 12:d // ... y+12)
+ add (1) r58.0<1>:d r27.0<0;1,0>:d 32:d // r58 -> (x+32, ...
+ add (1) r58.1<1>:d r27.1<0;1,0>:d 14:d // ... y+14)
+
+//===========
+
+ // write Buf_2 to 3rd quarter of four horizontal output blocks
+ add (4) a0.0:uw r22.0<4;4,1>:w 1024:uw // a0.0-a0.3 = r22.0-r22.3 + 1024 (32 GRFs, the r64 -> r96 spacing of the BUFFER_0/BUFFER_2 declares)
+
+ // pointer swap: exchange a0.0 and a0.2 through uwTemp0 when f0.0 (channel-swap flag) is set
+ (f0.0) mov (1) uwTemp0<1> a0.0:uw
+ (f0.0) mov (1) a0.0:uw a0.2:uw
+ (f0.0) mov (1) a0.2:uw uwTemp0<0;1,0>
+ // second pointer set: a0.4-a0.7 = a0.0-a0.3 (post-swap) + the word pair r22.8/r22.9
+ add (4) a0.4:uw a0.0<4;4,1>:w r22.8<0;2,1>:w
+ // PAYLOAD0-3 are filled again here: lanes 0/1/2 from the channel pointers, lane 3 from r2.31
+ mov (8) mubMSGPAYLOAD0(0, 0)<4> r[a0.2, 1]<16;8,2> { NoDDClr }
+ mov (8) mubMSGPAYLOAD0(0, 1)<4> r[a0.1, 1]<16;8,2> { NoDDClr, NoDDChk }
+ mov (8) mubMSGPAYLOAD0(0, 2)<4> r[a0.0, 1]<16;8,2> { NoDDClr, NoDDChk }
+ mov (8) mubMSGPAYLOAD0(0, 3)<4> r2.31:ub { NoDDChk }
+
+ mov (8) mubMSGPAYLOAD0(1, 0)<4> r[a0.2, 33]<16;8,2> { NoDDClr }
+ mov (8) mubMSGPAYLOAD0(1, 1)<4> r[a0.1, 33]<16;8,2> { NoDDClr, NoDDChk }
+ mov (8) mubMSGPAYLOAD0(1, 2)<4> r[a0.0, 33]<16;8,2> { NoDDClr, NoDDChk }
+ mov (8) mubMSGPAYLOAD0(1, 3)<4> r2.31:ub { NoDDChk }
+ // PAYLOAD1: source offsets 1/33 through pointers a0.4-a0.6
+ mov (8) mubMSGPAYLOAD1(0, 0)<4> r[a0.6, 1]<16;8,2> { NoDDClr }
+ mov (8) mubMSGPAYLOAD1(0, 1)<4> r[a0.5, 1]<16;8,2> { NoDDClr, NoDDChk }
+ mov (8) mubMSGPAYLOAD1(0, 2)<4> r[a0.4, 1]<16;8,2> { NoDDClr, NoDDChk }
+ mov (8) mubMSGPAYLOAD1(0, 3)<4> r2.31:ub { NoDDChk }
+
+ mov (8) mubMSGPAYLOAD1(1, 0)<4> r[a0.6, 33]<16;8,2> { NoDDClr }
+ mov (8) mubMSGPAYLOAD1(1, 1)<4> r[a0.5, 33]<16;8,2> { NoDDClr, NoDDChk }
+ mov (8) mubMSGPAYLOAD1(1, 2)<4> r[a0.4, 33]<16;8,2> { NoDDClr, NoDDChk }
+ mov (8) mubMSGPAYLOAD1(1, 3)<4> r2.31:ub { NoDDChk }
+ // PAYLOAD2: pointers a0.0-a0.2, source offsets 17/49
+ mov (8) mubMSGPAYLOAD2(0, 0)<4> r[a0.2, 17]<16;8,2> { NoDDClr }
+ mov (8) mubMSGPAYLOAD2(0, 1)<4> r[a0.1, 17]<16;8,2> { NoDDClr, NoDDChk }
+ mov (8) mubMSGPAYLOAD2(0, 2)<4> r[a0.0, 17]<16;8,2> { NoDDClr, NoDDChk }
+ mov (8) mubMSGPAYLOAD2(0, 3)<4> r2.31:ub { NoDDChk }
+
+ mov (8) mubMSGPAYLOAD2(1, 0)<4> r[a0.2, 49]<16;8,2> { NoDDClr }
+ mov (8) mubMSGPAYLOAD2(1, 1)<4> r[a0.1, 49]<16;8,2> { NoDDClr, NoDDChk }
+ mov (8) mubMSGPAYLOAD2(1, 2)<4> r[a0.0, 49]<16;8,2> { NoDDClr, NoDDChk }
+ mov (8) mubMSGPAYLOAD2(1, 3)<4> r2.31:ub { NoDDChk }
+ // PAYLOAD3: pointers a0.4-a0.6, source offsets 17/49
+ mov (8) mubMSGPAYLOAD3(0, 0)<4> r[a0.6, 17]<16;8,2> { NoDDClr }
+ mov (8) mubMSGPAYLOAD3(0, 1)<4> r[a0.5, 17]<16;8,2> { NoDDClr, NoDDChk }
+ mov (8) mubMSGPAYLOAD3(0, 2)<4> r[a0.4, 17]<16;8,2> { NoDDClr, NoDDChk }
+ mov (8) mubMSGPAYLOAD3(0, 3)<4> r2.31:ub { NoDDChk }
+
+ mov (8) mubMSGPAYLOAD3(1, 0)<4> r[a0.6, 49]<16;8,2> { NoDDClr }
+ mov (8) mubMSGPAYLOAD3(1, 1)<4> r[a0.5, 49]<16;8,2> { NoDDClr, NoDDChk }
+ mov (8) mubMSGPAYLOAD3(1, 2)<4> r[a0.4, 49]<16;8,2> { NoDDClr, NoDDChk }
+ mov (8) mubMSGPAYLOAD3(1, 3)<4> r2.31:ub { NoDDChk }
+
+ // write Buf_3 to 4th quarter of four horizontal output blocks
+ add (4) a0.0:uw r22.0<4;4,1>:w 1536:uw // a0.0-a0.3 = r22.0-r22.3 + 1536 (48 GRFs, the r64 -> r112 spacing of the BUFFER_0/BUFFER_3 declares)
+
+ // pointer swap: exchange a0.0 and a0.2 through uwTemp0 when f0.0 (channel-swap flag) is set
+ (f0.0) mov (1) uwTemp0<1> a0.0:uw
+ (f0.0) mov (1) a0.0:uw a0.2:uw
+ (f0.0) mov (1) a0.2:uw uwTemp0<0;1,0>
+ // second pointer set: a0.4-a0.7 = a0.0-a0.3 (post-swap) + the word pair r22.8/r22.9
+ add (4) a0.4:uw a0.0<4;4,1>:w r22.8<0;2,1>:w
+ // PAYLOAD4-7 are filled again here: lanes 0/1/2 from the channel pointers, lane 3 from r2.31
+ mov (8) mubMSGPAYLOAD4(0, 0)<4> r[a0.2, 1]<16;8,2> { NoDDClr }
+ mov (8) mubMSGPAYLOAD4(0, 1)<4> r[a0.1, 1]<16;8,2> { NoDDClr, NoDDChk }
+ mov (8) mubMSGPAYLOAD4(0, 2)<4> r[a0.0, 1]<16;8,2> { NoDDClr, NoDDChk }
+ mov (8) mubMSGPAYLOAD4(0, 3)<4> r2.31:ub { NoDDChk }
+
+ mov (8) mubMSGPAYLOAD4(1, 0)<4> r[a0.2, 33]<16;8,2> { NoDDClr }
+ mov (8) mubMSGPAYLOAD4(1, 1)<4> r[a0.1, 33]<16;8,2> { NoDDClr, NoDDChk }
+ mov (8) mubMSGPAYLOAD4(1, 2)<4> r[a0.0, 33]<16;8,2> { NoDDClr, NoDDChk }
+ mov (8) mubMSGPAYLOAD4(1, 3)<4> r2.31:ub { NoDDChk }
+ // PAYLOAD5: source offsets 1/33 through pointers a0.4-a0.6
+ mov (8) mubMSGPAYLOAD5(0, 0)<4> r[a0.6, 1]<16;8,2> { NoDDClr }
+ mov (8) mubMSGPAYLOAD5(0, 1)<4> r[a0.5, 1]<16;8,2> { NoDDClr, NoDDChk }
+ mov (8) mubMSGPAYLOAD5(0, 2)<4> r[a0.4, 1]<16;8,2> { NoDDClr, NoDDChk }
+ mov (8) mubMSGPAYLOAD5(0, 3)<4> r2.31:ub { NoDDChk }
+
+ mov (8) mubMSGPAYLOAD5(1, 0)<4> r[a0.6, 33]<16;8,2> { NoDDClr }
+ mov (8) mubMSGPAYLOAD5(1, 1)<4> r[a0.5, 33]<16;8,2> { NoDDClr, NoDDChk }
+ mov (8) mubMSGPAYLOAD5(1, 2)<4> r[a0.4, 33]<16;8,2> { NoDDClr, NoDDChk }
+ mov (8) mubMSGPAYLOAD5(1, 3)<4> r2.31:ub { NoDDChk }
+ // PAYLOAD6: pointers a0.0-a0.2, source offsets 17/49
+ mov (8) mubMSGPAYLOAD6(0, 0)<4> r[a0.2, 17]<16;8,2> { NoDDClr }
+ mov (8) mubMSGPAYLOAD6(0, 1)<4> r[a0.1, 17]<16;8,2> { NoDDClr, NoDDChk }
+ mov (8) mubMSGPAYLOAD6(0, 2)<4> r[a0.0, 17]<16;8,2> { NoDDClr, NoDDChk }
+ mov (8) mubMSGPAYLOAD6(0, 3)<4> r2.31:ub { NoDDChk }
+
+ mov (8) mubMSGPAYLOAD6(1, 0)<4> r[a0.2, 49]<16;8,2> { NoDDClr }
+ mov (8) mubMSGPAYLOAD6(1, 1)<4> r[a0.1, 49]<16;8,2> { NoDDClr, NoDDChk }
+ mov (8) mubMSGPAYLOAD6(1, 2)<4> r[a0.0, 49]<16;8,2> { NoDDClr, NoDDChk }
+ mov (8) mubMSGPAYLOAD6(1, 3)<4> r2.31:ub { NoDDChk }
+ // PAYLOAD7: pointers a0.4-a0.6, source offsets 17/49
+ mov (8) mubMSGPAYLOAD7(0, 0)<4> r[a0.6, 17]<16;8,2> { NoDDClr }
+ mov (8) mubMSGPAYLOAD7(0, 1)<4> r[a0.5, 17]<16;8,2> { NoDDClr, NoDDChk }
+ mov (8) mubMSGPAYLOAD7(0, 2)<4> r[a0.4, 17]<16;8,2> { NoDDClr, NoDDChk }
+ mov (8) mubMSGPAYLOAD7(0, 3)<4> r2.31:ub { NoDDChk }
+
+ mov (8) mubMSGPAYLOAD7(1, 0)<4> r[a0.6, 49]<16;8,2> { NoDDClr }
+ mov (8) mubMSGPAYLOAD7(1, 1)<4> r[a0.5, 49]<16;8,2> { NoDDClr, NoDDChk }
+ mov (8) mubMSGPAYLOAD7(1, 2)<4> r[a0.4, 49]<16;8,2> { NoDDClr, NoDDChk }
+ mov (8) mubMSGPAYLOAD7(1, 3)<4> r2.31:ub { NoDDChk }
+
+ // send buffer 2 and buffer 3: eight block writes, one per header register; no response is kept (null destination)
+ send (1) null<1>:d r28 0x5 0x60A8018:ud // header r28, payload PAYLOAD0 (base r29)
+ send (1) null<1>:d r37 0x5 0x60A8018:ud // header r37, payload PAYLOAD1 (base r38)
+ send (1) null<1>:d r46 0x5 0x60A8018:ud // header r46, payload PAYLOAD2 (base r47)
+ send (1) null<1>:d r55 0x5 0x60A8018:ud // header r55, payload PAYLOAD3 (base r56)
+ // 0x60A8018 = media-block-write template 0x020A8000 (see the descriptor notes earlier in this file) with message length 3 and binding table index 0x18
+ send (1) null<1>:d r31 0x5 0x60A8018:ud // header r31, payload PAYLOAD4 (base r32)
+ send (1) null<1>:d r40 0x5 0x60A8018:ud // header r40, payload PAYLOAD5 (base r41)
+ send (1) null<1>:d r49 0x5 0x60A8018:ud // header r49, payload PAYLOAD6 (base r50)
+ send (1) null<1>:d r58 0x5 0x60A8018:ud // header r58, payload PAYLOAD7 (base r59)
diff --git a/src/shaders/post_processing/gen7/Set_AVS_Buf_0123_BGRA.g4a b/src/shaders/post_processing/gen7/Set_AVS_Buf_0123_BGRA.g4a
new file mode 100644
index 0000000..4c4144f
--- /dev/null
+++ b/src/shaders/post_processing/gen7/Set_AVS_Buf_0123_BGRA.g4a
@@ -0,0 +1,362 @@
+/*
+ * Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// 7 // Total instruction count
+// 1 // Total kernel count
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16)
+.default_register_type :ub
+
+.reg_count_total 128
+.reg_count_payload 7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later)
+// 0000,0000,0000
+// 0001(Spawn a root thread),0001 (Root thread spawn thread)
+// = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later)
+// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+// 0000,0000 (Binding table index, added later)
+// = 0x0C098700 (from the bit fields above; 0x02000011 is the thread-spawner value)
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+ // Message Descriptors
+ // = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+ // 1 (header present 1) 0 1010 (media block write) 000000
+ // 00000000 (binding table index - set later)
+ // = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+ // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT
+
+
+.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+
+.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+
+.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+
+
+// End of common.inc
+
+
+//Module Name: Set_AVS_Buf_0123_BGRA.asm
+
+
+
+//Module Name: Set_Buf_0123_BGRA
+
+
+
+
+// Description: Includes all definitions explicit to Fast Composite.
+
+
+
+
+// End of common.inc
+
+
+//========== GRF partition ==========
+ // r0 header : r0 (1 GRF)
+ // Static parameters : r1 - r6 (6 GRFS)
+ // Inline parameters : r7 - r8 (2 GRFs)
+ // MSGSRC : r27 (1 GRF)
+//===================================
+
+//Interface:
+//========== Static Parameters (Explicit To Fast Composite) ==========
+//r1
+//CSC Set 0
+
+
+.declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud
+
+//Constant alpha
+
+
+//r2
+
+
+// Gen7 AVS WA
+
+
+// WiDi Definitions
+
+
+//Colorfill
+
+
+ // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise.
+
+.declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub
+
+//r3
+//Normalised Ratio of Horizontal step size with main video for all layers
+
+
+ //Normalised Ratio of Horizontal step size with main video for all layers becomes
+ //Normalised Horizontal step size for all layers in VP_Setup.asm
+
+
+//r4
+//Normalised Vertical step size for all layers
+
+
+//r5
+//Normalised Vertical Frame Origin for all layers
+
+
+//r6
+//Normalised Horizontal Frame Origin for all layers
+
+
+//========== Inline Parameters (Explicit To Fast Composite) ==========
+
+
+//Main video Step X
+
+
+//====================== Binding table (Explicit To Fast Composite)=========================================
+
+
+//Used by Interlaced Scaling Kernels
+
+
+//========== Sampler State Table Index (Explicit To Fast Composite)==========
+//Sampler Index for AVS/IEF messages
+
+
+//Sampler Index for SIMD16 sampler messages
+
+
+//=============================================================================
+
+.declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+
+.declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+.declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_4 Base=r28.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_5 Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+.declare ubBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+.declare ub4BUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+
+//Pointer to mask reg
+
+
+//r18
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+.declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF
+
+//r19
+
+
+.declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF
+
+
+//r20
+
+.declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF
+
+//r21
+
+.declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF
+
+//r22
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as
+//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT
+
+//r23
+
+
+//Lumakey
+
+
+//r24
+
+
+//r25
+
+
+//r26
+
+
+//defines to generate LABELS during compile time.
+
+
+ //AVS LAYOUT:(UUYYVVAA)
+ //Assign buffer channel order for Buffer 0123 in the order AUYV a0.3>A, a0.2>U, a0.1>Y, a0.0>V
+ // V = 8, Y= 4, U = 0, A = 12.
+ mov (4) acc0.0<1>:w 0x6AE2:v // assuming :v packs signed 4-bit values, low nibble first: 2, -2, -6, 6
+ add (4) acc0.0<1>:w acc0<4;4,1>:w 70:uw // + 70 -> register numbers 72, 68, 64, 76 (= 64 + 8, 4, 0, 12, matching V/Y/U/A above)
+ shl (4) r22.0<1>:w acc0<4;4,1>:w 5:uw // * 32 -> byte addresses, stored in r22.0-r22.3
+
+ //OPT: wAVS_SU_SHUFFLE_PTR_0 and udAVS_SU_SHUFFLE_OFF_0 are sub-regs of same GRF. -rT
+
+ //SU LAYOUT:(VYUAVYUA)
+ //V = 4, Y = 2, U = 0, A = 6
+ mov (4) acc0.0<1>:w 0x6024:v // packed nibbles, low first: 4, 2, 0, 6
+ add (4) acc0.0<1>:w acc0<4;4,1>:w 64:uw // + 64 -> register numbers 68, 66, 64, 70
+ shl (4) r18.0<1>:w acc0<4;4,1>:w 5:uw { NoDDClr } //Convert to BYTE address.
+
+ //OFFSET:
+ mov (1) r18.4<1>:ud 0x1000100:ud { NoDDChk } // dword 4 of r18 = words r18.8 and r18.9, each 0x0100 (256)
+
+
diff --git a/src/shaders/post_processing/gen7/Set_AVS_Buf_0123_PL2.g4a b/src/shaders/post_processing/gen7/Set_AVS_Buf_0123_PL2.g4a
new file mode 100644
index 0000000..1d38ae2
--- /dev/null
+++ b/src/shaders/post_processing/gen7/Set_AVS_Buf_0123_PL2.g4a
@@ -0,0 +1,361 @@
+/*
+ * Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// 7 // Total instruction count
+// 1 // Total kernel count
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16) // presumably the channel count used when an instruction omits (N); every instruction in this file states its own
+.default_register_type :ub // presumably the operand type used when no :type suffix is given; every operand in this file is typed explicitly
+// NOTE(review): the GRF partition notes further down list r0 plus r1-r8 as header/parameters, while the payload count below is 7 - confirm.
+.reg_count_total 128
+.reg_count_payload 7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later)
+// 0000,0000,0000
+// 0001(Spawn a root thread),0001 (Root thread spawn thread)
+// = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later)
+// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+// 0000,0000 (Binding table index, added later)
+// = 0x0C098700
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+ // Message Descriptors
+ // = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+ // 1 (header present 1) 0 1010 (media block write) 000000
+ // 00000000 (binding table index - set later)
+ // = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+ // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT
+
+
+.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub // unsigned byte view; all four MSGPAYLOAD names alias the same base register r30
+.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw // unsigned word view of r30
+.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud // unsigned dword view of r30
+.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f // float view of r30
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+
+.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f // float view; all five ROBUF names alias the same base register r9
+
+.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud // unsigned dword view of r9
+
+.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw // unsigned word view of r9
+
+.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub // unsigned byte view of r9, unit stride
+
+.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub // unsigned byte view of r9 with stride 4 (every fourth byte)
+
+
+// End of common.inc
+
+
+//Module Name: Set_AVS_Buf_0123_PL2.asm
+
+
+
+//Module Name: Set_Buf_0123_PL2
+
+
+
+
+// Description: Includes all definitions explicit to Fast Composite.
+
+
+
+
+// End of common.inc
+
+
+//========== GRF partition ==========
+ // r0 header : r0 (1 GRF)
+ // Static parameters : r1 - r6 (6 GRFS)
+ // Inline parameters : r7 - r8 (2 GRFs)
+ // MSGSRC : r27 (1 GRF)
+//===================================
+
+//Interface:
+//========== Static Parameters (Explicit To Fast Composite) ==========
+//r1
+//CSC Set 0
+
+
+.declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud // unsigned dword view of r1; the GRF partition notes above list r1 - r6 as static parameters
+
+//Constant alpha
+
+
+//r2
+
+
+// Gen7 AVS WA
+
+
+// WiDi Definitions
+
+
+//Colorfill
+
+
+ // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise.
+
+.declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub // single unsigned byte at r2.20; the zero-stride source region <0;1,0> re-reads that one byte
+
+//r3
+//Normalised Ratio of Horizontal step size with main video for all layers
+
+
+ //Normalised Ratio of Horizontal step size with main video for all layers becomes
+ //Normalised Horizontal step size for all layers in VP_Setup.asm
+
+
+//r4
+//Normalised Vertical step size for all layers
+
+
+//r5
+//Normalised Vertical Frame Origin for all layers
+
+
+//r6
+//Normalised Horizontal Frame Origin for all layers
+
+
+//========== Inline Parameters (Explicit To Fast Composite) ==========
+
+
+//Main video Step X
+
+
+//====================== Binding table (Explicit To Fast Composite)=========================================
+
+
+//Used by Interlaced Scaling Kernels
+
+
+//========== Sampler State Table Index (Explicit To Fast Composite)==========
+//Sampler Index for AVS/IEF messages
+
+
+//Sampler Index for SIMD16 sampler messages
+
+
+//=============================================================================
+
+.declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f // float views; BUFFER_0..3 bases are 16 GRFs apart (r64, r80, r96, r112), BUFFER_4/5 are at r28/r46
+.declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+
+.declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud // unsigned dword views of the same six base registers
+.declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+.declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw // unsigned word views of the same six base registers
+.declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_4 Base=r28.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_5 Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+.declare ubBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub // unsigned byte views of the same six base registers, unit stride
+.declare ubBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+.declare ub4BUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub // unsigned byte views with stride 4 (every fourth byte) of the same six base registers
+.declare ub4BUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+
+//Pointer to mask reg
+
+
+//r18
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+.declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF; unsigned dword view of r18 (r18 is also written by the pointer setup at the end of this file)
+
+//r19
+
+
+.declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF; unsigned dword view of r19
+
+
+//r20
+
+.declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF; sixteen contiguous unsigned words in r20
+
+//r21
+
+.declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF; sixteen contiguous unsigned words in r21
+
+//r22
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as
+//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT
+
+//r23
+
+
+//Lumakey
+
+
+//r24
+
+
+//r25
+
+
+//r26
+
+
+//defines to generate LABELS during compile time.
+
+
+ //AVS LAYOUT: (YYUUVVAA)
+ //Assign buffer channel order for Buffer 0123 in the order AUYV a0.3>A, a0.2>U, a0.1>Y, a0.0>V
+ //For PL2-AVS: V = 8, Y= 0, U = 4, A = 12.
+ mov (4) acc0.0<1>:w 0x6EA2:v //Subtract 6 from 0,4,8,12: the packed 4-bit lanes, lane 0 first, are 2,-6,-2,6 (V,Y,U,A)
+ add (4) acc0.0<1>:w acc0<4;4,1>:w 70:uw //70 = 6 + 64: add 6 back and add 64 (matches the r64 base of BUFFER_0), giving 72,64,68,76
+ shl (4) r22.0<1>:w acc0<4;4,1>:w 5:uw //Convert to BYTE address (times 32): r22 words 0-3 = 2304,2048,2176,2432
+
+ //OPT: wAVS_SU_SHUFFLE_PTR_0 and udAVS_SU_SHUFFLE_OFF_0 are sub-regs of same GRF. -rT
+ //SU LAYOUT:(YUVAYUVA)
+ //V = 4, Y = 0, U = 2, A = 6
+ mov (4) acc0.0<1>:w 0x6204:v //Packed 4-bit lanes, lane 0 first: 4,0,2,6 (V,Y,U,A); no bias needed
+ add (4) acc0.0<1>:w acc0<4;4,1>:w 64:uw //Add 64, giving 68,64,66,70
+ shl (4) r18.0<1>:w acc0<4;4,1>:w 5:uw { NoDDClr } //Convert to BYTE address: r18 words 0-3 = 2176,2048,2112,2240
+ //NoDDClr above pairs with NoDDChk below: both write r18 but, taking .4 as a ud index, different sub-registers.
+ //OFFSET:
+ mov (1) r18.4<1>:ud 0x1000100:ud { NoDDChk } //Both 16-bit halves are 0x0100 (256); r18 is also declared above as udCSC_COEFF_0
+
+
diff --git a/src/shaders/post_processing/gen7/Set_AVS_Buf_0123_PL3.g4a b/src/shaders/post_processing/gen7/Set_AVS_Buf_0123_PL3.g4a
new file mode 100644
index 0000000..f4d1e1d
--- /dev/null
+++ b/src/shaders/post_processing/gen7/Set_AVS_Buf_0123_PL3.g4a
@@ -0,0 +1,361 @@
+/*
+ * Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// 7 // Total instruction count
+// 1 // Total kernel count
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16) // presumably the channel count used when an instruction omits (N); every instruction in this file states its own
+.default_register_type :ub // presumably the operand type used when no :type suffix is given; every operand in this file is typed explicitly
+// NOTE(review): the GRF partition notes further down list r0 plus r1-r8 as header/parameters, while the payload count below is 7 - confirm.
+.reg_count_total 128
+.reg_count_payload 7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later)
+// 0000,0000,0000
+// 0001(Spawn a root thread),0001 (Root thread spawn thread)
+// = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later)
+// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+// 0000,0000 (Binding table index, added later)
+// = 0x0C098700
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+ // Message Descriptors
+ // = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+ // 1 (header present 1) 0 1010 (media block write) 000000
+ // 00000000 (binding table index - set later)
+ // = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+ // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT
+
+
+.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub // unsigned byte view; all four MSGPAYLOAD names alias the same base register r30
+.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw // unsigned word view of r30
+.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud // unsigned dword view of r30
+.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f // float view of r30
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+
+.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f // float view; all five ROBUF names alias the same base register r9
+
+.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud // unsigned dword view of r9
+
+.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw // unsigned word view of r9
+
+.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub // unsigned byte view of r9, unit stride
+
+.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub // unsigned byte view of r9 with stride 4 (every fourth byte)
+
+
+// End of common.inc
+
+
+//Module Name: Set_AVS_Buf_0123_PL3.asm
+
+
+
+//Module Name: Set_Buf_0123_PL3
+
+
+
+
+// Description: Includes all definitions explicit to Fast Composite.
+
+
+
+
+// End of common.inc
+
+
+//========== GRF partition ==========
+ // r0 header : r0 (1 GRF)
+ // Static parameters : r1 - r6 (6 GRFS)
+ // Inline parameters : r7 - r8 (2 GRFs)
+ // MSGSRC : r27 (1 GRF)
+//===================================
+
+//Interface:
+//========== Static Parameters (Explicit To Fast Composite) ==========
+//r1
+//CSC Set 0
+
+
+.declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud // unsigned dword view of r1; the GRF partition notes above list r1 - r6 as static parameters
+
+//Constant alpha
+
+
+//r2
+
+
+// Gen7 AVS WA
+
+
+// WiDi Definitions
+
+
+//Colorfill
+
+
+ // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise.
+
+.declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub // single unsigned byte at r2.20; the zero-stride source region <0;1,0> re-reads that one byte
+
+//r3
+//Normalised Ratio of Horizontal step size with main video for all layers
+
+
+ //Normalised Ratio of Horizontal step size with main video for all layers becomes
+ //Normalised Horizontal step size for all layers in VP_Setup.asm
+
+
+//r4
+//Normalised Vertical step size for all layers
+
+
+//r5
+//Normalised Vertical Frame Origin for all layers
+
+
+//r6
+//Normalised Horizontal Frame Origin for all layers
+
+
+//========== Inline Parameters (Explicit To Fast Composite) ==========
+
+
+//Main video Step X
+
+
+//====================== Binding table (Explicit To Fast Composite)=========================================
+
+
+//Used by Interlaced Scaling Kernels
+
+
+//========== Sampler State Table Index (Explicit To Fast Composite)==========
+//Sampler Index for AVS/IEF messages
+
+
+//Sampler Index for SIMD16 sampler messages
+
+
+//=============================================================================
+
+.declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f // float views; BUFFER_0..3 bases are 16 GRFs apart (r64, r80, r96, r112), BUFFER_4/5 are at r28/r46
+.declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+
+.declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud // unsigned dword views of the same six base registers
+.declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+.declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw // unsigned word views of the same six base registers
+.declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_4 Base=r28.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_5 Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+.declare ubBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub // unsigned byte views of the same six base registers, unit stride
+.declare ubBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+.declare ub4BUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub // unsigned byte views with stride 4 (every fourth byte) of the same six base registers
+.declare ub4BUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+
+//Pointer to mask reg
+
+
+//r18
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+.declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF; unsigned dword view of r18 (r18 is also written by the pointer setup at the end of this file)
+
+//r19
+
+
+.declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF; unsigned dword view of r19
+
+
+//r20
+
+.declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF; sixteen contiguous unsigned words in r20
+
+//r21
+
+.declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF; sixteen contiguous unsigned words in r21
+
+//r22
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as
+//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT
+
+//r23
+
+
+//Lumakey
+
+
+//r24
+
+
+//r25
+
+
+//r26
+
+
+//defines to generate LABELS during compile time.
+
+
+ //AVS LAYOUT: (YYUUVVAA)
+ //Assign buffer channel order for Buffer 0123 in the order AUYV a0.3>A, a0.2>U, a0.1>Y, a0.0>V
+ //For PL3-AVS: V = 8, Y= 0, U = 4, A = 12.
+ mov (4) acc0.0<1>:w 0x6EA2:v //Subtract 6 from 0,4,8,12: the packed 4-bit lanes, lane 0 first, are 2,-6,-2,6 (V,Y,U,A)
+ add (4) acc0.0<1>:w acc0<4;4,1>:w 70:uw //70 = 6 + 64: add 6 back and add 64 (matches the r64 base of BUFFER_0), giving 72,64,68,76
+ shl (4) r22.0<1>:w acc0<4;4,1>:w 5:uw //Convert to BYTE address (times 32): r22 words 0-3 = 2304,2048,2176,2432
+
+ //OPT: wAVS_SU_SHUFFLE_PTR_0 and udAVS_SU_SHUFFLE_OFF_0 are sub-regs of same GRF. -rT
+ //SU LAYOUT:(YUVAYUVA)
+ //V = 4, Y = 0, U = 2, A = 6
+ mov (4) acc0.0<1>:w 0x6204:v //Packed 4-bit lanes, lane 0 first: 4,0,2,6 (V,Y,U,A); no bias needed
+ add (4) acc0.0<1>:w acc0<4;4,1>:w 64:uw //Add 64, giving 68,64,66,70
+ shl (4) r18.0<1>:w acc0<4;4,1>:w 5:uw { NoDDClr } //Convert to BYTE address: r18 words 0-3 = 2176,2048,2112,2240
+ //NoDDClr above pairs with NoDDChk below: both write r18 but, taking .4 as a ud index, different sub-registers.
+ //OFFSET:
+ mov (1) r18.4<1>:ud 0x1000100:ud { NoDDChk } //Both 16-bit halves are 0x0100 (256); r18 is also declared above as udCSC_COEFF_0
+
+
diff --git a/src/shaders/post_processing/gen7/Set_AVS_Buf_0123_VUYA.g4a b/src/shaders/post_processing/gen7/Set_AVS_Buf_0123_VUYA.g4a
new file mode 100644
index 0000000..97fda3c
--- /dev/null
+++ b/src/shaders/post_processing/gen7/Set_AVS_Buf_0123_VUYA.g4a
@@ -0,0 +1,362 @@
+/*
+ * Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// 7 // Total instruction count
+// 1 // Total kernel count
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16) // presumably the channel count used when an instruction omits (N); every instruction in this file states its own
+.default_register_type :ub // presumably the operand type used when no :type suffix is given; every operand in this file is typed explicitly
+// NOTE(review): the GRF partition notes further down list r0 plus r1-r8 as header/parameters, while the payload count below is 7 - confirm.
+.reg_count_total 128
+.reg_count_payload 7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later)
+// 0000,0000,0000
+// 0001(Spawn a root thread),0001 (Root thread spawn thread)
+// = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later)
+// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+// 0000,0000 (Binding table index, added later)
+// = 0x0C098700
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+ // Message Descriptors
+ // = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+ // 1 (header present 1) 0 1010 (media block write) 000000
+ // 00000000 (binding table index - set later)
+ // = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+ // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT
+
+
+.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub // unsigned byte view; all four MSGPAYLOAD names alias the same base register r30
+.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw // unsigned word view of r30
+.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud // unsigned dword view of r30
+.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f // float view of r30
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+
+.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f // float view; all five ROBUF names alias the same base register r9
+
+.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud // unsigned dword view of r9
+
+.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw // unsigned word view of r9
+
+.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub // unsigned byte view of r9, unit stride
+
+.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub // unsigned byte view of r9 with stride 4 (every fourth byte)
+
+
+// End of common.inc
+
+
+//Module Name: Set_AVS_Buf_0123_VUYA.asm
+
+
+
+// Module Name : Set_Buf_0123_VUYA
+
+
+
+
+// Description: Includes all definitions explicit to Fast Composite.
+
+
+
+
+// End of common.inc
+
+
+//========== GRF partition ==========
+ // r0 header : r0 (1 GRF)
+ // Static parameters : r1 - r6 (6 GRFS)
+ // Inline parameters : r7 - r8 (2 GRFs)
+ // MSGSRC : r27 (1 GRF)
+//===================================
+
+//Interface:
+//========== Static Parameters (Explicit To Fast Composite) ==========
+//r1
+//CSC Set 0
+
+
+.declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud // unsigned dword view of r1; the GRF partition notes above list r1 - r6 as static parameters
+
+//Constant alpha
+
+
+//r2
+
+
+// Gen7 AVS WA
+
+
+// WiDi Definitions
+
+
+//Colorfill
+
+
+ // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise.
+
+.declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub // single unsigned byte at r2.20; the zero-stride source region <0;1,0> re-reads that one byte
+
+//r3
+//Normalised Ratio of Horizontal step size with main video for all layers
+
+
+ //Normalised Ratio of Horizontal step size with main video for all layers becomes
+ //Normalised Horizontal step size for all layers in VP_Setup.asm
+
+
+//r4
+//Normalised Vertical step size for all layers
+
+
+//r5
+//Normalised Vertical Frame Origin for all layers
+
+
+//r6
+//Normalised Horizontal Frame Origin for all layers
+
+
+//========== Inline Parameters (Explicit To Fast Composite) ==========
+
+
+//Main video Step X
+
+
+//====================== Binding table (Explicit To Fast Composite)=========================================
+
+
+//Used by Interlaced Scaling Kernels
+
+
+//========== Sampler State Table Index (Explicit To Fast Composite)==========
+//Sampler Index for AVS/IEF messages
+
+
+//Sampler Index for SIMD16 sampler messages
+
+
+//=============================================================================
+
+.declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f // float views; BUFFER_0..3 bases are 16 GRFs apart (r64, r80, r96, r112), BUFFER_4/5 are at r28/r46
+.declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+
+.declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud // unsigned dword views of the same six base registers
+.declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+.declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw // unsigned word views of the same six base registers
+.declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_4 Base=r28.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_5 Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+.declare ubBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub // unsigned byte views of the same six base registers, unit stride
+.declare ubBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+.declare ub4BUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub // unsigned byte views with stride 4 (every fourth byte) of the same six base registers
+.declare ub4BUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+
+//Pointer to mask reg
+
+
+//r18
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+.declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF; unsigned dword view of r18 (r18 is also written by the pointer setup at the end of this file)
+
+//r19
+
+
+.declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF; unsigned dword view of r19
+
+
+//r20
+
+.declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF; sixteen contiguous unsigned words in r20
+
+//r21
+
+.declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF; sixteen contiguous unsigned words in r21
+
+//r22
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as
+//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT
+
+//r23
+
+
+//Lumakey
+
+
+//r24
+
+
+//r25
+
+
+//r26
+
+
+//defines to generate LABELS during compile time.
+
+
+ //For AVS: We use surface state as R8G8B8A8_UNORM and hence set pointers to VUYA.
+ //AVS LAYOUT:(VVUUYYAA)
+ //Assign buffer channel order for Buffer 0123 in the order AUYV a0.3>A, a0.2>U, a0.1>Y, a0.0>V
+ //V = 0, Y= 8, U = 4, A = 12.
+ mov (4) acc0.0<1>:w 0x6E2A:v //Packed 4-bit lanes, lane 0 first: -6,2,-2,6 = the V,Y,U,A offsets minus 6
+ add (4) acc0.0<1>:w acc0<4;4,1>:w 70:uw //70 = 6 + 64: removes the -6 bias and adds 64 (matches the r64 base of BUFFER_0), giving 64,72,68,76
+ shl (4) r22.0<1>:w acc0<4;4,1>:w 5:uw //Times 32 (convert to BYTE address): r22 words 0-3 = 2048,2304,2176,2432
+
+ //Used by Shuffle.
+ //SU LAYOUT:(VUYAVUYA)
+ //V = 0, Y = 4, U = 2, A = 6
+ mov (4) acc0.0<1>:w 0x6240:v //Packed 4-bit lanes, lane 0 first: 0,4,2,6 (V,Y,U,A); no bias needed
+ add (4) acc0.0<1>:w acc0<4;4,1>:w 64:uw //Add 64, giving 64,68,66,70
+ shl (4) r18.0<1>:w acc0<4;4,1>:w 5:uw { NoDDClr } //Convert to BYTE address: r18 words 0-3 = 2048,2176,2112,2240
+ //NoDDClr above pairs with NoDDChk below: both write r18 but, taking .4 as a ud index, different sub-registers.
+ //OFFSET:
+ mov (1) r18.4<1>:ud 0x1000100:ud { NoDDChk } //Both 16-bit halves are 0x0100 (256); r18 is also declared above as udCSC_COEFF_0
+
+
diff --git a/src/shaders/post_processing/gen7/Set_AVS_Buf_0123_VYUA.g4a b/src/shaders/post_processing/gen7/Set_AVS_Buf_0123_VYUA.g4a
new file mode 100644
index 0000000..cde1aea
--- /dev/null
+++ b/src/shaders/post_processing/gen7/Set_AVS_Buf_0123_VYUA.g4a
@@ -0,0 +1,362 @@
+/*
+ * Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// 7 // Total instruction count
+// 1 // Total kernel count
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16)
+.default_register_type :ub
+
+.reg_count_total 128
+.reg_count_payload 7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later)
+// 0000,0000,0000
+// 0001(Spawn a root thread),0001 (Root thread spawn thread)
+// = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later)
+// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+// 0000,0000 (Binding table index, added later)
+// = 0x0C098700
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+ // Message Descriptors
+ // = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+ // 1 (header present 1) 0 1010 (media block write) 000000
+ // 00000000 (binding table index - set later)
+ // = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+ // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT
+
+
+.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+
+.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+
+.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+
+
+// End of common.inc
+
+
+//Module Name: Set_AVS_Buf_0123_VYUA.asm
+
+
+
+//Module Name: Set_Buf_0123_VYUA
+
+
+
+
+// Description: Includes all definitions explicit to Fast Composite.
+
+
+
+
+// End of common.inc
+
+
+//========== GRF partition ==========
+ // r0 header : r0 (1 GRF)
+ // Static parameters : r1 - r6 (6 GRFS)
+ // Inline parameters : r7 - r8 (2 GRFs)
+ // MSGSRC : r27 (1 GRF)
+//===================================
+
+//Interface:
+//========== Static Parameters (Explicit To Fast Composite) ==========
+//r1
+//CSC Set 0
+
+
+.declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud
+
+//Constant alpha
+
+
+//r2
+
+
+// Gen7 AVS WA
+
+
+// WiDi Definitions
+
+
+//Colorfill
+
+
+ // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise.
+
+.declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub
+
+//r3
+//Normalised Ratio of Horizontal step size with main video for all layers
+
+
+ //Normalised Ratio of Horizontal step size with main video for all layers becomes
+ //Normalised Horizontal step size for all layers in VP_Setup.asm
+
+
+//r4
+//Normalised Vertical step size for all layers
+
+
+//r5
+//Normalised Vertical Frame Origin for all layers
+
+
+//r6
+//Normalised Horizontal Frame Origin for all layers
+
+
+//========== Inline Parameters (Explicit To Fast Composite) ==========
+
+
+//Main video Step X
+
+
+//====================== Binding table (Explicit To Fast Composite)=========================================
+
+
+//Used by Interlaced Scaling Kernels
+
+
+//========== Sampler State Table Index (Explicit To Fast Composite)==========
+//Sampler Index for AVS/IEF messages
+
+
+//Sampler Index for SIMD16 sampler messages
+
+
+//=============================================================================
+
+.declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+
+.declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+.declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_4 Base=r28.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_5 Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+.declare ubBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+.declare ub4BUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+
+//Pointer to mask reg
+
+
+//r18
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+.declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF
+
+//r19
+
+
+.declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF
+
+
+//r20
+
+.declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF
+
+//r21
+
+.declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF
+
+//r22
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as
+//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT
+
+//r23
+
+
+//Lumakey
+
+
+//r24
+
+
+//r25
+
+
+//r26
+
+
+//defines to generate LABELS during compile time.
+
+
+ //AVS LAYOUT:(VVYYUUAA)
+ //Assign buffer channel order for Buffer 0123 in the order AUYV a0.3>A, a0.2>U, a0.1>Y, a0.0>V
+ // V = 0, Y= 4, U = 8, A = 12.
+ mov (4) acc0.0<1>:w 0x62EA:v //Subtract 6 from 0,4,8,12 -> -6,-2,2,6 (one :v nibble per element, low nibble first)
+ add (4) acc0.0<1>:w acc0<4;4,1>:w 70:uw //add 6 back plus 64 -> 64,68,72,76 (64 presumably = BUFFER_0, declared at r64)
+ shl (4) r22.0<1>:w acc0<4;4,1>:w 5:uw //Convert to BYTE address (shift by 5 = x32); written to words r22.0-r22.3.
+
+ //OPT: wAVS_SU_SHUFFLE_PTR_0 and udAVS_SU_SHUFFLE_OFF_0 are sub-regs of same GRF. -rT
+ //Below, r18 words 0-3 and r18 dword 4 are the two disjoint writes that carry the NoDDClr/NoDDChk pair.
+ //SU LAYOUT:(VYUAVYUA)
+ //V = 0, Y = 2, U = 4, A = 6
+ mov (4) acc0.0<1>:w 0x6420:v //0,2,4,6 (low nibble first); no -6 bias is used here
+ add (4) acc0.0<1>:w acc0<4;4,1>:w 64:uw //-> 64,66,68,70
+ shl (4) r18.0<1>:w acc0<4;4,1>:w 5:uw { NoDDClr } //Convert to BYTE address.
+
+ //OFFSET:
+ mov (1) r18.4<1>:ud 0x1000100:ud { NoDDChk } //both 16-bit halves of the dword are 0x0100 (256)
+
+
diff --git a/src/shaders/post_processing/gen7/Set_Layer_0.g4a b/src/shaders/post_processing/gen7/Set_Layer_0.g4a
new file mode 100644
index 0000000..29cda8a
--- /dev/null
+++ b/src/shaders/post_processing/gen7/Set_Layer_0.g4a
@@ -0,0 +1,467 @@
+/*
+ * Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// 18 // Total instruction count
+// 1 // Total kernel count
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16)
+.default_register_type :ub
+
+.reg_count_total 128
+.reg_count_payload 7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later)
+// 0000,0000,0000
+// 0001(Spawn a root thread),0001 (Root thread spawn thread)
+// = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later)
+// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+// 0000,0000 (Binding table index, added later)
+// = 0x0C098700
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+ // Message Descriptors
+ // = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+ // 1 (header present 1) 0 1010 (media block write) 000000
+ // 00000000 (binding table index - set later)
+ // = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+ // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT
+
+
+.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+
+.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+
+.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+
+
+// End of common.inc
+
+
+
+
+
+
+//Module name: Set_Layer_N.inc
+
+
+
+
+// Description: Includes all definitions explicit to Fast Composite.
+
+
+
+
+// End of common.inc
+
+
+//========== GRF partition ==========
+ // r0 header : r0 (1 GRF)
+ // Static parameters : r1 - r6 (6 GRFS)
+ // Inline parameters : r7 - r8 (2 GRFs)
+ // MSGSRC : r27 (1 GRF)
+//===================================
+
+//Interface:
+//========== Static Parameters (Explicit To Fast Composite) ==========
+//r1
+//CSC Set 0
+
+
+.declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud
+
+//Constant alpha
+
+
+//r2
+
+
+// Gen7 AVS WA
+
+
+// WiDi Definitions
+
+
+//Colorfill
+
+
+ // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise.
+
+.declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub
+
+//r3
+//Normalised Ratio of Horizontal step size with main video for all layers
+
+
+ //Normalised Ratio of Horizontal step size with main video for all layers becomes
+ //Normalised Horizontal step size for all layers in VP_Setup.asm
+
+
+//r4
+//Normalised Vertical step size for all layers
+
+
+//r5
+//Normalised Vertical Frame Origin for all layers
+
+
+//r6
+//Normalised Horizontal Frame Origin for all layers
+
+
+//========== Inline Parameters (Explicit To Fast Composite) ==========
+
+
+//Main video Step X
+
+
+//====================== Binding table (Explicit To Fast Composite)=========================================
+
+
+//Used by Interlaced Scaling Kernels
+
+
+//========== Sampler State Table Index (Explicit To Fast Composite)==========
+//Sampler Index for AVS/IEF messages
+
+
+//Sampler Index for SIMD16 sampler messages
+
+
+//=============================================================================
+
+.declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+
+.declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+.declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_4 Base=r28.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_5 Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+.declare ubBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+.declare ub4BUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+
+//Pointer to mask reg
+
+
+//r18
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+.declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF
+
+//r19
+
+
+.declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF
+
+
+//r20
+
+.declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF
+
+//r21
+
+.declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF
+
+//r22
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as
+//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT
+
+//r23
+
+
+//Lumakey
+
+
+//r24
+
+
+//r25
+
+
+//r26
+
+
+//defines to generate LABELS during compile time.
+
+
+//Used to generate LABELS at compile time.
+
+
+//definitions for Expand Mask
+.declare uwMask_Temp1 Base=r17.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF
+.declare ubMask_Temp1 Base=r17.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub // 1 GRF
+.declare udMask_Temp1 Base=r17.0 ElementSize=4 Type=ud // 1 GRF
+.declare uwMask_Temp2 Base=r16.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF
+.declare ubMask_Temp2 Base=r16.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub // 1 GRF
+.declare udMask_Temp2 Base=r16.0 ElementSize=4 Type=ud // 1 GRF
+
+.declare uwMask_Temp3 Base=r15.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF
+.declare ubMask_Temp3 Base=r15.0 ElementSize=1 SrcRegion=<16;16,1> Type=ub // 1 GRF
+
+.declare udALPHA_MASK_REG Base=r21.0 ElementSize=4 Type=ud // 1 GRF
+.declare udALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=4 Type=ud // 1 GRF
+
+
+//Initialize mask reg to FFFF
+//(all 16 words of uwALPHA_MASK_REG, which is declared at r21.0, are set to 0xFFFF)
+ mov (16) uwALPHA_MASK_REG(0)<1> 0xFFFF:uw
+
+
+//Fast jump for -
+//LAYER0: we determine whether layer 0 is to be loaded and processed or not based
+// on block mask in module "Set_Layer_0" and store result in f0.1.
+// This flag is then used directly while loading buf0-3 and in colorfill.
+// (So flag f0.1 should not be changed from Set_Layer_0 till Colorfill)
+//
+//LAYER1-7: For all other layers, we compute whether layer is to be loaded and processed
+// based on block mask in module "Set_Layer_1-7" and store result in SKIP_LAYER
+// variable.
+// While Loading buf 4 and 5, we move SKIP_LAYER to f0.0 every time and use it
+// for Loading.
+// For processing though, we move SKIP_LAYER only once to f0.1 in module
+// "Set_Buf0_Buf4" and use f0.1 for deciding whether layer 1-7 (all 4 sub blocks)
+// is to be processed or not.
+// (So flag f0.1 should not be modified from module "Set_Buf0_Buf4" till the module
+// that processes sub-block 3).
+//
+//None of the above fast jumps apply to CSC modules. We always perform CSC irrespective of mask.
+//
+//Example: (Without going into finer details)
+// Typical Combined kernel:
+//
+// (let var = decision whether to load/process that layer)
+//
+// Set_Layer_0 //f0.1 <- var
+// ..
+// Set_Layer_1 //f0.1 <- var, SKIP_LAYER <- var
+// ..
+// Load buf 0 //use f0.1
+// Load buf 4 //f0.0 <- SKIP_LAYER
+// Load buf 1 //use f0.1
+// Load buf 5 //f0.0 <- SKIP_LAYER
+// Load buf 2 //use f0.1
+// Load buf 3 //use f0.1
+// ..
+// ..
+// Colorfill
+// ..
+// Set_Buf0_Buf4 //f0.1 <- SKIP_LAYER
+// process0-4 //Use f0.1
+// Load buf 4
+// Set_Buf1_Buf5
+// process1-5
+// Load buf 5
+// ..
+// Set_Layer_2 //f0.1 <-var, SKIP_LAYER <- var
+// ..
+// Set_Buf2_Buf4
+// process2-4
+// Load buf 4
+// Set_Buf3_Buf5
+// process3-5
+// Load buf 5
+// ..
+
+
+ //For layer 0, use f0.1 directly
+ cmp.ne.f0.1 (1) null<1>:d r7.2:uw 0:uw //f0.1 <- (r7.2 != 0)
+ (f0.1)cmp.ne.f0.1 (1) null<1>:d r7.3:uw 0:uw //predicated on f0.1, so f0.1 stays set only if r7.3 != 0 as well
+ (-f0.1) jmpi (1) SKIP_LAYER_L0 //f0.1 clear -> skip all of the layer-0 setup below
+
+ //Keep the low 2 bits of word r2.2 in byte r24.2 (meaning of this field is not visible here -- TODO confirm).
+ and (1) r24.2:ub r2.2<0;1,0>:uw 3:uw
+
+
+ //Copy all AVS Payload data
+ // Setup Message Payload Header for 1st block of Media Sampler 8x8 (16x4 for IVB+)
+ mov (1) r25.6:f r7.5:f { NoDDClr } //NLAS dx (r7.5 is the value multiplied as "ddx" in the X(8)/dx(8) math below)
+ mov (1) r25.4:f r3.0:f { NoDDClr, NoDDChk } //Step X
+ mov (1) r25.5:f r4.0:f { NoDDClr, NoDDChk } //Step Y
+
+ //The five moves fill r25.2-r25.6; the first carries NoDDClr only and the last NoDDChk only.
+ mov (1) r25.2:f r6.0<0;1,0>:f { NoDDClr, NoDDChk } //Orig X
+ mov (1) r25.3:f r5.0<0;1,0>:f { NoDDChk } //Orig Y
+
+ //NLAS calculations for 2nd half of blocks of Media Sampler 8x8:
+ // X(i) = X0 + dx*i + ddx*i*(i-1)/2 ==> X(8) = X0 + dx*8 +ddx*28
+ // dx(i)= dx(0) + ddx*i ==> dx(8)= dx + ddx*8
+
+ //OPTIMIZATION: fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY - are sub registers of same GRF. Use NODDCLR NODDCHK. -rT
+
+ // Calculating X(8)
+ mov (1) acc0.2:f r6.0:f //seed the accumulator with X0 (r6.0, Orig X)
+ mac (1) acc0.2:f r3.0:f 8.0:f //+ dx*8 (r3.0, Step X)
+ mac (1) r23.2:f r7.5:f 28.0:f { NoDDClr } //+ ddx*28 (r7.5) -> r23.2
+
+ // Calculating Y(4)
+ mul (1) r23.1<1>:f r4.0:f 4.0:f { NoDDClr, NoDDChk } //dY*4
+
+ // Calculating dx(8)
+ mov (1) acc0.4:f r3.0:f //seed the accumulator with dx (r3.0, Step X)
+ mac (1) r23.4:f r7.5:f 8.0:f { NoDDClr, NoDDChk } //+ ddx*8 (r7.5) -> r23.4
+
+ // Binding Index
+ mov (1) r23.5:ud 0:ud { NoDDChk } //last write of the r23 group (r23.2, r23.1, r23.4, r23.5)
+
+
+SKIP_LAYER_L0:
+ nop
+
+
diff --git a/src/shaders/post_processing/gen7/VP_Setup.g4a b/src/shaders/post_processing/gen7/VP_Setup.g4a
new file mode 100644
index 0000000..afde47d
--- /dev/null
+++ b/src/shaders/post_processing/gen7/VP_Setup.g4a
@@ -0,0 +1,853 @@
+/*
+ * Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * Copyright 2000-2011 Intel Corporation All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// 326 // Total instruction count
+// 1 // Total kernel count
+
+
+
+// Module name: common.inc
+//
+// Common header file for all Video-Processing kernels
+//
+
+.default_execution_size (16)
+.default_register_type :ub
+
+.reg_count_total 128
+.reg_count_payload 7
+
+//========== Common constants ==========
+
+
+//========== Macros ==========
+
+
+//Fast Jump, For more details see "Set_Layer_N.asm"
+
+
+//========== Defines ====================
+
+//========== Static Parameters (Common To All) ==========
+//r1
+
+
+//r2
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+//Color Pipe (IECP) parameters
+
+
+//ByteCopy
+
+
+//r4
+
+ // e.g. byte0 byte1 byte2
+ // YUYV 0 1 3
+ // YVYU 0 3 1
+
+
+//========== Inline parameters (Common To All) ===========
+
+
+//============== Binding Index Table===========
+//Common between DNDI and DNUV
+
+
+//================= Common Message Descriptor =====
+// Message descriptor for thread spawning
+// Message Descriptors
+// = 000 0001 (min message len 1 ) 0,0000 (resp len 0 -add later)
+// 0000,0000,0000
+// 0001(Spawn a root thread),0001 (Root thread spawn thread)
+// = 0x02000011
+// Thread Spawner Message Descriptor
+
+
+// Message descriptor for atomic operation add
+// Message Descriptors
+// = 000 0110 (min message len 6 ) 0,0000 (resp len 0 -add later)
+// 1(header present)001,10(typed atomic operation)0(return enabled)0(slot group, low 8 bits),0111 (AOP_Add)
+// 0000,0000 (Binding table index, added later)
+// = 0x0C098700
+
+// Atomic Operation Add Message Descriptor
+
+
+// Message descriptor for dataport media write
+ // Message Descriptors
+ // = 000 0001 (min message len 1 - add later) 00000 (resp len 0)
+ // 1 (header present 1) 0 1010 (media block write) 000000
+ // 00000000 (binding table index - set later)
+ // = 0x020A8000
+
+
+// Message Length defines
+
+
+// Response Length defines
+
+
+// Block Width and Height Size defines
+
+
+// Extended Message Descriptors
+
+
+// Common message descriptors:
+
+
+//===================== Math Function Control ===================================
+
+
+//============ Message Registers ===============
+ // buf4 starts from r28
+
+
+//#define mMSGHDR_EOT r43 // Dummy Message Register for EOT
+
+
+.declare mubMSGPAYLOAD Base=r30 ElementSize=1 SrcRegion=<16;16,1> Type=ub
+.declare muwMSGPAYLOAD Base=r30 ElementSize=2 SrcRegion=<16;16,1> Type=uw
+.declare mudMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=ud
+.declare mfMSGPAYLOAD Base=r30 ElementSize=4 SrcRegion=<8;8,1> Type=f
+
+//=================== End of thread instruction ===========================
+
+
+//=====================Pointers Used=====================================
+
+
+//=======================================================================
+
+
+//r9-r17
+// Define temp space for any usages
+
+
+// Common Buffers
+
+
+// temp space for rotation
+
+.declare fROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+
+.declare udROBUF Base=r9.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+
+.declare uwROBUF Base=r9.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+
+.declare ubROBUF Base=r9.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+
+.declare ub4ROBUF Base=r9.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+
+
+// End of common.inc
+
+
+// FileName: VP_Setup.asm
+// Author: Vivek Kumar
+// Description: Sets up all parameters for the Video Processing Kernel
+
+
+
+
+// Description: Includes all definitions explicit to Fast Composite.
+
+
+
+
+// End of common.inc
+
+
+//========== GRF partition ==========
+ // r0 header : r0 (1 GRF)
+ // Static parameters : r1 - r6 (6 GRFS)
+ // Inline parameters : r7 - r8 (2 GRFs)
+ // MSGSRC : r27 (1 GRF)
+//===================================
+
+//Interface:
+//========== Static Parameters (Explicit To Fast Composite) ==========
+//r1
+//CSC Set 0
+
+
+.declare udCSC_CURBE Base=r1.0 ElementSize=4 Type=ud
+
+//Constant alpha
+
+
+//r2
+
+
+// Gen7 AVS WA
+
+
+// WiDi Definitions
+
+
+//Colorfill
+
+
+ // 0: 0-degree, 1: 90, 2: 180, 3: 270-degree, clockwise.
+
+.declare ubCOLOR_PIXEL_VAL Base=r2.20 ElementSize=1 SrcRegion=<0;1,0> DstRegion=<1> Type=ub
+
+//r3
+//Normalised Ratio of Horizontal step size with main video for all layers
+
+
+ //Normalised Ratio of Horizontal step size with main video for all layers becomes
+ //Normalised Horizontal step size for all layers in VP_Setup.asm
+
+
+//r4
+//Normalised Vertical step size for all layers
+
+
+//r5
+//Normalised Vertical Frame Origin for all layers
+
+
+//r6
+//Normalised Horizontal Frame Origin for all layers
+
+
+//========== Inline Parameters (Explicit To Fast Composite) ==========
+
+
+//Main video Step X
+
+
+//====================== Binding table (Explicit To Fast Composite)=========================================
+
+
+//Used by Interlaced Scaling Kernels
+
+
+//========== Sampler State Table Index (Explicit To Fast Composite)==========
+//Sampler Index for AVS/IEF messages
+
+
+//Sampler Index for SIMD16 sampler messages
+
+
+//=============================================================================
+
+.declare fBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f // float view; the ud/uw/ub/ub4 groups below alias the same six Base registers
+.declare fBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+.declare fBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=f
+// Unsigned dword (ud) view of the same bases: r64, r80, r96, r112, r28, r46
+.declare udBUFFER_0 Base=r64.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_1 Base=r80.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_2 Base=r96.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_3 Base=r112.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_4 Base=r28.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+.declare udBUFFER_5 Base=r46.0 ElementSize=4 SrcRegion=<8;8,1> DstRegion=<1> Type=ud
+// Unsigned word (uw) view of the same bases, 16 contiguous elements per source row
+.declare uwBUFFER_0 Base=r64.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_1 Base=r80.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_2 Base=r96.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_3 Base=r112.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_4 Base=r28.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+.declare uwBUFFER_5 Base=r46.0 ElementSize=2 SrcRegion=<16;16,1> DstRegion=<1> Type=uw
+// Unsigned byte (ub) view of the same bases, 16 contiguous elements per source row
+.declare ubBUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+.declare ubBUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<16;16,1> DstRegion=<1> Type=ub
+// Strided byte (ub4) view of the same bases: element stride of 4 bytes on both source and destination
+.declare ub4BUFFER_0 Base=r64.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_1 Base=r80.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_2 Base=r96.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_3 Base=r112.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_4 Base=r28.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+.declare ub4BUFFER_5 Base=r46.0 ElementSize=1 SrcRegion=<32;8,4> DstRegion=<4> Type=ub
+
+//Pointer to mask reg
+
+
+//r18
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+.declare udCSC_COEFF_0 Base=r18.0 ElementSize=4 Type=ud // 1 GRF (r18), unsigned dword elements
+
+//r19
+
+
+.declare udCSC_COEFF_1 Base=r19.0 ElementSize=4 Type=ud // 1 GRF (r19), unsigned dword elements
+
+
+//r20
+
+.declare uwALPHA_MASK_REG_TEMP Base=r20.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF (r20), 16 unsigned word elements
+
+//r21
+
+.declare uwALPHA_MASK_REG Base=r21.0 ElementSize=2 SrcRegion=<16;16,1> Type=uw // 1 GRF (r21), 16 unsigned word elements
+
+//r22
+
+
+//Always keep Channel Pointers and Offsets in same GRF, so that we can use
+// NODDCLR, NODDCHK flags. -rT
+
+
+//Keep fORIGIN_X_NLAS, fY_OFFSET_2ND_BLOCK, fSTEP_X_NLAS, pMSGDSC_COPY, ubCONST_ALPHA_COPY as
+//sub registers of same GRF to enable using NODDCLR NODDCHK. -rT
+
+//r23
+
+
+//Lumakey
+
+
+//r24
+
+
+//r25
+
+
+//r26
+
+
+//defines to generate LABELS during compile time.
+
+
+//Setup pointer to the inline parameter
+
+// Copy MSG HDR: seed the message payload header register r27 from r0
+ mov (8) r27.0<1>:ud r0.0<8;8,1>:ud // Initialize message payload header with R0
+
+
+//temp; remove it once unread msg warnings are resolved -vK
+mov (8) r25:ud r0.0<8;8,1>:ud // r25 = copy of r0 (8 dwords)
+mov (8) r26:ud r0.0<8;8,1>:ud // r26 = copy of r0 (8 dwords)
+
+// Calculate StepX for all layers and overwrite it on the ratio
+ mul (8) r3.0<1>:f r3.0<8;8,1>:f r7.4<0;1,0>:f // r3[0..7] *= r7.4; r3 held StepX_ratio = StepX / VideoStepX, so the product is StepX (r7.4 is presumably VideoStepX - TODO confirm)
+
+ //Normalised Ratio of Horizontal step size with main video for all layers now becomes
+ //Normalised Horizontal step size for all layers
+
+// Calculate block origin for all layers and overwrite it on the frame origin
+ mov (2) r8.5<1>:f r7.0<2;2,1>:w // Convert origin from word to float: r8.5 = float(r7.0:w), r8.6 = float(r7.1:w)
+
+ cmp.e.f0.0 (8) null<1>:d r2.26:ub 1:uw // f0.0 is set when byte r2.26 == 1; the (-f0.0) instructions in the per-layer code below are then skipped
+
+
+ shr (1) r17.0<1>:uw r2.2<0;1,0>:uw 0:uw // Layer 0: r17.0 = r2.2:uw >> 0, bringing bits 1:0 to the bottom
+ and (1) r17.0<1>:uw r17.0<0;1,0>:uw 3:uw // mask to the 2-bit rotation selector (0..3)
+ cmp.e.f0.1 (1) null<1>:w r17.0<0;1,0>:uw 1:uw // selector == 1 -> 90 degree path
+ (f0.1) jmpi (1) ROTATE_90_L0
+ cmp.e.f0.1 (1) null<1>:w r17.0<0;1,0>:uw 2:uw // selector == 2 -> 180 degree path
+ (f0.1) jmpi (1) ROTATE_180_L0
+ cmp.e.f0.1 (1) null<1>:w r17.0<0;1,0>:uw 3:uw // selector == 3 -> 270 degree path
+ (f0.1) jmpi (1) ROTATE_270_L0
+
+ // rotate 0 degree (selector 0 falls through to here)
+ROTATE_0_L0:
+ (-f0.0)mov (1) acc0.0:f r6.0<0;1,0>:f // acc0.0 = r6.0; (-f0.0) skips this when f0.0 is set
+ (-f0.0)mac (1) r6.0<1>:f r3.0<0;1,0>:f r8.5<0;1,0>:f // r6.0 = acc + r3.0 * r8.5
+
+ mov (1) acc0.0:f r5.0<0;1,0>:f // acc0.0 = r5.0 (unpredicated)
+ mac (1) r5.0<1>:f r4.0<0;1,0>:f r8.6<0;1,0>:f // r5.0 = acc + r4.0 * r8.6
+ jmpi (1) END_SRC_BLOCK_ORIG_COMP_L0
+
+ // rotate 90 degree: r6.0 is stepped by r8.6, r5.0 by the mirrored r8.5
+ROTATE_90_L0:
+ (-f0.0)mov (1) acc0.0:f r6.0<0;1,0>:f
+ (-f0.0)mac (1) r6.0<1>:f r3.0<0;1,0>:f r8.6<0;1,0>:f // r6.0 = acc + r3.0 * r8.6
+
+ mov (1) r16.0<1>:f r2.0<0;1,0>:uw // r16.0 = float(r2.0:uw); presumably a frame dimension - TODO confirm
+ add (1) r17.0<1>:f -r8.5<0;1,0>:f r16.0<0;1,0>:f // r17.0 = r16.0 - r8.5
+ add (1) r17.0<1>:f r17.0<0;1,0>:f -16.0:f // r17.0 -= 16.0; presumably the block size - TODO confirm
+
+ mov (1) acc0.0:f r5.0<0;1,0>:f
+ mac (1) r5.0<1>:f r4.0<0;1,0>:f r17.0<0;1,0>:f // r5.0 = acc + r4.0 * r17.0
+ jmpi (1) END_SRC_BLOCK_ORIG_COMP_L0
+
+ // rotate 180 degree: both r8.5 and r8.6 are mirrored
+ROTATE_180_L0:
+ (-f0.0)mov (1) r16.0<1>:f r2.0<0;1,0>:uw
+ (-f0.0)add (1) r17.0<1>:f -r8.5<0;1,0>:f r16.0<0;1,0>:f
+ (-f0.0)add (1) r17.0<1>:f r17.0<0;1,0>:f -16.0:f // r17.0 = r2.0 - r8.5 - 16
+ (-f0.0)mov (1) acc0.0:f r6.0<0;1,0>:f
+ (-f0.0)mac (1) r6.0<1>:f r3.0<0;1,0>:f r17.0<0;1,0>:f // r6.0 = acc + r3.0 * r17.0
+
+ mov (1) r16.0<1>:f r2.1<0;1,0>:uw
+ add (1) r17.0<1>:f -r8.6<0;1,0>:f r16.0<0;1,0>:f
+ add (1) r17.0<1>:f r17.0<0;1,0>:f -16.0:f // r17.0 = r2.1 - r8.6 - 16
+ mov (1) acc0.0:f r5.0<0;1,0>:f
+ mac (1) r5.0<1>:f r4.0<0;1,0>:f r17.0<0;1,0>:f // r5.0 = acc + r4.0 * r17.0
+ jmpi (1) END_SRC_BLOCK_ORIG_COMP_L0
+
+ // rotate 270 degree: r6.0 is stepped by the mirrored r8.6, r5.0 by r8.5
+ROTATE_270_L0:
+ (-f0.0)mov (1) r16.0<1>:f r2.1<0;1,0>:uw
+ (-f0.0)add (1) r17.0<1>:f -r8.6<0;1,0>:f r16.0<0;1,0>:f
+ (-f0.0)add (1) r17.0<1>:f r17.0<0;1,0>:f -16.0:f // r17.0 = r2.1 - r8.6 - 16
+ (-f0.0)mov (1) acc0.0:f r6.0<0;1,0>:f
+ (-f0.0)mac (1) r6.0<1>:f r3.0<0;1,0>:f r17.0<0;1,0>:f // r6.0 = acc + r3.0 * r17.0
+
+ mov (1) acc0.0:f r5.0<0;1,0>:f
+ mac (1) r5.0<1>:f r4.0<0;1,0>:f r8.5<0;1,0>:f // r5.0 = acc + r4.0 * r8.5
+
+END_SRC_BLOCK_ORIG_COMP_L0:
+ nop // join point for the four rotation paths of layer 0
+ shr (1) r17.0<1>:uw r2.2<0;1,0>:uw 2:uw // Layer 1: r17.0 = r2.2:uw >> 2, bringing bits 3:2 to the bottom
+ and (1) r17.0<1>:uw r17.0<0;1,0>:uw 3:uw // mask to the 2-bit rotation selector (0..3)
+ cmp.e.f0.1 (1) null<1>:w r17.0<0;1,0>:uw 1:uw // selector == 1 -> 90 degree path
+ (f0.1) jmpi (1) ROTATE_90_L1
+ cmp.e.f0.1 (1) null<1>:w r17.0<0;1,0>:uw 2:uw // selector == 2 -> 180 degree path
+ (f0.1) jmpi (1) ROTATE_180_L1
+ cmp.e.f0.1 (1) null<1>:w r17.0<0;1,0>:uw 3:uw // selector == 3 -> 270 degree path
+ (f0.1) jmpi (1) ROTATE_270_L1
+
+ // rotate 0 degree (selector 0 falls through to here)
+ROTATE_0_L1:
+ (-f0.0)mov (1) acc0.1:f r6.1<0;1,0>:f // acc0.1 = r6.1; (-f0.0) skips this when f0.0 is set
+ (-f0.0)mac (1) r6.1<1>:f r3.1<0;1,0>:f r8.5<0;1,0>:f // r6.1 = acc + r3.1 * r8.5
+
+ mov (1) acc0.1:f r5.1<0;1,0>:f // acc0.1 = r5.1 (unpredicated)
+ mac (1) r5.1<1>:f r4.1<0;1,0>:f r8.6<0;1,0>:f // r5.1 = acc + r4.1 * r8.6
+ jmpi (1) END_SRC_BLOCK_ORIG_COMP_L1
+
+ // rotate 90 degree: r6.1 is stepped by r8.6, r5.1 by the mirrored r8.5
+ROTATE_90_L1:
+ (-f0.0)mov (1) acc0.1:f r6.1<0;1,0>:f
+ (-f0.0)mac (1) r6.1<1>:f r3.1<0;1,0>:f r8.6<0;1,0>:f // r6.1 = acc + r3.1 * r8.6
+
+ mov (1) r16.0<1>:f r2.0<0;1,0>:uw // r16.0 = float(r2.0:uw); presumably a frame dimension - TODO confirm
+ add (1) r17.0<1>:f -r8.5<0;1,0>:f r16.0<0;1,0>:f // r17.0 = r16.0 - r8.5
+ add (1) r17.0<1>:f r17.0<0;1,0>:f -16.0:f // r17.0 -= 16.0; presumably the block size - TODO confirm
+
+ mov (1) acc0.1:f r5.1<0;1,0>:f
+ mac (1) r5.1<1>:f r4.1<0;1,0>:f r17.0<0;1,0>:f // r5.1 = acc + r4.1 * r17.0
+ jmpi (1) END_SRC_BLOCK_ORIG_COMP_L1
+
+ // rotate 180 degree: both r8.5 and r8.6 are mirrored
+ROTATE_180_L1:
+ (-f0.0)mov (1) r16.0<1>:f r2.0<0;1,0>:uw
+ (-f0.0)add (1) r17.0<1>:f -r8.5<0;1,0>:f r16.0<0;1,0>:f
+ (-f0.0)add (1) r17.0<1>:f r17.0<0;1,0>:f -16.0:f // r17.0 = r2.0 - r8.5 - 16
+ (-f0.0)mov (1) acc0.1:f r6.1<0;1,0>:f
+ (-f0.0)mac (1) r6.1<1>:f r3.1<0;1,0>:f r17.0<0;1,0>:f // r6.1 = acc + r3.1 * r17.0
+
+ mov (1) r16.0<1>:f r2.1<0;1,0>:uw
+ add (1) r17.0<1>:f -r8.6<0;1,0>:f r16.0<0;1,0>:f
+ add (1) r17.0<1>:f r17.0<0;1,0>:f -16.0:f // r17.0 = r2.1 - r8.6 - 16
+ mov (1) acc0.1:f r5.1<0;1,0>:f
+ mac (1) r5.1<1>:f r4.1<0;1,0>:f r17.0<0;1,0>:f // r5.1 = acc + r4.1 * r17.0
+ jmpi (1) END_SRC_BLOCK_ORIG_COMP_L1
+
+ // rotate 270 degree: r6.1 is stepped by the mirrored r8.6, r5.1 by r8.5
+ROTATE_270_L1:
+ (-f0.0)mov (1) r16.0<1>:f r2.1<0;1,0>:uw
+ (-f0.0)add (1) r17.0<1>:f -r8.6<0;1,0>:f r16.0<0;1,0>:f
+ (-f0.0)add (1) r17.0<1>:f r17.0<0;1,0>:f -16.0:f // r17.0 = r2.1 - r8.6 - 16
+ (-f0.0)mov (1) acc0.1:f r6.1<0;1,0>:f
+ (-f0.0)mac (1) r6.1<1>:f r3.1<0;1,0>:f r17.0<0;1,0>:f // r6.1 = acc + r3.1 * r17.0
+
+ mov (1) acc0.1:f r5.1<0;1,0>:f
+ mac (1) r5.1<1>:f r4.1<0;1,0>:f r8.5<0;1,0>:f // r5.1 = acc + r4.1 * r8.5
+
+END_SRC_BLOCK_ORIG_COMP_L1:
+ nop // join point for the four rotation paths of layer 1
+ shr (1) r17.0<1>:uw r2.2<0;1,0>:uw 4:uw // Layer 2: r17.0 = r2.2:uw >> 4, bringing bits 5:4 to the bottom
+ and (1) r17.0<1>:uw r17.0<0;1,0>:uw 3:uw // mask to the 2-bit rotation selector (0..3)
+ cmp.e.f0.1 (1) null<1>:w r17.0<0;1,0>:uw 1:uw // selector == 1 -> 90 degree path
+ (f0.1) jmpi (1) ROTATE_90_L2
+ cmp.e.f0.1 (1) null<1>:w r17.0<0;1,0>:uw 2:uw // selector == 2 -> 180 degree path
+ (f0.1) jmpi (1) ROTATE_180_L2
+ cmp.e.f0.1 (1) null<1>:w r17.0<0;1,0>:uw 3:uw // selector == 3 -> 270 degree path
+ (f0.1) jmpi (1) ROTATE_270_L2
+
+ // rotate 0 degree (selector 0 falls through to here)
+ROTATE_0_L2:
+ (-f0.0)mov (1) acc0.2:f r6.2<0;1,0>:f // acc0.2 = r6.2; (-f0.0) skips this when f0.0 is set
+ (-f0.0)mac (1) r6.2<1>:f r3.2<0;1,0>:f r8.5<0;1,0>:f // r6.2 = acc + r3.2 * r8.5
+
+ mov (1) acc0.2:f r5.2<0;1,0>:f // acc0.2 = r5.2 (unpredicated)
+ mac (1) r5.2<1>:f r4.2<0;1,0>:f r8.6<0;1,0>:f // r5.2 = acc + r4.2 * r8.6
+ jmpi (1) END_SRC_BLOCK_ORIG_COMP_L2
+
+ // rotate 90 degree: r6.2 is stepped by r8.6, r5.2 by the mirrored r8.5
+ROTATE_90_L2:
+ (-f0.0)mov (1) acc0.2:f r6.2<0;1,0>:f
+ (-f0.0)mac (1) r6.2<1>:f r3.2<0;1,0>:f r8.6<0;1,0>:f // r6.2 = acc + r3.2 * r8.6
+
+ mov (1) r16.0<1>:f r2.0<0;1,0>:uw // r16.0 = float(r2.0:uw); presumably a frame dimension - TODO confirm
+ add (1) r17.0<1>:f -r8.5<0;1,0>:f r16.0<0;1,0>:f // r17.0 = r16.0 - r8.5
+ add (1) r17.0<1>:f r17.0<0;1,0>:f -16.0:f // r17.0 -= 16.0; presumably the block size - TODO confirm
+
+ mov (1) acc0.2:f r5.2<0;1,0>:f
+ mac (1) r5.2<1>:f r4.2<0;1,0>:f r17.0<0;1,0>:f // r5.2 = acc + r4.2 * r17.0
+ jmpi (1) END_SRC_BLOCK_ORIG_COMP_L2
+
+ // rotate 180 degree: both r8.5 and r8.6 are mirrored
+ROTATE_180_L2:
+ (-f0.0)mov (1) r16.0<1>:f r2.0<0;1,0>:uw
+ (-f0.0)add (1) r17.0<1>:f -r8.5<0;1,0>:f r16.0<0;1,0>:f
+ (-f0.0)add (1) r17.0<1>:f r17.0<0;1,0>:f -16.0:f // r17.0 = r2.0 - r8.5 - 16
+ (-f0.0)mov (1) acc0.2:f r6.2<0;1,0>:f
+ (-f0.0)mac (1) r6.2<1>:f r3.2<0;1,0>:f r17.0<0;1,0>:f // r6.2 = acc + r3.2 * r17.0
+
+ mov (1) r16.0<1>:f r2.1<0;1,0>:uw
+ add (1) r17.0<1>:f -r8.6<0;1,0>:f r16.0<0;1,0>:f
+ add (1) r17.0<1>:f r17.0<0;1,0>:f -16.0:f // r17.0 = r2.1 - r8.6 - 16
+ mov (1) acc0.2:f r5.2<0;1,0>:f
+ mac (1) r5.2<1>:f r4.2<0;1,0>:f r17.0<0;1,0>:f // r5.2 = acc + r4.2 * r17.0
+ jmpi (1) END_SRC_BLOCK_ORIG_COMP_L2
+
+ // rotate 270 degree: r6.2 is stepped by the mirrored r8.6, r5.2 by r8.5
+ROTATE_270_L2:
+ (-f0.0)mov (1) r16.0<1>:f r2.1<0;1,0>:uw
+ (-f0.0)add (1) r17.0<1>:f -r8.6<0;1,0>:f r16.0<0;1,0>:f
+ (-f0.0)add (1) r17.0<1>:f r17.0<0;1,0>:f -16.0:f // r17.0 = r2.1 - r8.6 - 16
+ (-f0.0)mov (1) acc0.2:f r6.2<0;1,0>:f
+ (-f0.0)mac (1) r6.2<1>:f r3.2<0;1,0>:f r17.0<0;1,0>:f // r6.2 = acc + r3.2 * r17.0
+
+ mov (1) acc0.2:f r5.2<0;1,0>:f
+ mac (1) r5.2<1>:f r4.2<0;1,0>:f r8.5<0;1,0>:f // r5.2 = acc + r4.2 * r8.5
+
+END_SRC_BLOCK_ORIG_COMP_L2:
+ nop // join point for the four rotation paths of layer 2
+ shr (1) r17.0<1>:uw r2.2<0;1,0>:uw 6:uw // Layer 3: r17.0 = r2.2:uw >> 6, bringing bits 7:6 to the bottom
+ and (1) r17.0<1>:uw r17.0<0;1,0>:uw 3:uw // mask to the 2-bit rotation selector (0..3)
+ cmp.e.f0.1 (1) null<1>:w r17.0<0;1,0>:uw 1:uw // selector == 1 -> 90 degree path
+ (f0.1) jmpi (1) ROTATE_90_L3
+ cmp.e.f0.1 (1) null<1>:w r17.0<0;1,0>:uw 2:uw // selector == 2 -> 180 degree path
+ (f0.1) jmpi (1) ROTATE_180_L3
+ cmp.e.f0.1 (1) null<1>:w r17.0<0;1,0>:uw 3:uw // selector == 3 -> 270 degree path
+ (f0.1) jmpi (1) ROTATE_270_L3
+
+ // rotate 0 degree (selector 0 falls through to here)
+ROTATE_0_L3:
+ (-f0.0)mov (1) acc0.3:f r6.3<0;1,0>:f // acc0.3 = r6.3; (-f0.0) skips this when f0.0 is set
+ (-f0.0)mac (1) r6.3<1>:f r3.3<0;1,0>:f r8.5<0;1,0>:f // r6.3 = acc + r3.3 * r8.5
+
+ mov (1) acc0.3:f r5.3<0;1,0>:f // acc0.3 = r5.3 (unpredicated)
+ mac (1) r5.3<1>:f r4.3<0;1,0>:f r8.6<0;1,0>:f // r5.3 = acc + r4.3 * r8.6
+ jmpi (1) END_SRC_BLOCK_ORIG_COMP_L3
+
+ // rotate 90 degree: r6.3 is stepped by r8.6, r5.3 by the mirrored r8.5
+ROTATE_90_L3:
+ (-f0.0)mov (1) acc0.3:f r6.3<0;1,0>:f
+ (-f0.0)mac (1) r6.3<1>:f r3.3<0;1,0>:f r8.6<0;1,0>:f // r6.3 = acc + r3.3 * r8.6
+
+ mov (1) r16.0<1>:f r2.0<0;1,0>:uw // r16.0 = float(r2.0:uw); presumably a frame dimension - TODO confirm
+ add (1) r17.0<1>:f -r8.5<0;1,0>:f r16.0<0;1,0>:f // r17.0 = r16.0 - r8.5
+ add (1) r17.0<1>:f r17.0<0;1,0>:f -16.0:f // r17.0 -= 16.0; presumably the block size - TODO confirm
+
+ mov (1) acc0.3:f r5.3<0;1,0>:f
+ mac (1) r5.3<1>:f r4.3<0;1,0>:f r17.0<0;1,0>:f // r5.3 = acc + r4.3 * r17.0
+ jmpi (1) END_SRC_BLOCK_ORIG_COMP_L3
+
+ // rotate 180 degree: both r8.5 and r8.6 are mirrored
+ROTATE_180_L3:
+ (-f0.0)mov (1) r16.0<1>:f r2.0<0;1,0>:uw
+ (-f0.0)add (1) r17.0<1>:f -r8.5<0;1,0>:f r16.0<0;1,0>:f
+ (-f0.0)add (1) r17.0<1>:f r17.0<0;1,0>:f -16.0:f // r17.0 = r2.0 - r8.5 - 16
+ (-f0.0)mov (1) acc0.3:f r6.3<0;1,0>:f
+ (-f0.0)mac (1) r6.3<1>:f r3.3<0;1,0>:f r17.0<0;1,0>:f // r6.3 = acc + r3.3 * r17.0
+
+ mov (1) r16.0<1>:f r2.1<0;1,0>:uw
+ add (1) r17.0<1>:f -r8.6<0;1,0>:f r16.0<0;1,0>:f
+ add (1) r17.0<1>:f r17.0<0;1,0>:f -16.0:f // r17.0 = r2.1 - r8.6 - 16
+ mov (1) acc0.3:f r5.3<0;1,0>:f
+ mac (1) r5.3<1>:f r4.3<0;1,0>:f r17.0<0;1,0>:f // r5.3 = acc + r4.3 * r17.0
+ jmpi (1) END_SRC_BLOCK_ORIG_COMP_L3
+
+ // rotate 270 degree: r6.3 is stepped by the mirrored r8.6, r5.3 by r8.5
+ROTATE_270_L3:
+ (-f0.0)mov (1) r16.0<1>:f r2.1<0;1,0>:uw
+ (-f0.0)add (1) r17.0<1>:f -r8.6<0;1,0>:f r16.0<0;1,0>:f
+ (-f0.0)add (1) r17.0<1>:f r17.0<0;1,0>:f -16.0:f // r17.0 = r2.1 - r8.6 - 16
+ (-f0.0)mov (1) acc0.3:f r6.3<0;1,0>:f
+ (-f0.0)mac (1) r6.3<1>:f r3.3<0;1,0>:f r17.0<0;1,0>:f // r6.3 = acc + r3.3 * r17.0
+
+ mov (1) acc0.3:f r5.3<0;1,0>:f
+ mac (1) r5.3<1>:f r4.3<0;1,0>:f r8.5<0;1,0>:f // r5.3 = acc + r4.3 * r8.5
+
+END_SRC_BLOCK_ORIG_COMP_L3:
+ nop // join point for the four rotation paths of layer 3
+ shr (1) r17.0<1>:uw r2.2<0;1,0>:uw 8:uw // Layer 4: r17.0 = r2.2:uw >> 8, bringing bits 9:8 to the bottom
+ and (1) r17.0<1>:uw r17.0<0;1,0>:uw 3:uw // mask to the 2-bit rotation selector (0..3)
+ cmp.e.f0.1 (1) null<1>:w r17.0<0;1,0>:uw 1:uw // selector == 1 -> 90 degree path
+ (f0.1) jmpi (1) ROTATE_90_L4
+ cmp.e.f0.1 (1) null<1>:w r17.0<0;1,0>:uw 2:uw // selector == 2 -> 180 degree path
+ (f0.1) jmpi (1) ROTATE_180_L4
+ cmp.e.f0.1 (1) null<1>:w r17.0<0;1,0>:uw 3:uw // selector == 3 -> 270 degree path
+ (f0.1) jmpi (1) ROTATE_270_L4
+
+ // rotate 0 degree (selector 0 falls through to here)
+ROTATE_0_L4:
+ (-f0.0)mov (1) acc0.4:f r6.4<0;1,0>:f // acc0.4 = r6.4; (-f0.0) skips this when f0.0 is set
+ (-f0.0)mac (1) r6.4<1>:f r3.4<0;1,0>:f r8.5<0;1,0>:f // r6.4 = acc + r3.4 * r8.5
+
+ mov (1) acc0.4:f r5.4<0;1,0>:f // acc0.4 = r5.4 (unpredicated)
+ mac (1) r5.4<1>:f r4.4<0;1,0>:f r8.6<0;1,0>:f // r5.4 = acc + r4.4 * r8.6
+ jmpi (1) END_SRC_BLOCK_ORIG_COMP_L4
+
+ // rotate 90 degree: r6.4 is stepped by r8.6, r5.4 by the mirrored r8.5
+ROTATE_90_L4:
+ (-f0.0)mov (1) acc0.4:f r6.4<0;1,0>:f
+ (-f0.0)mac (1) r6.4<1>:f r3.4<0;1,0>:f r8.6<0;1,0>:f // r6.4 = acc + r3.4 * r8.6
+
+ mov (1) r16.0<1>:f r2.0<0;1,0>:uw // r16.0 = float(r2.0:uw); presumably a frame dimension - TODO confirm
+ add (1) r17.0<1>:f -r8.5<0;1,0>:f r16.0<0;1,0>:f // r17.0 = r16.0 - r8.5
+ add (1) r17.0<1>:f r17.0<0;1,0>:f -16.0:f // r17.0 -= 16.0; presumably the block size - TODO confirm
+
+ mov (1) acc0.4:f r5.4<0;1,0>:f
+ mac (1) r5.4<1>:f r4.4<0;1,0>:f r17.0<0;1,0>:f // r5.4 = acc + r4.4 * r17.0
+ jmpi (1) END_SRC_BLOCK_ORIG_COMP_L4
+
+ // rotate 180 degree: both r8.5 and r8.6 are mirrored
+ROTATE_180_L4:
+ (-f0.0)mov (1) r16.0<1>:f r2.0<0;1,0>:uw
+ (-f0.0)add (1) r17.0<1>:f -r8.5<0;1,0>:f r16.0<0;1,0>:f
+ (-f0.0)add (1) r17.0<1>:f r17.0<0;1,0>:f -16.0:f // r17.0 = r2.0 - r8.5 - 16
+ (-f0.0)mov (1) acc0.4:f r6.4<0;1,0>:f
+ (-f0.0)mac (1) r6.4<1>:f r3.4<0;1,0>:f r17.0<0;1,0>:f // r6.4 = acc + r3.4 * r17.0
+
+ mov (1) r16.0<1>:f r2.1<0;1,0>:uw
+ add (1) r17.0<1>:f -r8.6<0;1,0>:f r16.0<0;1,0>:f
+ add (1) r17.0<1>:f r17.0<0;1,0>:f -16.0:f // r17.0 = r2.1 - r8.6 - 16
+ mov (1) acc0.4:f r5.4<0;1,0>:f
+ mac (1) r5.4<1>:f r4.4<0;1,0>:f r17.0<0;1,0>:f // r5.4 = acc + r4.4 * r17.0
+ jmpi (1) END_SRC_BLOCK_ORIG_COMP_L4
+
+ // rotate 270 degree: r6.4 is stepped by the mirrored r8.6, r5.4 by r8.5
+ROTATE_270_L4:
+ (-f0.0)mov (1) r16.0<1>:f r2.1<0;1,0>:uw
+ (-f0.0)add (1) r17.0<1>:f -r8.6<0;1,0>:f r16.0<0;1,0>:f
+ (-f0.0)add (1) r17.0<1>:f r17.0<0;1,0>:f -16.0:f // r17.0 = r2.1 - r8.6 - 16
+ (-f0.0)mov (1) acc0.4:f r6.4<0;1,0>:f
+ (-f0.0)mac (1) r6.4<1>:f r3.4<0;1,0>:f r17.0<0;1,0>:f // r6.4 = acc + r3.4 * r17.0
+
+ mov (1) acc0.4:f r5.4<0;1,0>:f
+ mac (1) r5.4<1>:f r4.4<0;1,0>:f r8.5<0;1,0>:f // r5.4 = acc + r4.4 * r8.5
+
+END_SRC_BLOCK_ORIG_COMP_L4:
+ nop // join point for the four rotation paths of layer 4
+ shr (1) r17.0<1>:uw r2.2<0;1,0>:uw 10:uw // Layer 5: r17.0 = r2.2:uw >> 10, bringing bits 11:10 to the bottom
+ and (1) r17.0<1>:uw r17.0<0;1,0>:uw 3:uw // mask to the 2-bit rotation selector (0..3)
+ cmp.e.f0.1 (1) null<1>:w r17.0<0;1,0>:uw 1:uw // selector == 1 -> 90 degree path
+ (f0.1) jmpi (1) ROTATE_90_L5
+ cmp.e.f0.1 (1) null<1>:w r17.0<0;1,0>:uw 2:uw // selector == 2 -> 180 degree path
+ (f0.1) jmpi (1) ROTATE_180_L5
+ cmp.e.f0.1 (1) null<1>:w r17.0<0;1,0>:uw 3:uw // selector == 3 -> 270 degree path
+ (f0.1) jmpi (1) ROTATE_270_L5
+
+ // rotate 0 degree (selector 0 falls through to here)
+ROTATE_0_L5:
+ (-f0.0)mov (1) acc0.5:f r6.5<0;1,0>:f // acc0.5 = r6.5; (-f0.0) skips this when f0.0 is set
+ (-f0.0)mac (1) r6.5<1>:f r3.5<0;1,0>:f r8.5<0;1,0>:f // r6.5 = acc + r3.5 * r8.5
+
+ mov (1) acc0.5:f r5.5<0;1,0>:f // acc0.5 = r5.5 (unpredicated)
+ mac (1) r5.5<1>:f r4.5<0;1,0>:f r8.6<0;1,0>:f // r5.5 = acc + r4.5 * r8.6
+ jmpi (1) END_SRC_BLOCK_ORIG_COMP_L5
+
+ // rotate 90 degree: r6.5 is stepped by r8.6, r5.5 by the mirrored r8.5
+ROTATE_90_L5:
+ (-f0.0)mov (1) acc0.5:f r6.5<0;1,0>:f
+ (-f0.0)mac (1) r6.5<1>:f r3.5<0;1,0>:f r8.6<0;1,0>:f // r6.5 = acc + r3.5 * r8.6
+
+ mov (1) r16.0<1>:f r2.0<0;1,0>:uw // r16.0 = float(r2.0:uw); presumably a frame dimension - TODO confirm
+ add (1) r17.0<1>:f -r8.5<0;1,0>:f r16.0<0;1,0>:f // r17.0 = r16.0 - r8.5
+ add (1) r17.0<1>:f r17.0<0;1,0>:f -16.0:f // r17.0 -= 16.0; presumably the block size - TODO confirm
+
+ mov (1) acc0.5:f r5.5<0;1,0>:f
+ mac (1) r5.5<1>:f r4.5<0;1,0>:f r17.0<0;1,0>:f // r5.5 = acc + r4.5 * r17.0
+ jmpi (1) END_SRC_BLOCK_ORIG_COMP_L5
+
+ // rotate 180 degree: both r8.5 and r8.6 are mirrored
+ROTATE_180_L5:
+ (-f0.0)mov (1) r16.0<1>:f r2.0<0;1,0>:uw
+ (-f0.0)add (1) r17.0<1>:f -r8.5<0;1,0>:f r16.0<0;1,0>:f
+ (-f0.0)add (1) r17.0<1>:f r17.0<0;1,0>:f -16.0:f // r17.0 = r2.0 - r8.5 - 16
+ (-f0.0)mov (1) acc0.5:f r6.5<0;1,0>:f
+ (-f0.0)mac (1) r6.5<1>:f r3.5<0;1,0>:f r17.0<0;1,0>:f // r6.5 = acc + r3.5 * r17.0
+
+ mov (1) r16.0<1>:f r2.1<0;1,0>:uw
+ add (1) r17.0<1>:f -r8.6<0;1,0>:f r16.0<0;1,0>:f
+ add (1) r17.0<1>:f r17.0<0;1,0>:f -16.0:f // r17.0 = r2.1 - r8.6 - 16
+ mov (1) acc0.5:f r5.5<0;1,0>:f
+ mac (1) r5.5<1>:f r4.5<0;1,0>:f r17.0<0;1,0>:f // r5.5 = acc + r4.5 * r17.0
+ jmpi (1) END_SRC_BLOCK_ORIG_COMP_L5
+
+ // rotate 270 degree: r6.5 is stepped by the mirrored r8.6, r5.5 by r8.5
+ROTATE_270_L5:
+ (-f0.0)mov (1) r16.0<1>:f r2.1<0;1,0>:uw
+ (-f0.0)add (1) r17.0<1>:f -r8.6<0;1,0>:f r16.0<0;1,0>:f
+ (-f0.0)add (1) r17.0<1>:f r17.0<0;1,0>:f -16.0:f // r17.0 = r2.1 - r8.6 - 16
+ (-f0.0)mov (1) acc0.5:f r6.5<0;1,0>:f
+ (-f0.0)mac (1) r6.5<1>:f r3.5<0;1,0>:f r17.0<0;1,0>:f // r6.5 = acc + r3.5 * r17.0
+
+ mov (1) acc0.5:f r5.5<0;1,0>:f
+ mac (1) r5.5<1>:f r4.5<0;1,0>:f r8.5<0;1,0>:f // r5.5 = acc + r4.5 * r8.5
+
+END_SRC_BLOCK_ORIG_COMP_L5:
+ nop // join point for the four rotation paths of layer 5
+ shr (1) r17.0<1>:uw r2.2<0;1,0>:uw 12:uw // Layer 6: r17.0 = r2.2:uw >> 12, bringing bits 13:12 to the bottom
+ and (1) r17.0<1>:uw r17.0<0;1,0>:uw 3:uw // mask to the 2-bit rotation selector (0..3)
+ cmp.e.f0.1 (1) null<1>:w r17.0<0;1,0>:uw 1:uw // selector == 1 -> 90 degree path
+ (f0.1) jmpi (1) ROTATE_90_L6
+ cmp.e.f0.1 (1) null<1>:w r17.0<0;1,0>:uw 2:uw // selector == 2 -> 180 degree path
+ (f0.1) jmpi (1) ROTATE_180_L6
+ cmp.e.f0.1 (1) null<1>:w r17.0<0;1,0>:uw 3:uw // selector == 3 -> 270 degree path
+ (f0.1) jmpi (1) ROTATE_270_L6
+
+ // rotate 0 degree (selector 0 falls through to here)
+ROTATE_0_L6:
+ (-f0.0)mov (1) acc0.6:f r6.6<0;1,0>:f // acc0.6 = r6.6; (-f0.0) skips this when f0.0 is set
+ (-f0.0)mac (1) r6.6<1>:f r3.6<0;1,0>:f r8.5<0;1,0>:f // r6.6 = acc + r3.6 * r8.5
+
+ mov (1) acc0.6:f r5.6<0;1,0>:f // acc0.6 = r5.6 (unpredicated)
+ mac (1) r5.6<1>:f r4.6<0;1,0>:f r8.6<0;1,0>:f // r5.6 = acc + r4.6 * r8.6
+ jmpi (1) END_SRC_BLOCK_ORIG_COMP_L6
+
+ // rotate 90 degree: r6.6 is stepped by r8.6, r5.6 by the mirrored r8.5
+ROTATE_90_L6:
+ (-f0.0)mov (1) acc0.6:f r6.6<0;1,0>:f
+ (-f0.0)mac (1) r6.6<1>:f r3.6<0;1,0>:f r8.6<0;1,0>:f // r6.6 = acc + r3.6 * r8.6
+
+ mov (1) r16.0<1>:f r2.0<0;1,0>:uw // r16.0 = float(r2.0:uw); presumably a frame dimension - TODO confirm
+ add (1) r17.0<1>:f -r8.5<0;1,0>:f r16.0<0;1,0>:f // r17.0 = r16.0 - r8.5
+ add (1) r17.0<1>:f r17.0<0;1,0>:f -16.0:f // r17.0 -= 16.0; presumably the block size - TODO confirm
+
+ mov (1) acc0.6:f r5.6<0;1,0>:f
+ mac (1) r5.6<1>:f r4.6<0;1,0>:f r17.0<0;1,0>:f // r5.6 = acc + r4.6 * r17.0
+ jmpi (1) END_SRC_BLOCK_ORIG_COMP_L6
+
+ // rotate 180 degree: both r8.5 and r8.6 are mirrored
+ROTATE_180_L6:
+ (-f0.0)mov (1) r16.0<1>:f r2.0<0;1,0>:uw
+ (-f0.0)add (1) r17.0<1>:f -r8.5<0;1,0>:f r16.0<0;1,0>:f
+ (-f0.0)add (1) r17.0<1>:f r17.0<0;1,0>:f -16.0:f // r17.0 = r2.0 - r8.5 - 16
+ (-f0.0)mov (1) acc0.6:f r6.6<0;1,0>:f
+ (-f0.0)mac (1) r6.6<1>:f r3.6<0;1,0>:f r17.0<0;1,0>:f // r6.6 = acc + r3.6 * r17.0
+
+ mov (1) r16.0<1>:f r2.1<0;1,0>:uw
+ add (1) r17.0<1>:f -r8.6<0;1,0>:f r16.0<0;1,0>:f
+ add (1) r17.0<1>:f r17.0<0;1,0>:f -16.0:f // r17.0 = r2.1 - r8.6 - 16
+ mov (1) acc0.6:f r5.6<0;1,0>:f
+ mac (1) r5.6<1>:f r4.6<0;1,0>:f r17.0<0;1,0>:f // r5.6 = acc + r4.6 * r17.0
+ jmpi (1) END_SRC_BLOCK_ORIG_COMP_L6
+
+ // rotate 270 degree: r6.6 is stepped by the mirrored r8.6, r5.6 by r8.5
+ROTATE_270_L6:
+ (-f0.0)mov (1) r16.0<1>:f r2.1<0;1,0>:uw
+ (-f0.0)add (1) r17.0<1>:f -r8.6<0;1,0>:f r16.0<0;1,0>:f
+ (-f0.0)add (1) r17.0<1>:f r17.0<0;1,0>:f -16.0:f // r17.0 = r2.1 - r8.6 - 16
+ (-f0.0)mov (1) acc0.6:f r6.6<0;1,0>:f
+ (-f0.0)mac (1) r6.6<1>:f r3.6<0;1,0>:f r17.0<0;1,0>:f // r6.6 = acc + r3.6 * r17.0
+
+ mov (1) acc0.6:f r5.6<0;1,0>:f
+ mac (1) r5.6<1>:f r4.6<0;1,0>:f r8.5<0;1,0>:f // r5.6 = acc + r4.6 * r8.5
+
+END_SRC_BLOCK_ORIG_COMP_L6:
+ nop // join point for the four rotation paths of layer 6
+ shr (1) r17.0<1>:uw r2.2<0;1,0>:uw 14:uw // Layer 7: r17.0 = r2.2:uw >> 14, bringing bits 15:14 to the bottom
+ and (1) r17.0<1>:uw r17.0<0;1,0>:uw 3:uw // mask to the 2-bit rotation selector (0..3)
+ cmp.e.f0.1 (1) null<1>:w r17.0<0;1,0>:uw 1:uw // selector == 1 -> 90 degree path
+ (f0.1) jmpi (1) ROTATE_90_L7
+ cmp.e.f0.1 (1) null<1>:w r17.0<0;1,0>:uw 2:uw // selector == 2 -> 180 degree path
+ (f0.1) jmpi (1) ROTATE_180_L7
+ cmp.e.f0.1 (1) null<1>:w r17.0<0;1,0>:uw 3:uw // selector == 3 -> 270 degree path
+ (f0.1) jmpi (1) ROTATE_270_L7
+
+ // rotate 0 degree (selector 0 falls through to here)
+ROTATE_0_L7:
+ (-f0.0)mov (1) acc0.7:f r6.7<0;1,0>:f // acc0.7 = r6.7; (-f0.0) skips this when f0.0 is set
+ (-f0.0)mac (1) r6.7<1>:f r3.7<0;1,0>:f r8.5<0;1,0>:f // r6.7 = acc + r3.7 * r8.5
+
+ mov (1) acc0.7:f r5.7<0;1,0>:f // acc0.7 = r5.7 (unpredicated)
+ mac (1) r5.7<1>:f r4.7<0;1,0>:f r8.6<0;1,0>:f // r5.7 = acc + r4.7 * r8.6
+ jmpi (1) END_SRC_BLOCK_ORIG_COMP_L7
+
+ // rotate 90 degree: r6.7 is stepped by r8.6, r5.7 by the mirrored r8.5
+ROTATE_90_L7:
+ (-f0.0)mov (1) acc0.7:f r6.7<0;1,0>:f
+ (-f0.0)mac (1) r6.7<1>:f r3.7<0;1,0>:f r8.6<0;1,0>:f // r6.7 = acc + r3.7 * r8.6
+
+ mov (1) r16.0<1>:f r2.0<0;1,0>:uw // r16.0 = float(r2.0:uw); presumably a frame dimension - TODO confirm
+ add (1) r17.0<1>:f -r8.5<0;1,0>:f r16.0<0;1,0>:f // r17.0 = r16.0 - r8.5
+ add (1) r17.0<1>:f r17.0<0;1,0>:f -16.0:f // r17.0 -= 16.0; presumably the block size - TODO confirm
+
+ mov (1) acc0.7:f r5.7<0;1,0>:f
+ mac (1) r5.7<1>:f r4.7<0;1,0>:f r17.0<0;1,0>:f // r5.7 = acc + r4.7 * r17.0
+ jmpi (1) END_SRC_BLOCK_ORIG_COMP_L7
+
+ // rotate 180 degree: both r8.5 and r8.6 are mirrored
+ROTATE_180_L7:
+ (-f0.0)mov (1) r16.0<1>:f r2.0<0;1,0>:uw
+ (-f0.0)add (1) r17.0<1>:f -r8.5<0;1,0>:f r16.0<0;1,0>:f
+ (-f0.0)add (1) r17.0<1>:f r17.0<0;1,0>:f -16.0:f // r17.0 = r2.0 - r8.5 - 16
+ (-f0.0)mov (1) acc0.7:f r6.7<0;1,0>:f
+ (-f0.0)mac (1) r6.7<1>:f r3.7<0;1,0>:f r17.0<0;1,0>:f // r6.7 = acc + r3.7 * r17.0
+
+ mov (1) r16.0<1>:f r2.1<0;1,0>:uw
+ add (1) r17.0<1>:f -r8.6<0;1,0>:f r16.0<0;1,0>:f
+ add (1) r17.0<1>:f r17.0<0;1,0>:f -16.0:f // r17.0 = r2.1 - r8.6 - 16
+ mov (1) acc0.7:f r5.7<0;1,0>:f
+ mac (1) r5.7<1>:f r4.7<0;1,0>:f r17.0<0;1,0>:f // r5.7 = acc + r4.7 * r17.0
+ jmpi (1) END_SRC_BLOCK_ORIG_COMP_L7
+
+ // rotate 270 degree: r6.7 is stepped by the mirrored r8.6, r5.7 by r8.5
+ROTATE_270_L7:
+ (-f0.0)mov (1) r16.0<1>:f r2.1<0;1,0>:uw
+ (-f0.0)add (1) r17.0<1>:f -r8.6<0;1,0>:f r16.0<0;1,0>:f
+ (-f0.0)add (1) r17.0<1>:f r17.0<0;1,0>:f -16.0:f // r17.0 = r2.1 - r8.6 - 16
+ (-f0.0)mov (1) acc0.7:f r6.7<0;1,0>:f
+ (-f0.0)mac (1) r6.7<1>:f r3.7<0;1,0>:f r17.0<0;1,0>:f // r6.7 = acc + r3.7 * r17.0
+
+ mov (1) acc0.7:f r5.7<0;1,0>:f
+ mac (1) r5.7<1>:f r4.7<0;1,0>:f r8.5<0;1,0>:f // r5.7 = acc + r4.7 * r8.5
+
+END_SRC_BLOCK_ORIG_COMP_L7:
+ nop // join point for the four rotation paths of layer 7
+
+
diff --git a/src/shaders/post_processing/gen7/avs.asm b/src/shaders/post_processing/gen7/avs.asm
new file mode 100644
index 0000000..091ed50
--- /dev/null
+++ b/src/shaders/post_processing/gen7/avs.asm
@@ -0,0 +1,19 @@
+// Module name: AVS
+.kernel AVS
+.code
+// The kernel body is the concatenation of the .g4a fragments below, in this order.
+#include "VP_Setup.g4a"
+#include "Set_Layer_0.g4a"
+#include "Set_AVS_Buf_0123_PL2.g4a"
+#include "PL2_AVS_Buf_0.g4a"
+#include "PL2_AVS_Buf_1.g4a"
+#include "PL2_AVS_Buf_2.g4a"
+#include "PL2_AVS_Buf_3.g4a"
+#include "Save_AVS_NV12.g4a"
+#include "EOT.g4a"
+
+.end_code
+
+.end_kernel
+
+// end of AVS
diff --git a/src/shaders/post_processing/gen7/avs.g75b b/src/shaders/post_processing/gen7/avs.g75b
new file mode 100644
index 0000000..c25432e
--- /dev/null
+++ b/src/shaders/post_processing/gen7/avs.g75b
@@ -0,0 +1,654 @@
+ { 0x00600001, 0x23600021, 0x008d0000, 0x00000000 },
+ { 0x00600001, 0x23200021, 0x008d0000, 0x00000000 },
+ { 0x00600001, 0x23400021, 0x008d0000, 0x00000000 },
+ { 0x00600041, 0x206077bd, 0x008d0060, 0x000000f0 },
+ { 0x00200001, 0x211401bd, 0x004500e0, 0x00000000 },
+ { 0x01600010, 0x20002e24, 0x0000005a, 0x00010001 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x00000000 },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+ { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c077bd, 0x00000060, 0x00000114 },
+ { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a077bd, 0x00000080, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c077bd, 0x00000060, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a077bd, 0x00000080, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c077bd, 0x00000060, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a077bd, 0x00000080, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c077bd, 0x00000060, 0x00000220 },
+ { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a077bd, 0x00000080, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x00020002 },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+ { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+ { 0x00110048, 0x20c477bd, 0x00000064, 0x00000114 },
+ { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+ { 0x00000048, 0x20a477bd, 0x00000084, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+ { 0x00110048, 0x20c477bd, 0x00000064, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+ { 0x00000048, 0x20a477bd, 0x00000084, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+ { 0x00110048, 0x20c477bd, 0x00000064, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+ { 0x00000048, 0x20a477bd, 0x00000084, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+ { 0x00110048, 0x20c477bd, 0x00000064, 0x00000220 },
+ { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+ { 0x00000048, 0x20a477bd, 0x00000084, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x00040004 },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+ { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+ { 0x00110048, 0x20c877bd, 0x00000068, 0x00000114 },
+ { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+ { 0x00000048, 0x20a877bd, 0x00000088, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+ { 0x00110048, 0x20c877bd, 0x00000068, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+ { 0x00000048, 0x20a877bd, 0x00000088, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+ { 0x00110048, 0x20c877bd, 0x00000068, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+ { 0x00000048, 0x20a877bd, 0x00000088, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+ { 0x00110048, 0x20c877bd, 0x00000068, 0x00000220 },
+ { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+ { 0x00000048, 0x20a877bd, 0x00000088, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x00060006 },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+ { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+ { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000114 },
+ { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+ { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+ { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+ { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+ { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+ { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+ { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000220 },
+ { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+ { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x00080008 },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+ { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+ { 0x00110048, 0x20d077bd, 0x00000070, 0x00000114 },
+ { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+ { 0x00000048, 0x20b077bd, 0x00000090, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+ { 0x00110048, 0x20d077bd, 0x00000070, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+ { 0x00000048, 0x20b077bd, 0x00000090, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+ { 0x00110048, 0x20d077bd, 0x00000070, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+ { 0x00000048, 0x20b077bd, 0x00000090, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+ { 0x00110048, 0x20d077bd, 0x00000070, 0x00000220 },
+ { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+ { 0x00000048, 0x20b077bd, 0x00000090, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x000a000a },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+ { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+ { 0x00110048, 0x20d477bd, 0x00000074, 0x00000114 },
+ { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+ { 0x00000048, 0x20b477bd, 0x00000094, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+ { 0x00110048, 0x20d477bd, 0x00000074, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+ { 0x00000048, 0x20b477bd, 0x00000094, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+ { 0x00110048, 0x20d477bd, 0x00000074, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+ { 0x00000048, 0x20b477bd, 0x00000094, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+ { 0x00110048, 0x20d477bd, 0x00000074, 0x00000220 },
+ { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+ { 0x00000048, 0x20b477bd, 0x00000094, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x000c000c },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+ { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+ { 0x00110048, 0x20d877bd, 0x00000078, 0x00000114 },
+ { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+ { 0x00000048, 0x20b877bd, 0x00000098, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+ { 0x00110048, 0x20d877bd, 0x00000078, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+ { 0x00000048, 0x20b877bd, 0x00000098, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+ { 0x00110048, 0x20d877bd, 0x00000078, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+ { 0x00000048, 0x20b877bd, 0x00000098, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+ { 0x00110048, 0x20d877bd, 0x00000078, 0x00000220 },
+ { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+ { 0x00000048, 0x20b877bd, 0x00000098, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x000e000e },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+ { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+ { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000114 },
+ { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+ { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+ { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+ { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+ { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+ { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+ { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000220 },
+ { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+ { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00800001, 0x22a00169, 0x00000000, 0xffffffff },
+ { 0x02000010, 0x20002d24, 0x020000e4, 0x00000000 },
+ { 0x02010010, 0x20002d24, 0x020000e6, 0x00000000 },
+ { 0x00110220, 0x34001c00, 0x02001400, 0x000000d0 },
+ { 0x00000005, 0x23022d31, 0x00000044, 0x00030003 },
+ { 0x00000401, 0x233803bd, 0x000000f4, 0x00000000 },
+ { 0x00000c01, 0x233003bd, 0x00000060, 0x00000000 },
+ { 0x00000c01, 0x233403bd, 0x00000080, 0x00000000 },
+ { 0x00000c01, 0x232803bd, 0x000000c0, 0x00000000 },
+ { 0x00000801, 0x232c03bd, 0x000000a0, 0x00000000 },
+ { 0x00000001, 0x240803bc, 0x000000c0, 0x00000000 },
+ { 0x00000048, 0x24087fbc, 0x00000060, 0x41000000 },
+ { 0x00000448, 0x22e87fbd, 0x000000f4, 0x41e00000 },
+ { 0x00000c41, 0x22e47fbd, 0x00000080, 0x40800000 },
+ { 0x00000001, 0x241003bc, 0x00000060, 0x00000000 },
+ { 0x00000c48, 0x22f07fbd, 0x000000f4, 0x41000000 },
+ { 0x00000801, 0x22f40061, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00400001, 0x2400036c, 0x00000000, 0x00006ea2 },
+ { 0x00400040, 0x24002d8c, 0x00690400, 0x00460046 },
+ { 0x00400009, 0x22c02d8d, 0x00690400, 0x00050005 },
+ { 0x00400001, 0x2400036c, 0x00000000, 0x00006204 },
+ { 0x00400040, 0x24002d8c, 0x00690400, 0x00400040 },
+ { 0x00400409, 0x22402d8d, 0x00690400, 0x00050005 },
+ { 0x00000801, 0x22500061, 0x00000000, 0x01000100 },
+ { 0x00000001, 0x22d00061, 0x00000000, 0x00400040 },
+ { 0x00110220, 0x34001c00, 0x02001400, 0x00000290 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+ { 0x00000401, 0x233c0021, 0x000000fc, 0x00000000 },
+ { 0x00000801, 0x23240121, 0x000000f8, 0x00000000 },
+ { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+ { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+ { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+ { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+ { 0x00110220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+ { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+ { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+ { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+ { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+ { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+ { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+ { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+ { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+ { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+ { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+ { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+ { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+ { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+ { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+ { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+ { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+ { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+ { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+ { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+ { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+ { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+ { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+ { 0x02000031, 0x28000229, 0x00000200, 0x00000200 },
+ { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x048eb801 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000a000 },
+ { 0x02000031, 0x28800229, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00110220, 0x34001c00, 0x02001400, 0x00000280 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+ { 0x00000040, 0x23240d21, 0x000000f8, 0x00000001 },
+ { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+ { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+ { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+ { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+ { 0x00110220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+ { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+ { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+ { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+ { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+ { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+ { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+ { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+ { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+ { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+ { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+ { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+ { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+ { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+ { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+ { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+ { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+ { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+ { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+ { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+ { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+ { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+ { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+ { 0x02000031, 0x2a000229, 0x00000200, 0x00000200 },
+ { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x048eb801 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000a000 },
+ { 0x02000031, 0x2a800229, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00110220, 0x34001c00, 0x02001400, 0x00000280 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+ { 0x00000040, 0x23240d21, 0x000000f8, 0x00000002 },
+ { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+ { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+ { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+ { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+ { 0x00110220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+ { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+ { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+ { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+ { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+ { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+ { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+ { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+ { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+ { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+ { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+ { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+ { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+ { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+ { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+ { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+ { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+ { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+ { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+ { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+ { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+ { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+ { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+ { 0x02000031, 0x2c000229, 0x00000200, 0x00000200 },
+ { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x048eb801 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000a000 },
+ { 0x02000031, 0x2c800229, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00110220, 0x34001c00, 0x02001400, 0x00000280 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+ { 0x00000040, 0x23240d21, 0x000000f8, 0x00000003 },
+ { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+ { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+ { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+ { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+ { 0x00110220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+ { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+ { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+ { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+ { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+ { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+ { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+ { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+ { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+ { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+ { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+ { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+ { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+ { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+ { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+ { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+ { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+ { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+ { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+ { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+ { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+ { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+ { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+ { 0x02000031, 0x2e000229, 0x00000200, 0x00000200 },
+ { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x048eb801 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000a000 },
+ { 0x02000031, 0x2e800229, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x00000000 },
+ { 0x00400040, 0x22082da8, 0x006902c0, 0x02000200 },
+ { 0x00600001, 0x23800021, 0x008d0360, 0x00000000 },
+ { 0x00600001, 0x24a00021, 0x008d0360, 0x00000000 },
+ { 0x00200401, 0x238001a5, 0x004500e0, 0x00000000 },
+ { 0x00000401, 0x24a001a5, 0x000000e0, 0x00000000 },
+ { 0x00000c08, 0x24a43da5, 0x000000e2, 0x00010001 },
+ { 0x00000801, 0x23880061, 0x00000000, 0x000f000f },
+ { 0x00000801, 0x24a80061, 0x00000000, 0x0007000f },
+ { 0x80800040, 0xa4002d29, 0x00b18400, 0x00800080 },
+ { 0x80800040, 0xa4202d29, 0x00b18420, 0x00800080 },
+ { 0x80800040, 0xa4402d29, 0x00b18440, 0x00800080 },
+ { 0x80800040, 0xa4602d29, 0x00b18460, 0x00800080 },
+ { 0x00600008, 0x45c03d29, 0x00ae8800, 0x00010001 },
+ { 0x00600008, 0x45e03d29, 0x00ae8820, 0x00010001 },
+ { 0x00600008, 0x46003d29, 0x00ae8840, 0x00010001 },
+ { 0x00600008, 0x46203d29, 0x00ae8860, 0x00010001 },
+ { 0x00600040, 0x45c02529, 0x00ae05c0, 0x00ae05e0 },
+ { 0x80600040, 0xc8002d29, 0x00ae05c0, 0x00800080 },
+ { 0x00600040, 0x46002529, 0x00ae0600, 0x00ae0620 },
+ { 0x80600040, 0xc8402d29, 0x00ae0600, 0x00800080 },
+ { 0x00600008, 0x46403d29, 0x00ae8000, 0x00010001 },
+ { 0x00600008, 0x46603d29, 0x00ae8020, 0x00010001 },
+ { 0x00600008, 0x46803d29, 0x00ae8040, 0x00010001 },
+ { 0x00600008, 0x46a03d29, 0x00ae8060, 0x00010001 },
+ { 0x00600040, 0x46402529, 0x00ae0640, 0x00ae0660 },
+ { 0x80600040, 0xc0002d29, 0x00ae0640, 0x00800080 },
+ { 0x00600040, 0x46802529, 0x00ae0680, 0x00ae06a0 },
+ { 0x80600040, 0xc0402d29, 0x00ae0680, 0x00800080 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x04000400 },
+ { 0x80800040, 0xb4002d29, 0x00b19400, 0x00800080 },
+ { 0x80800040, 0xb4202d29, 0x00b19420, 0x00800080 },
+ { 0x80800040, 0xb4402d29, 0x00b19440, 0x00800080 },
+ { 0x80800040, 0xb4602d29, 0x00b19460, 0x00800080 },
+ { 0x00600008, 0x45c03d29, 0x00ae9800, 0x00010001 },
+ { 0x00600008, 0x45e03d29, 0x00ae9820, 0x00010001 },
+ { 0x00600008, 0x46003d29, 0x00ae9840, 0x00010001 },
+ { 0x00600008, 0x46203d29, 0x00ae9860, 0x00010001 },
+ { 0x00600040, 0x45c02529, 0x00ae05c0, 0x00ae05e0 },
+ { 0x80600040, 0xd8002d29, 0x00ae05c0, 0x00800080 },
+ { 0x00600040, 0x46002529, 0x00ae0600, 0x00ae0620 },
+ { 0x80600040, 0xd8402d29, 0x00ae0600, 0x00800080 },
+ { 0x00600008, 0x46403d29, 0x00ae9000, 0x00010001 },
+ { 0x00600008, 0x46603d29, 0x00ae9020, 0x00010001 },
+ { 0x00600008, 0x46803d29, 0x00ae9040, 0x00010001 },
+ { 0x00600008, 0x46a03d29, 0x00ae9060, 0x00010001 },
+ { 0x00600040, 0x46402529, 0x00ae0640, 0x00ae0660 },
+ { 0x80600040, 0xd0002d29, 0x00ae0640, 0x00800080 },
+ { 0x00600040, 0x46802529, 0x00ae0680, 0x00ae06a0 },
+ { 0x80600040, 0xd0402d29, 0x00ae0680, 0x00800080 },
+ { 0x00400040, 0x22082da8, 0x006902c0, 0x06000600 },
+ { 0x80800040, 0xa4002d29, 0x00b18400, 0x00800080 },
+ { 0x80800040, 0xa4202d29, 0x00b18420, 0x00800080 },
+ { 0x80800040, 0xa4402d29, 0x00b18440, 0x00800080 },
+ { 0x80800040, 0xa4602d29, 0x00b18460, 0x00800080 },
+ { 0x00600008, 0x45c03d29, 0x00ae8800, 0x00010001 },
+ { 0x00600008, 0x45e03d29, 0x00ae8820, 0x00010001 },
+ { 0x00600008, 0x46003d29, 0x00ae8840, 0x00010001 },
+ { 0x00600008, 0x46203d29, 0x00ae8860, 0x00010001 },
+ { 0x00600040, 0x45c02529, 0x00ae05c0, 0x00ae05e0 },
+ { 0x80600040, 0xc8002d29, 0x00ae05c0, 0x00800080 },
+ { 0x00600040, 0x46002529, 0x00ae0600, 0x00ae0620 },
+ { 0x80600040, 0xc8402d29, 0x00ae0600, 0x00800080 },
+ { 0x00600008, 0x46403d29, 0x00ae8000, 0x00010001 },
+ { 0x00600008, 0x46603d29, 0x00ae8020, 0x00010001 },
+ { 0x00600008, 0x46803d29, 0x00ae8040, 0x00010001 },
+ { 0x00600008, 0x46a03d29, 0x00ae8060, 0x00010001 },
+ { 0x00600040, 0x46402529, 0x00ae0640, 0x00ae0660 },
+ { 0x80600040, 0xc0002d29, 0x00ae0640, 0x00800080 },
+ { 0x00600040, 0x46802529, 0x00ae0680, 0x00ae06a0 },
+ { 0x80600040, 0xc0402d29, 0x00ae0680, 0x00800080 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x04000400 },
+ { 0x80800040, 0xb4002d29, 0x00b19400, 0x00800080 },
+ { 0x80800040, 0xb4202d29, 0x00b19420, 0x00800080 },
+ { 0x80800040, 0xb4402d29, 0x00b19440, 0x00800080 },
+ { 0x80800040, 0xb4602d29, 0x00b19460, 0x00800080 },
+ { 0x00600008, 0x45c03d29, 0x00ae9800, 0x00010001 },
+ { 0x00600008, 0x45e03d29, 0x00ae9820, 0x00010001 },
+ { 0x00600008, 0x46003d29, 0x00ae9840, 0x00010001 },
+ { 0x00600008, 0x46203d29, 0x00ae9860, 0x00010001 },
+ { 0x00600040, 0x45c02529, 0x00ae05c0, 0x00ae05e0 },
+ { 0x80600040, 0xd8002d29, 0x00ae05c0, 0x00800080 },
+ { 0x00600040, 0x46002529, 0x00ae0600, 0x00ae0620 },
+ { 0x80600040, 0xd8402d29, 0x00ae0600, 0x00800080 },
+ { 0x00600008, 0x46403d29, 0x00ae9000, 0x00010001 },
+ { 0x00600008, 0x46603d29, 0x00ae9020, 0x00010001 },
+ { 0x00600008, 0x46803d29, 0x00ae9040, 0x00010001 },
+ { 0x00600008, 0x46a03d29, 0x00ae9060, 0x00010001 },
+ { 0x00600040, 0x46402529, 0x00ae0640, 0x00ae0660 },
+ { 0x80600040, 0xd0002d29, 0x00ae0640, 0x00800080 },
+ { 0x00600040, 0x46802529, 0x00ae0680, 0x00ae06a0 },
+ { 0x80600040, 0xd0402d29, 0x00ae0680, 0x00800080 },
+ { 0x00400040, 0x22082da8, 0x006902c0, 0x06000600 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x00000000 },
+ { 0x00400040, 0x22082da8, 0x006902c0, 0x02000200 },
+ { 0x00800401, 0x23a00231, 0x00d28401, 0x00000000 },
+ { 0x00800801, 0x23b00231, 0x00d28421, 0x00000000 },
+ { 0x00800401, 0x23c00231, 0x00d28441, 0x00000000 },
+ { 0x00800801, 0x23d00231, 0x00d28461, 0x00000000 },
+ { 0x00600401, 0x44c00231, 0x00cf8801, 0x00000000 },
+ { 0x00600c01, 0x44d00231, 0x00cf8841, 0x00000000 },
+ { 0x00600c01, 0x44c10231, 0x00cf8001, 0x00000000 },
+ { 0x00600801, 0x44d10231, 0x00cf8041, 0x00000000 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x04000400 },
+ { 0x00800401, 0x23e00231, 0x00d29401, 0x00000000 },
+ { 0x00800801, 0x23f00231, 0x00d29421, 0x00000000 },
+ { 0x00800401, 0x24000231, 0x00d29441, 0x00000000 },
+ { 0x00800801, 0x24100231, 0x00d29461, 0x00000000 },
+ { 0x00600401, 0x44e00231, 0x00cf9801, 0x00000000 },
+ { 0x00600c01, 0x44f00231, 0x00cf9841, 0x00000000 },
+ { 0x00600c01, 0x44e10231, 0x00cf9001, 0x00000000 },
+ { 0x00600801, 0x44f10231, 0x00cf9041, 0x00000000 },
+ { 0x00400040, 0x22082da8, 0x006902c0, 0x06000600 },
+ { 0x00800401, 0x24200231, 0x00d28401, 0x00000000 },
+ { 0x00800801, 0x24300231, 0x00d28421, 0x00000000 },
+ { 0x00800401, 0x24400231, 0x00d28441, 0x00000000 },
+ { 0x00800801, 0x24500231, 0x00d28461, 0x00000000 },
+ { 0x00600401, 0x45000231, 0x00cf8801, 0x00000000 },
+ { 0x00600c01, 0x45100231, 0x00cf8841, 0x00000000 },
+ { 0x00600c01, 0x45010231, 0x00cf8001, 0x00000000 },
+ { 0x00600801, 0x45110231, 0x00cf8041, 0x00000000 },
+ { 0x00800401, 0x24600231, 0x00d29401, 0x00000000 },
+ { 0x00800801, 0x24700231, 0x00d29421, 0x00000000 },
+ { 0x00800401, 0x24800231, 0x00d29441, 0x00000000 },
+ { 0x00800801, 0x24900231, 0x00d29461, 0x00000000 },
+ { 0x00600401, 0x45200231, 0x00cf9801, 0x00000000 },
+ { 0x00600c01, 0x45300231, 0x00cf9841, 0x00000000 },
+ { 0x00600c01, 0x45210231, 0x00cf9001, 0x00000000 },
+ { 0x00600801, 0x45310231, 0x00cf9041, 0x00000000 },
+ { 0x05000031, 0x20000e24, 0x00000380, 0x120a8018 },
+ { 0x05000031, 0x20000e24, 0x000004a0, 0x0a0a8019 },
+ { 0x00600001, 0x2fe00021, 0x008d0000, 0x00000000 },
+ { 0x07000031, 0x20001e24, 0x00000fe0, 0x82000010 },
diff --git a/src/shaders/post_processing/gen7/avs.g7b b/src/shaders/post_processing/gen7/avs.g7b
new file mode 100644
index 0000000..445ae01
--- /dev/null
+++ b/src/shaders/post_processing/gen7/avs.g7b
@@ -0,0 +1,654 @@
+ { 0x00600001, 0x23600021, 0x008d0000, 0x00000000 },
+ { 0x00600001, 0x23200021, 0x008d0000, 0x00000000 },
+ { 0x00600001, 0x23400021, 0x008d0000, 0x00000000 },
+ { 0x00600041, 0x206077bd, 0x008d0060, 0x000000f0 },
+ { 0x00200001, 0x211401bd, 0x004500e0, 0x00000000 },
+ { 0x01600010, 0x20002e24, 0x0000005a, 0x00010001 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x00000000 },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+ { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c077bd, 0x00000060, 0x00000114 },
+ { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a077bd, 0x00000080, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c077bd, 0x00000060, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a077bd, 0x00000080, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c077bd, 0x00000060, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a077bd, 0x00000080, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c077bd, 0x00000060, 0x00000220 },
+ { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a077bd, 0x00000080, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x00020002 },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+ { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+ { 0x00110048, 0x20c477bd, 0x00000064, 0x00000114 },
+ { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+ { 0x00000048, 0x20a477bd, 0x00000084, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+ { 0x00110048, 0x20c477bd, 0x00000064, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+ { 0x00000048, 0x20a477bd, 0x00000084, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+ { 0x00110048, 0x20c477bd, 0x00000064, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+ { 0x00000048, 0x20a477bd, 0x00000084, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+ { 0x00110048, 0x20c477bd, 0x00000064, 0x00000220 },
+ { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+ { 0x00000048, 0x20a477bd, 0x00000084, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x00040004 },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+ { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+ { 0x00110048, 0x20c877bd, 0x00000068, 0x00000114 },
+ { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+ { 0x00000048, 0x20a877bd, 0x00000088, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+ { 0x00110048, 0x20c877bd, 0x00000068, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+ { 0x00000048, 0x20a877bd, 0x00000088, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+ { 0x00110048, 0x20c877bd, 0x00000068, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+ { 0x00000048, 0x20a877bd, 0x00000088, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+ { 0x00110048, 0x20c877bd, 0x00000068, 0x00000220 },
+ { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+ { 0x00000048, 0x20a877bd, 0x00000088, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x00060006 },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+ { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+ { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000114 },
+ { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+ { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+ { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+ { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+ { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+ { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+ { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000220 },
+ { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+ { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x00080008 },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+ { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+ { 0x00110048, 0x20d077bd, 0x00000070, 0x00000114 },
+ { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+ { 0x00000048, 0x20b077bd, 0x00000090, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+ { 0x00110048, 0x20d077bd, 0x00000070, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+ { 0x00000048, 0x20b077bd, 0x00000090, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+ { 0x00110048, 0x20d077bd, 0x00000070, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+ { 0x00000048, 0x20b077bd, 0x00000090, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+ { 0x00110048, 0x20d077bd, 0x00000070, 0x00000220 },
+ { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+ { 0x00000048, 0x20b077bd, 0x00000090, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x000a000a },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+ { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+ { 0x00110048, 0x20d477bd, 0x00000074, 0x00000114 },
+ { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+ { 0x00000048, 0x20b477bd, 0x00000094, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+ { 0x00110048, 0x20d477bd, 0x00000074, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+ { 0x00000048, 0x20b477bd, 0x00000094, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+ { 0x00110048, 0x20d477bd, 0x00000074, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+ { 0x00000048, 0x20b477bd, 0x00000094, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+ { 0x00110048, 0x20d477bd, 0x00000074, 0x00000220 },
+ { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+ { 0x00000048, 0x20b477bd, 0x00000094, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x000c000c },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+ { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+ { 0x00110048, 0x20d877bd, 0x00000078, 0x00000114 },
+ { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+ { 0x00000048, 0x20b877bd, 0x00000098, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+ { 0x00110048, 0x20d877bd, 0x00000078, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+ { 0x00000048, 0x20b877bd, 0x00000098, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+ { 0x00110048, 0x20d877bd, 0x00000078, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+ { 0x00000048, 0x20b877bd, 0x00000098, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+ { 0x00110048, 0x20d877bd, 0x00000078, 0x00000220 },
+ { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+ { 0x00000048, 0x20b877bd, 0x00000098, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x000e000e },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+ { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+ { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000114 },
+ { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+ { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+ { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+ { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+ { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+ { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+ { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000220 },
+ { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+ { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00800001, 0x22a00169, 0x00000000, 0xffffffff },
+ { 0x02000010, 0x20002d24, 0x020000e4, 0x00000000 },
+ { 0x02010010, 0x20002d24, 0x020000e6, 0x00000000 },
+ { 0x00110220, 0x34001c00, 0x02001400, 0x0000001a },
+ { 0x00000005, 0x23022d31, 0x00000044, 0x00030003 },
+ { 0x00000401, 0x233803bd, 0x000000f4, 0x00000000 },
+ { 0x00000c01, 0x233003bd, 0x00000060, 0x00000000 },
+ { 0x00000c01, 0x233403bd, 0x00000080, 0x00000000 },
+ { 0x00000c01, 0x232803bd, 0x000000c0, 0x00000000 },
+ { 0x00000801, 0x232c03bd, 0x000000a0, 0x00000000 },
+ { 0x00000001, 0x240803bc, 0x000000c0, 0x00000000 },
+ { 0x00000048, 0x24087fbc, 0x00000060, 0x41000000 },
+ { 0x00000448, 0x22e87fbd, 0x000000f4, 0x41e00000 },
+ { 0x00000c41, 0x22e47fbd, 0x00000080, 0x40800000 },
+ { 0x00000001, 0x241003bc, 0x00000060, 0x00000000 },
+ { 0x00000c48, 0x22f07fbd, 0x000000f4, 0x41000000 },
+ { 0x00000801, 0x22f40061, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00400001, 0x2400036c, 0x00000000, 0x00006ea2 },
+ { 0x00400040, 0x24002d8c, 0x00690400, 0x00460046 },
+ { 0x00400009, 0x22c02d8d, 0x00690400, 0x00050005 },
+ { 0x00400001, 0x2400036c, 0x00000000, 0x00006204 },
+ { 0x00400040, 0x24002d8c, 0x00690400, 0x00400040 },
+ { 0x00400409, 0x22402d8d, 0x00690400, 0x00050005 },
+ { 0x00000801, 0x22500061, 0x00000000, 0x01000100 },
+ { 0x00000001, 0x22d00061, 0x00000000, 0x00400040 },
+ { 0x00110220, 0x34001c00, 0x02001400, 0x00000052 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+ { 0x00000401, 0x233c0021, 0x000000fc, 0x00000000 },
+ { 0x00000801, 0x23240121, 0x000000f8, 0x00000000 },
+ { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+ { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+ { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+ { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+ { 0x00110220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+ { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+ { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+ { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+ { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+ { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+ { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+ { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+ { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+ { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+ { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+ { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+ { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+ { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+ { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+ { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+ { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+ { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+ { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+ { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+ { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+ { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+ { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+ { 0x02000031, 0x28000229, 0x00000200, 0x00000200 },
+ { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x048eb801 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000a000 },
+ { 0x02000031, 0x28800229, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00110220, 0x34001c00, 0x02001400, 0x00000050 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+ { 0x00000040, 0x23240d21, 0x000000f8, 0x00000001 },
+ { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+ { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+ { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+ { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+ { 0x00110220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+ { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+ { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+ { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+ { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+ { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+ { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+ { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+ { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+ { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+ { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+ { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+ { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+ { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+ { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+ { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+ { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+ { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+ { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+ { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+ { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+ { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+ { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+ { 0x02000031, 0x2a000229, 0x00000200, 0x00000200 },
+ { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x048eb801 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000a000 },
+ { 0x02000031, 0x2a800229, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00110220, 0x34001c00, 0x02001400, 0x00000050 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+ { 0x00000040, 0x23240d21, 0x000000f8, 0x00000002 },
+ { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+ { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+ { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+ { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+ { 0x00110220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+ { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+ { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+ { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+ { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+ { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+ { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+ { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+ { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+ { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+ { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+ { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+ { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+ { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+ { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+ { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+ { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+ { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+ { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+ { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+ { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+ { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+ { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+ { 0x02000031, 0x2c000229, 0x00000200, 0x00000200 },
+ { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x048eb801 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000a000 },
+ { 0x02000031, 0x2c800229, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00110220, 0x34001c00, 0x02001400, 0x00000050 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+ { 0x00000040, 0x23240d21, 0x000000f8, 0x00000003 },
+ { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+ { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+ { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+ { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+ { 0x00110220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+ { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+ { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+ { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+ { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+ { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+ { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+ { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+ { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+ { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+ { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+ { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+ { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+ { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+ { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+ { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+ { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+ { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+ { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+ { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+ { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+ { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+ { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+ { 0x02000031, 0x2e000229, 0x00000200, 0x00000200 },
+ { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x048eb801 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000a000 },
+ { 0x02000031, 0x2e800229, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x00000000 },
+ { 0x00400040, 0x22082da8, 0x006902c0, 0x02000200 },
+ { 0x00600001, 0x23800021, 0x008d0360, 0x00000000 },
+ { 0x00600001, 0x24a00021, 0x008d0360, 0x00000000 },
+ { 0x00200401, 0x238001a5, 0x004500e0, 0x00000000 },
+ { 0x00000401, 0x24a001a5, 0x000000e0, 0x00000000 },
+ { 0x00000c08, 0x24a43da5, 0x000000e2, 0x00010001 },
+ { 0x00000801, 0x23880061, 0x00000000, 0x000f000f },
+ { 0x00000801, 0x24a80061, 0x00000000, 0x0007000f },
+ { 0x80800040, 0xa4002d29, 0x00b18400, 0x00800080 },
+ { 0x80800040, 0xa4202d29, 0x00b18420, 0x00800080 },
+ { 0x80800040, 0xa4402d29, 0x00b18440, 0x00800080 },
+ { 0x80800040, 0xa4602d29, 0x00b18460, 0x00800080 },
+ { 0x00600008, 0x45c03d29, 0x00ae8800, 0x00010001 },
+ { 0x00600008, 0x45e03d29, 0x00ae8820, 0x00010001 },
+ { 0x00600008, 0x46003d29, 0x00ae8840, 0x00010001 },
+ { 0x00600008, 0x46203d29, 0x00ae8860, 0x00010001 },
+ { 0x00600040, 0x45c02529, 0x00ae05c0, 0x00ae05e0 },
+ { 0x80600040, 0xc8002d29, 0x00ae05c0, 0x00800080 },
+ { 0x00600040, 0x46002529, 0x00ae0600, 0x00ae0620 },
+ { 0x80600040, 0xc8402d29, 0x00ae0600, 0x00800080 },
+ { 0x00600008, 0x46403d29, 0x00ae8000, 0x00010001 },
+ { 0x00600008, 0x46603d29, 0x00ae8020, 0x00010001 },
+ { 0x00600008, 0x46803d29, 0x00ae8040, 0x00010001 },
+ { 0x00600008, 0x46a03d29, 0x00ae8060, 0x00010001 },
+ { 0x00600040, 0x46402529, 0x00ae0640, 0x00ae0660 },
+ { 0x80600040, 0xc0002d29, 0x00ae0640, 0x00800080 },
+ { 0x00600040, 0x46802529, 0x00ae0680, 0x00ae06a0 },
+ { 0x80600040, 0xc0402d29, 0x00ae0680, 0x00800080 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x04000400 },
+ { 0x80800040, 0xb4002d29, 0x00b19400, 0x00800080 },
+ { 0x80800040, 0xb4202d29, 0x00b19420, 0x00800080 },
+ { 0x80800040, 0xb4402d29, 0x00b19440, 0x00800080 },
+ { 0x80800040, 0xb4602d29, 0x00b19460, 0x00800080 },
+ { 0x00600008, 0x45c03d29, 0x00ae9800, 0x00010001 },
+ { 0x00600008, 0x45e03d29, 0x00ae9820, 0x00010001 },
+ { 0x00600008, 0x46003d29, 0x00ae9840, 0x00010001 },
+ { 0x00600008, 0x46203d29, 0x00ae9860, 0x00010001 },
+ { 0x00600040, 0x45c02529, 0x00ae05c0, 0x00ae05e0 },
+ { 0x80600040, 0xd8002d29, 0x00ae05c0, 0x00800080 },
+ { 0x00600040, 0x46002529, 0x00ae0600, 0x00ae0620 },
+ { 0x80600040, 0xd8402d29, 0x00ae0600, 0x00800080 },
+ { 0x00600008, 0x46403d29, 0x00ae9000, 0x00010001 },
+ { 0x00600008, 0x46603d29, 0x00ae9020, 0x00010001 },
+ { 0x00600008, 0x46803d29, 0x00ae9040, 0x00010001 },
+ { 0x00600008, 0x46a03d29, 0x00ae9060, 0x00010001 },
+ { 0x00600040, 0x46402529, 0x00ae0640, 0x00ae0660 },
+ { 0x80600040, 0xd0002d29, 0x00ae0640, 0x00800080 },
+ { 0x00600040, 0x46802529, 0x00ae0680, 0x00ae06a0 },
+ { 0x80600040, 0xd0402d29, 0x00ae0680, 0x00800080 },
+ { 0x00400040, 0x22082da8, 0x006902c0, 0x06000600 },
+ { 0x80800040, 0xa4002d29, 0x00b18400, 0x00800080 },
+ { 0x80800040, 0xa4202d29, 0x00b18420, 0x00800080 },
+ { 0x80800040, 0xa4402d29, 0x00b18440, 0x00800080 },
+ { 0x80800040, 0xa4602d29, 0x00b18460, 0x00800080 },
+ { 0x00600008, 0x45c03d29, 0x00ae8800, 0x00010001 },
+ { 0x00600008, 0x45e03d29, 0x00ae8820, 0x00010001 },
+ { 0x00600008, 0x46003d29, 0x00ae8840, 0x00010001 },
+ { 0x00600008, 0x46203d29, 0x00ae8860, 0x00010001 },
+ { 0x00600040, 0x45c02529, 0x00ae05c0, 0x00ae05e0 },
+ { 0x80600040, 0xc8002d29, 0x00ae05c0, 0x00800080 },
+ { 0x00600040, 0x46002529, 0x00ae0600, 0x00ae0620 },
+ { 0x80600040, 0xc8402d29, 0x00ae0600, 0x00800080 },
+ { 0x00600008, 0x46403d29, 0x00ae8000, 0x00010001 },
+ { 0x00600008, 0x46603d29, 0x00ae8020, 0x00010001 },
+ { 0x00600008, 0x46803d29, 0x00ae8040, 0x00010001 },
+ { 0x00600008, 0x46a03d29, 0x00ae8060, 0x00010001 },
+ { 0x00600040, 0x46402529, 0x00ae0640, 0x00ae0660 },
+ { 0x80600040, 0xc0002d29, 0x00ae0640, 0x00800080 },
+ { 0x00600040, 0x46802529, 0x00ae0680, 0x00ae06a0 },
+ { 0x80600040, 0xc0402d29, 0x00ae0680, 0x00800080 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x04000400 },
+ { 0x80800040, 0xb4002d29, 0x00b19400, 0x00800080 },
+ { 0x80800040, 0xb4202d29, 0x00b19420, 0x00800080 },
+ { 0x80800040, 0xb4402d29, 0x00b19440, 0x00800080 },
+ { 0x80800040, 0xb4602d29, 0x00b19460, 0x00800080 },
+ { 0x00600008, 0x45c03d29, 0x00ae9800, 0x00010001 },
+ { 0x00600008, 0x45e03d29, 0x00ae9820, 0x00010001 },
+ { 0x00600008, 0x46003d29, 0x00ae9840, 0x00010001 },
+ { 0x00600008, 0x46203d29, 0x00ae9860, 0x00010001 },
+ { 0x00600040, 0x45c02529, 0x00ae05c0, 0x00ae05e0 },
+ { 0x80600040, 0xd8002d29, 0x00ae05c0, 0x00800080 },
+ { 0x00600040, 0x46002529, 0x00ae0600, 0x00ae0620 },
+ { 0x80600040, 0xd8402d29, 0x00ae0600, 0x00800080 },
+ { 0x00600008, 0x46403d29, 0x00ae9000, 0x00010001 },
+ { 0x00600008, 0x46603d29, 0x00ae9020, 0x00010001 },
+ { 0x00600008, 0x46803d29, 0x00ae9040, 0x00010001 },
+ { 0x00600008, 0x46a03d29, 0x00ae9060, 0x00010001 },
+ { 0x00600040, 0x46402529, 0x00ae0640, 0x00ae0660 },
+ { 0x80600040, 0xd0002d29, 0x00ae0640, 0x00800080 },
+ { 0x00600040, 0x46802529, 0x00ae0680, 0x00ae06a0 },
+ { 0x80600040, 0xd0402d29, 0x00ae0680, 0x00800080 },
+ { 0x00400040, 0x22082da8, 0x006902c0, 0x06000600 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x00000000 },
+ { 0x00400040, 0x22082da8, 0x006902c0, 0x02000200 },
+ { 0x00800401, 0x23a00231, 0x00d28401, 0x00000000 },
+ { 0x00800801, 0x23b00231, 0x00d28421, 0x00000000 },
+ { 0x00800401, 0x23c00231, 0x00d28441, 0x00000000 },
+ { 0x00800801, 0x23d00231, 0x00d28461, 0x00000000 },
+ { 0x00600401, 0x44c00231, 0x00cf8801, 0x00000000 },
+ { 0x00600c01, 0x44d00231, 0x00cf8841, 0x00000000 },
+ { 0x00600c01, 0x44c10231, 0x00cf8001, 0x00000000 },
+ { 0x00600801, 0x44d10231, 0x00cf8041, 0x00000000 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x04000400 },
+ { 0x00800401, 0x23e00231, 0x00d29401, 0x00000000 },
+ { 0x00800801, 0x23f00231, 0x00d29421, 0x00000000 },
+ { 0x00800401, 0x24000231, 0x00d29441, 0x00000000 },
+ { 0x00800801, 0x24100231, 0x00d29461, 0x00000000 },
+ { 0x00600401, 0x44e00231, 0x00cf9801, 0x00000000 },
+ { 0x00600c01, 0x44f00231, 0x00cf9841, 0x00000000 },
+ { 0x00600c01, 0x44e10231, 0x00cf9001, 0x00000000 },
+ { 0x00600801, 0x44f10231, 0x00cf9041, 0x00000000 },
+ { 0x00400040, 0x22082da8, 0x006902c0, 0x06000600 },
+ { 0x00800401, 0x24200231, 0x00d28401, 0x00000000 },
+ { 0x00800801, 0x24300231, 0x00d28421, 0x00000000 },
+ { 0x00800401, 0x24400231, 0x00d28441, 0x00000000 },
+ { 0x00800801, 0x24500231, 0x00d28461, 0x00000000 },
+ { 0x00600401, 0x45000231, 0x00cf8801, 0x00000000 },
+ { 0x00600c01, 0x45100231, 0x00cf8841, 0x00000000 },
+ { 0x00600c01, 0x45010231, 0x00cf8001, 0x00000000 },
+ { 0x00600801, 0x45110231, 0x00cf8041, 0x00000000 },
+ { 0x00800401, 0x24600231, 0x00d29401, 0x00000000 },
+ { 0x00800801, 0x24700231, 0x00d29421, 0x00000000 },
+ { 0x00800401, 0x24800231, 0x00d29441, 0x00000000 },
+ { 0x00800801, 0x24900231, 0x00d29461, 0x00000000 },
+ { 0x00600401, 0x45200231, 0x00cf9801, 0x00000000 },
+ { 0x00600c01, 0x45300231, 0x00cf9841, 0x00000000 },
+ { 0x00600c01, 0x45210231, 0x00cf9001, 0x00000000 },
+ { 0x00600801, 0x45310231, 0x00cf9041, 0x00000000 },
+ { 0x05000031, 0x20000e24, 0x00000380, 0x120a8018 },
+ { 0x05000031, 0x20000e24, 0x000004a0, 0x0a0a8019 },
+ { 0x00600001, 0x2fe00021, 0x008d0000, 0x00000000 },
+ { 0x07000031, 0x20001e24, 0x00000fe0, 0x82000010 },
diff --git a/src/shaders/post_processing/gen7/dndi.asm b/src/shaders/post_processing/gen7/dndi.asm
new file mode 100644
index 0000000..b820fdb
--- /dev/null
+++ b/src/shaders/post_processing/gen7/dndi.asm
@@ -0,0 +1,11 @@
+// Module name: DNDI
+.kernel DNDI
+.code
+
+#include "NV12_DI_NV12.g4a"
+
+.end_code
+
+.end_kernel
+
+// end of DNDI
diff --git a/src/shaders/post_processing/gen7/dndi.g75b b/src/shaders/post_processing/gen7/dndi.g75b
new file mode 100644
index 0000000..ffa9d49
--- /dev/null
+++ b/src/shaders/post_processing/gen7/dndi.g75b
@@ -0,0 +1,46 @@
+ { 0x00600001, 0x22400021, 0x008d0000, 0x00000000 },
+ { 0x00000401, 0x226801ad, 0x000000e0, 0x00000000 },
+ { 0x00000801, 0x227801ad, 0x000000e2, 0x00000000 },
+ { 0x02600031, 0x25c00e21, 0x00000240, 0x04ae8003 },
+ { 0x00200001, 0x20e0012d, 0x004506fc, 0x00000000 },
+ { 0x00600001, 0x22800021, 0x008d0000, 0x00000000 },
+ { 0x00600001, 0x22a00021, 0x008d06c0, 0x00000000 },
+ { 0x00000408, 0x22803da1, 0x000000e0, 0x00010001 },
+ { 0x00000c01, 0x228401a1, 0x000000e2, 0x00000000 },
+ { 0x00000801, 0x22880061, 0x00000000, 0x00030007 },
+ { 0x05600031, 0x20000e24, 0x00000280, 0x040a8021 },
+ { 0x00600001, 0x23200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x23000021, 0x008d0000, 0x00000000 },
+ { 0x00000408, 0x23003da1, 0x000000e0, 0x00010001 },
+ { 0x00000041, 0x24043da0, 0x000000e2, 0x00030003 },
+ { 0x00000c08, 0x23043c01, 0x00000404, 0x00020002 },
+ { 0x00000801, 0x23080061, 0x00000000, 0x00020007 },
+ { 0x00200040, 0x23002421, 0x00450300, 0x00450038 },
+ { 0x00000401, 0x23200021, 0x000006e4, 0x00000000 },
+ { 0x00200c01, 0x432c0021, 0x004506ec, 0x00000000 },
+ { 0x00200801, 0x43280021, 0x004506f4, 0x00000000 },
+ { 0x05600031, 0x20000e24, 0x00000300, 0x040a8021 },
+ { 0x00600001, 0x23800021, 0x008d0000, 0x00000000 },
+ { 0x00000401, 0x238001a5, 0x000000e0, 0x00000000 },
+ { 0x00000c01, 0x238401a5, 0x000000e2, 0x00000000 },
+ { 0x00000801, 0x23880061, 0x00000000, 0x0003000f },
+ { 0x00600001, 0x22400021, 0x008d0380, 0x00000000 },
+ { 0x00600001, 0x22e00021, 0x008d0380, 0x00000000 },
+ { 0x00600001, 0x22600021, 0x008d05c0, 0x00000000 },
+ { 0x00600001, 0x22800021, 0x008d05e0, 0x00000000 },
+ { 0x00600001, 0x23000021, 0x008d0640, 0x00000000 },
+ { 0x00600001, 0x23200021, 0x008d0660, 0x00000000 },
+ { 0x0000040c, 0x23843ca5, 0x00000384, 0x00010001 },
+ { 0x00000801, 0x23880061, 0x00000000, 0x0001000f },
+ { 0x00800401, 0x42c00231, 0x00ce0601, 0x00000000 },
+ { 0x00800801, 0x42c10231, 0x00ce0600, 0x00000000 },
+ { 0x00800401, 0x43600231, 0x00ce0681, 0x00000000 },
+ { 0x00800801, 0x43610231, 0x00ce0680, 0x00000000 },
+ { 0x00600001, 0x22a00021, 0x008d0380, 0x00000000 },
+ { 0x00600001, 0x23400021, 0x008d0380, 0x00000000 },
+ { 0x05600031, 0x20000e24, 0x00000240, 0x060a801b },
+ { 0x05600031, 0x20000e24, 0x000002e0, 0x060a801e },
+ { 0x05600031, 0x20000e24, 0x000002a0, 0x040a801c },
+ { 0x05600031, 0x20000e24, 0x00000340, 0x040a801f },
+ { 0x00600001, 0x2fe00021, 0x008d0000, 0x00000000 },
+ { 0x07000031, 0x20001e24, 0x00000fe0, 0x82000010 },
diff --git a/src/shaders/post_processing/gen7/dndi.g7b b/src/shaders/post_processing/gen7/dndi.g7b
new file mode 100644
index 0000000..ffa9d49
--- /dev/null
+++ b/src/shaders/post_processing/gen7/dndi.g7b
@@ -0,0 +1,46 @@
+ { 0x00600001, 0x22400021, 0x008d0000, 0x00000000 },
+ { 0x00000401, 0x226801ad, 0x000000e0, 0x00000000 },
+ { 0x00000801, 0x227801ad, 0x000000e2, 0x00000000 },
+ { 0x02600031, 0x25c00e21, 0x00000240, 0x04ae8003 },
+ { 0x00200001, 0x20e0012d, 0x004506fc, 0x00000000 },
+ { 0x00600001, 0x22800021, 0x008d0000, 0x00000000 },
+ { 0x00600001, 0x22a00021, 0x008d06c0, 0x00000000 },
+ { 0x00000408, 0x22803da1, 0x000000e0, 0x00010001 },
+ { 0x00000c01, 0x228401a1, 0x000000e2, 0x00000000 },
+ { 0x00000801, 0x22880061, 0x00000000, 0x00030007 },
+ { 0x05600031, 0x20000e24, 0x00000280, 0x040a8021 },
+ { 0x00600001, 0x23200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x23000021, 0x008d0000, 0x00000000 },
+ { 0x00000408, 0x23003da1, 0x000000e0, 0x00010001 },
+ { 0x00000041, 0x24043da0, 0x000000e2, 0x00030003 },
+ { 0x00000c08, 0x23043c01, 0x00000404, 0x00020002 },
+ { 0x00000801, 0x23080061, 0x00000000, 0x00020007 },
+ { 0x00200040, 0x23002421, 0x00450300, 0x00450038 },
+ { 0x00000401, 0x23200021, 0x000006e4, 0x00000000 },
+ { 0x00200c01, 0x432c0021, 0x004506ec, 0x00000000 },
+ { 0x00200801, 0x43280021, 0x004506f4, 0x00000000 },
+ { 0x05600031, 0x20000e24, 0x00000300, 0x040a8021 },
+ { 0x00600001, 0x23800021, 0x008d0000, 0x00000000 },
+ { 0x00000401, 0x238001a5, 0x000000e0, 0x00000000 },
+ { 0x00000c01, 0x238401a5, 0x000000e2, 0x00000000 },
+ { 0x00000801, 0x23880061, 0x00000000, 0x0003000f },
+ { 0x00600001, 0x22400021, 0x008d0380, 0x00000000 },
+ { 0x00600001, 0x22e00021, 0x008d0380, 0x00000000 },
+ { 0x00600001, 0x22600021, 0x008d05c0, 0x00000000 },
+ { 0x00600001, 0x22800021, 0x008d05e0, 0x00000000 },
+ { 0x00600001, 0x23000021, 0x008d0640, 0x00000000 },
+ { 0x00600001, 0x23200021, 0x008d0660, 0x00000000 },
+ { 0x0000040c, 0x23843ca5, 0x00000384, 0x00010001 },
+ { 0x00000801, 0x23880061, 0x00000000, 0x0001000f },
+ { 0x00800401, 0x42c00231, 0x00ce0601, 0x00000000 },
+ { 0x00800801, 0x42c10231, 0x00ce0600, 0x00000000 },
+ { 0x00800401, 0x43600231, 0x00ce0681, 0x00000000 },
+ { 0x00800801, 0x43610231, 0x00ce0680, 0x00000000 },
+ { 0x00600001, 0x22a00021, 0x008d0380, 0x00000000 },
+ { 0x00600001, 0x23400021, 0x008d0380, 0x00000000 },
+ { 0x05600031, 0x20000e24, 0x00000240, 0x060a801b },
+ { 0x05600031, 0x20000e24, 0x000002e0, 0x060a801e },
+ { 0x05600031, 0x20000e24, 0x000002a0, 0x040a801c },
+ { 0x05600031, 0x20000e24, 0x00000340, 0x040a801f },
+ { 0x00600001, 0x2fe00021, 0x008d0000, 0x00000000 },
+ { 0x07000031, 0x20001e24, 0x00000fe0, 0x82000010 },
diff --git a/src/shaders/post_processing/gen7/nv12_dn_nv12.asm b/src/shaders/post_processing/gen7/nv12_dn_nv12.asm
new file mode 100644
index 0000000..94e7c6b
--- /dev/null
+++ b/src/shaders/post_processing/gen7/nv12_dn_nv12.asm
@@ -0,0 +1,5 @@
+// Module name: DN
+
+#include "NV12_DN_NV12.g4a"
+
+// end of DNDI
diff --git a/src/shaders/post_processing/gen7/nv12_dn_nv12.g75b b/src/shaders/post_processing/gen7/nv12_dn_nv12.g75b
new file mode 100644
index 0000000..a43e216
--- /dev/null
+++ b/src/shaders/post_processing/gen7/nv12_dn_nv12.g75b
@@ -0,0 +1,40 @@
+ { 0x00600001, 0x22400021, 0x008d0000, 0x00000000 },
+ { 0x00000401, 0x226801ad, 0x000000e0, 0x00000000 },
+ { 0x00000801, 0x227801ad, 0x000000e2, 0x00000000 },
+ { 0x02600031, 0x25c00e21, 0x00000240, 0x045e8003 },
+ { 0x00200001, 0x20e0012d, 0x0045065c, 0x00000000 },
+ { 0x00600001, 0x23600021, 0x008d0000, 0x00000000 },
+ { 0x00200001, 0x22e00021, 0x00450640, 0x00000000 },
+ { 0x00200008, 0x23603da1, 0x004500e0, 0x00020002 },
+ { 0x00000440, 0x23602421, 0x00000360, 0x00000038 },
+ { 0x00000801, 0x23680061, 0x00000000, 0x00010003 },
+ { 0x00600001, 0x22c00021, 0x008d0360, 0x00000000 },
+ { 0x05600031, 0x20000e24, 0x000002c0, 0x040a8021 },
+ { 0x00600001, 0x23200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x23000021, 0x008d0000, 0x00000000 },
+ { 0x00000408, 0x23003da1, 0x000000e0, 0x00010001 },
+ { 0x00000041, 0x24043da0, 0x000000e2, 0x00030003 },
+ { 0x00000c08, 0x23043c01, 0x00000404, 0x00020002 },
+ { 0x00000801, 0x23080061, 0x00000000, 0x00050003 },
+ { 0x00200040, 0x23002421, 0x00450300, 0x00450038 },
+ { 0x00000401, 0x23200231, 0x00000648, 0x00000000 },
+ { 0x00000c01, 0x23260129, 0x00000656, 0x00000000 },
+ { 0x00200c01, 0x23280129, 0x00450658, 0x00000000 },
+ { 0x00000c01, 0x23320129, 0x00000650, 0x00000000 },
+ { 0x00200801, 0x23340129, 0x00450652, 0x00000000 },
+ { 0x05600031, 0x20000e24, 0x00000300, 0x040a8021 },
+ { 0x00200040, 0x236035a5, 0x004500e0, 0x00450088 },
+ { 0x0000040c, 0x23643ca5, 0x00000364, 0x00010001 },
+ { 0x00000801, 0x23680061, 0x00000000, 0x0004000f },
+ { 0x00600001, 0x24800021, 0x008d0360, 0x00000000 },
+ { 0x04600031, 0x27400e21, 0x00000480, 0x02390001 },
+ { 0x00000001, 0x24880061, 0x00000000, 0x0003000f },
+ { 0x00600001, 0x25a00021, 0x008d0000, 0x00000000 },
+ { 0x00200401, 0x25a001a1, 0x004500e0, 0x00000000 },
+ { 0x00000801, 0x25a80061, 0x00000000, 0x0007000f },
+ { 0x05600031, 0x20000e24, 0x000005a0, 0x0a0a8018 },
+ { 0x00600001, 0x24a00021, 0x008d0740, 0x00000000 },
+ { 0x00600001, 0x24c00021, 0x008d0760, 0x00000000 },
+ { 0x05600031, 0x20000e24, 0x00000480, 0x060a8019 },
+ { 0x00600001, 0x2fe00021, 0x008d0000, 0x00000000 },
+ { 0x07000031, 0x20001e24, 0x00000fe0, 0x82000010 },
diff --git a/src/shaders/post_processing/gen7/nv12_dn_nv12.g7b b/src/shaders/post_processing/gen7/nv12_dn_nv12.g7b
new file mode 100644
index 0000000..a43e216
--- /dev/null
+++ b/src/shaders/post_processing/gen7/nv12_dn_nv12.g7b
@@ -0,0 +1,40 @@
+ { 0x00600001, 0x22400021, 0x008d0000, 0x00000000 },
+ { 0x00000401, 0x226801ad, 0x000000e0, 0x00000000 },
+ { 0x00000801, 0x227801ad, 0x000000e2, 0x00000000 },
+ { 0x02600031, 0x25c00e21, 0x00000240, 0x045e8003 },
+ { 0x00200001, 0x20e0012d, 0x0045065c, 0x00000000 },
+ { 0x00600001, 0x23600021, 0x008d0000, 0x00000000 },
+ { 0x00200001, 0x22e00021, 0x00450640, 0x00000000 },
+ { 0x00200008, 0x23603da1, 0x004500e0, 0x00020002 },
+ { 0x00000440, 0x23602421, 0x00000360, 0x00000038 },
+ { 0x00000801, 0x23680061, 0x00000000, 0x00010003 },
+ { 0x00600001, 0x22c00021, 0x008d0360, 0x00000000 },
+ { 0x05600031, 0x20000e24, 0x000002c0, 0x040a8021 },
+ { 0x00600001, 0x23200061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x23000021, 0x008d0000, 0x00000000 },
+ { 0x00000408, 0x23003da1, 0x000000e0, 0x00010001 },
+ { 0x00000041, 0x24043da0, 0x000000e2, 0x00030003 },
+ { 0x00000c08, 0x23043c01, 0x00000404, 0x00020002 },
+ { 0x00000801, 0x23080061, 0x00000000, 0x00050003 },
+ { 0x00200040, 0x23002421, 0x00450300, 0x00450038 },
+ { 0x00000401, 0x23200231, 0x00000648, 0x00000000 },
+ { 0x00000c01, 0x23260129, 0x00000656, 0x00000000 },
+ { 0x00200c01, 0x23280129, 0x00450658, 0x00000000 },
+ { 0x00000c01, 0x23320129, 0x00000650, 0x00000000 },
+ { 0x00200801, 0x23340129, 0x00450652, 0x00000000 },
+ { 0x05600031, 0x20000e24, 0x00000300, 0x040a8021 },
+ { 0x00200040, 0x236035a5, 0x004500e0, 0x00450088 },
+ { 0x0000040c, 0x23643ca5, 0x00000364, 0x00010001 },
+ { 0x00000801, 0x23680061, 0x00000000, 0x0004000f },
+ { 0x00600001, 0x24800021, 0x008d0360, 0x00000000 },
+ { 0x04600031, 0x27400e21, 0x00000480, 0x02390001 },
+ { 0x00000001, 0x24880061, 0x00000000, 0x0003000f },
+ { 0x00600001, 0x25a00021, 0x008d0000, 0x00000000 },
+ { 0x00200401, 0x25a001a1, 0x004500e0, 0x00000000 },
+ { 0x00000801, 0x25a80061, 0x00000000, 0x0007000f },
+ { 0x05600031, 0x20000e24, 0x000005a0, 0x0a0a8018 },
+ { 0x00600001, 0x24a00021, 0x008d0740, 0x00000000 },
+ { 0x00600001, 0x24c00021, 0x008d0760, 0x00000000 },
+ { 0x05600031, 0x20000e24, 0x00000480, 0x060a8019 },
+ { 0x00600001, 0x2fe00021, 0x008d0000, 0x00000000 },
+ { 0x07000031, 0x20001e24, 0x00000fe0, 0x82000010 },
diff --git a/src/shaders/post_processing/gen7/pa_to_pl2.asm b/src/shaders/post_processing/gen7/pa_to_pl2.asm
new file mode 100644
index 0000000..1e952b4
--- /dev/null
+++ b/src/shaders/post_processing/gen7/pa_to_pl2.asm
@@ -0,0 +1,17 @@
+// Module name: AVS
+.kernel PA_TO_pl2
+.code
+
+#include "VP_Setup.g4a"
+#include "Set_Layer_0.g4a"
+#include "Set_AVS_Buf_0123_VYUA.g4a"
+#include "PA_AVS_Buf_0.g4a"
+#include "PA_AVS_Buf_1.g4a"
+#include "PA_AVS_Buf_2.g4a"
+#include "PA_AVS_Buf_3.g4a"
+#include "Save_AVS_NV12.g4a"
+#include "EOT.g4a"
+
+.end_code
+
+.end_kernel
diff --git a/src/shaders/post_processing/gen7/pa_to_pl2.g75b b/src/shaders/post_processing/gen7/pa_to_pl2.g75b
new file mode 100644
index 0000000..c6aa5fb
--- /dev/null
+++ b/src/shaders/post_processing/gen7/pa_to_pl2.g75b
@@ -0,0 +1,633 @@
+ { 0x00600001, 0x23600021, 0x008d0000, 0x00000000 },
+ { 0x00600001, 0x23200021, 0x008d0000, 0x00000000 },
+ { 0x00600001, 0x23400021, 0x008d0000, 0x00000000 },
+ { 0x00600041, 0x206077bd, 0x008d0060, 0x000000f0 },
+ { 0x00200001, 0x211401bd, 0x004500e0, 0x00000000 },
+ { 0x01600010, 0x20002e24, 0x0000005a, 0x00010001 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x00000000 },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+ { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c077bd, 0x00000060, 0x00000114 },
+ { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a077bd, 0x00000080, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c077bd, 0x00000060, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a077bd, 0x00000080, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c077bd, 0x00000060, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a077bd, 0x00000080, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c077bd, 0x00000060, 0x00000220 },
+ { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a077bd, 0x00000080, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x00020002 },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+ { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+ { 0x00110048, 0x20c477bd, 0x00000064, 0x00000114 },
+ { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+ { 0x00000048, 0x20a477bd, 0x00000084, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+ { 0x00110048, 0x20c477bd, 0x00000064, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+ { 0x00000048, 0x20a477bd, 0x00000084, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+ { 0x00110048, 0x20c477bd, 0x00000064, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+ { 0x00000048, 0x20a477bd, 0x00000084, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+ { 0x00110048, 0x20c477bd, 0x00000064, 0x00000220 },
+ { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+ { 0x00000048, 0x20a477bd, 0x00000084, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x00040004 },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+ { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+ { 0x00110048, 0x20c877bd, 0x00000068, 0x00000114 },
+ { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+ { 0x00000048, 0x20a877bd, 0x00000088, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+ { 0x00110048, 0x20c877bd, 0x00000068, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+ { 0x00000048, 0x20a877bd, 0x00000088, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+ { 0x00110048, 0x20c877bd, 0x00000068, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+ { 0x00000048, 0x20a877bd, 0x00000088, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+ { 0x00110048, 0x20c877bd, 0x00000068, 0x00000220 },
+ { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+ { 0x00000048, 0x20a877bd, 0x00000088, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x00060006 },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+ { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+ { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000114 },
+ { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+ { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+ { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+ { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+ { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+ { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+ { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000220 },
+ { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+ { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x00080008 },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+ { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+ { 0x00110048, 0x20d077bd, 0x00000070, 0x00000114 },
+ { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+ { 0x00000048, 0x20b077bd, 0x00000090, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+ { 0x00110048, 0x20d077bd, 0x00000070, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+ { 0x00000048, 0x20b077bd, 0x00000090, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+ { 0x00110048, 0x20d077bd, 0x00000070, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+ { 0x00000048, 0x20b077bd, 0x00000090, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+ { 0x00110048, 0x20d077bd, 0x00000070, 0x00000220 },
+ { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+ { 0x00000048, 0x20b077bd, 0x00000090, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x000a000a },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+ { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+ { 0x00110048, 0x20d477bd, 0x00000074, 0x00000114 },
+ { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+ { 0x00000048, 0x20b477bd, 0x00000094, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+ { 0x00110048, 0x20d477bd, 0x00000074, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+ { 0x00000048, 0x20b477bd, 0x00000094, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+ { 0x00110048, 0x20d477bd, 0x00000074, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+ { 0x00000048, 0x20b477bd, 0x00000094, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+ { 0x00110048, 0x20d477bd, 0x00000074, 0x00000220 },
+ { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+ { 0x00000048, 0x20b477bd, 0x00000094, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x000c000c },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+ { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+ { 0x00110048, 0x20d877bd, 0x00000078, 0x00000114 },
+ { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+ { 0x00000048, 0x20b877bd, 0x00000098, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+ { 0x00110048, 0x20d877bd, 0x00000078, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+ { 0x00000048, 0x20b877bd, 0x00000098, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+ { 0x00110048, 0x20d877bd, 0x00000078, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+ { 0x00000048, 0x20b877bd, 0x00000098, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+ { 0x00110048, 0x20d877bd, 0x00000078, 0x00000220 },
+ { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+ { 0x00000048, 0x20b877bd, 0x00000098, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x000e000e },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+ { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+ { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000114 },
+ { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+ { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+ { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+ { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+ { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+ { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+ { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000220 },
+ { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+ { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00800001, 0x22a00169, 0x00000000, 0xffffffff },
+ { 0x02000010, 0x20002d24, 0x020000e4, 0x00000000 },
+ { 0x02010010, 0x20002d24, 0x020000e6, 0x00000000 },
+ { 0x00110220, 0x34001c00, 0x02001400, 0x000000d0 },
+ { 0x00000005, 0x23022d31, 0x00000044, 0x00030003 },
+ { 0x00000401, 0x233803bd, 0x000000f4, 0x00000000 },
+ { 0x00000c01, 0x233003bd, 0x00000060, 0x00000000 },
+ { 0x00000c01, 0x233403bd, 0x00000080, 0x00000000 },
+ { 0x00000c01, 0x232803bd, 0x000000c0, 0x00000000 },
+ { 0x00000801, 0x232c03bd, 0x000000a0, 0x00000000 },
+ { 0x00000001, 0x240803bc, 0x000000c0, 0x00000000 },
+ { 0x00000048, 0x24087fbc, 0x00000060, 0x41000000 },
+ { 0x00000448, 0x22e87fbd, 0x000000f4, 0x41e00000 },
+ { 0x00000c41, 0x22e47fbd, 0x00000080, 0x40800000 },
+ { 0x00000001, 0x241003bc, 0x00000060, 0x00000000 },
+ { 0x00000c48, 0x22f07fbd, 0x000000f4, 0x41000000 },
+ { 0x00000801, 0x22f40061, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00400001, 0x2400036c, 0x00000000, 0x000062ea },
+ { 0x00400040, 0x24002d8c, 0x00690400, 0x00460046 },
+ { 0x00400009, 0x22c02d8d, 0x00690400, 0x00050005 },
+ { 0x00400001, 0x2400036c, 0x00000000, 0x00006420 },
+ { 0x00400040, 0x24002d8c, 0x00690400, 0x00400040 },
+ { 0x00400409, 0x22402d8d, 0x00690400, 0x00050005 },
+ { 0x00000801, 0x22500061, 0x00000000, 0x01000100 },
+ { 0x00000001, 0x22d00061, 0x00000000, 0x00400040 },
+ { 0x00110220, 0x34001c00, 0x02001400, 0x00000230 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x050eb400 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x00000000 },
+ { 0x00000401, 0x233c0021, 0x000000fc, 0x00000000 },
+ { 0x00000801, 0x23240121, 0x000000f8, 0x00000000 },
+ { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+ { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+ { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+ { 0x00110220, 0x34001c00, 0x00001400, 0x00000190 },
+ { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+ { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+ { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+ { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+ { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+ { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+ { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+ { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+ { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+ { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+ { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+ { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+ { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+ { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+ { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+ { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+ { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+ { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+ { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+ { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+ { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+ { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+ { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+ { 0x02000031, 0x28000229, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00110220, 0x34001c00, 0x02001400, 0x00000230 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x050eb400 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x00000000 },
+ { 0x00000040, 0x23240d21, 0x000000f8, 0x00000001 },
+ { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+ { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+ { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+ { 0x00110220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+ { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+ { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+ { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+ { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+ { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+ { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+ { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+ { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+ { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+ { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+ { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+ { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+ { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+ { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+ { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+ { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+ { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+ { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+ { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+ { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+ { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+ { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+ { 0x02000031, 0x2a000229, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00110220, 0x34001c00, 0x02001400, 0x00000230 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x050eb400 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x00000000 },
+ { 0x00000040, 0x23240d21, 0x000000f8, 0x00000002 },
+ { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+ { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+ { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+ { 0x00110220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+ { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+ { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+ { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+ { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+ { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+ { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+ { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+ { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+ { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+ { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+ { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+ { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+ { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+ { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+ { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+ { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+ { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+ { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+ { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+ { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+ { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+ { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+ { 0x02000031, 0x2c000229, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00110220, 0x34001c00, 0x02001400, 0x00000230 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x050eb400 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x00000000 },
+ { 0x00000040, 0x23240d21, 0x000000f8, 0x00000003 },
+ { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+ { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+ { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+ { 0x00110220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+ { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+ { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+ { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+ { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+ { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+ { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+ { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+ { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+ { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+ { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+ { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+ { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+ { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+ { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+ { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+ { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+ { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+ { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+ { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+ { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+ { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+ { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+ { 0x02000031, 0x2e000229, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x00000000 },
+ { 0x00400040, 0x22082da8, 0x006902c0, 0x02000200 },
+ { 0x00600001, 0x23800021, 0x008d0360, 0x00000000 },
+ { 0x00600001, 0x24a00021, 0x008d0360, 0x00000000 },
+ { 0x00200401, 0x238001a5, 0x004500e0, 0x00000000 },
+ { 0x00000401, 0x24a001a5, 0x000000e0, 0x00000000 },
+ { 0x00000c08, 0x24a43da5, 0x000000e2, 0x00010001 },
+ { 0x00000801, 0x23880061, 0x00000000, 0x000f000f },
+ { 0x00000801, 0x24a80061, 0x00000000, 0x0007000f },
+ { 0x80800040, 0xa4002d29, 0x00b18400, 0x00800080 },
+ { 0x80800040, 0xa4202d29, 0x00b18420, 0x00800080 },
+ { 0x80800040, 0xa4402d29, 0x00b18440, 0x00800080 },
+ { 0x80800040, 0xa4602d29, 0x00b18460, 0x00800080 },
+ { 0x00600008, 0x45c03d29, 0x00ae8800, 0x00010001 },
+ { 0x00600008, 0x45e03d29, 0x00ae8820, 0x00010001 },
+ { 0x00600008, 0x46003d29, 0x00ae8840, 0x00010001 },
+ { 0x00600008, 0x46203d29, 0x00ae8860, 0x00010001 },
+ { 0x00600040, 0x45c02529, 0x00ae05c0, 0x00ae05e0 },
+ { 0x80600040, 0xc8002d29, 0x00ae05c0, 0x00800080 },
+ { 0x00600040, 0x46002529, 0x00ae0600, 0x00ae0620 },
+ { 0x80600040, 0xc8402d29, 0x00ae0600, 0x00800080 },
+ { 0x00600008, 0x46403d29, 0x00ae8000, 0x00010001 },
+ { 0x00600008, 0x46603d29, 0x00ae8020, 0x00010001 },
+ { 0x00600008, 0x46803d29, 0x00ae8040, 0x00010001 },
+ { 0x00600008, 0x46a03d29, 0x00ae8060, 0x00010001 },
+ { 0x00600040, 0x46402529, 0x00ae0640, 0x00ae0660 },
+ { 0x80600040, 0xc0002d29, 0x00ae0640, 0x00800080 },
+ { 0x00600040, 0x46802529, 0x00ae0680, 0x00ae06a0 },
+ { 0x80600040, 0xc0402d29, 0x00ae0680, 0x00800080 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x04000400 },
+ { 0x80800040, 0xb4002d29, 0x00b19400, 0x00800080 },
+ { 0x80800040, 0xb4202d29, 0x00b19420, 0x00800080 },
+ { 0x80800040, 0xb4402d29, 0x00b19440, 0x00800080 },
+ { 0x80800040, 0xb4602d29, 0x00b19460, 0x00800080 },
+ { 0x00600008, 0x45c03d29, 0x00ae9800, 0x00010001 },
+ { 0x00600008, 0x45e03d29, 0x00ae9820, 0x00010001 },
+ { 0x00600008, 0x46003d29, 0x00ae9840, 0x00010001 },
+ { 0x00600008, 0x46203d29, 0x00ae9860, 0x00010001 },
+ { 0x00600040, 0x45c02529, 0x00ae05c0, 0x00ae05e0 },
+ { 0x80600040, 0xd8002d29, 0x00ae05c0, 0x00800080 },
+ { 0x00600040, 0x46002529, 0x00ae0600, 0x00ae0620 },
+ { 0x80600040, 0xd8402d29, 0x00ae0600, 0x00800080 },
+ { 0x00600008, 0x46403d29, 0x00ae9000, 0x00010001 },
+ { 0x00600008, 0x46603d29, 0x00ae9020, 0x00010001 },
+ { 0x00600008, 0x46803d29, 0x00ae9040, 0x00010001 },
+ { 0x00600008, 0x46a03d29, 0x00ae9060, 0x00010001 },
+ { 0x00600040, 0x46402529, 0x00ae0640, 0x00ae0660 },
+ { 0x80600040, 0xd0002d29, 0x00ae0640, 0x00800080 },
+ { 0x00600040, 0x46802529, 0x00ae0680, 0x00ae06a0 },
+ { 0x80600040, 0xd0402d29, 0x00ae0680, 0x00800080 },
+ { 0x00400040, 0x22082da8, 0x006902c0, 0x06000600 },
+ { 0x80800040, 0xa4002d29, 0x00b18400, 0x00800080 },
+ { 0x80800040, 0xa4202d29, 0x00b18420, 0x00800080 },
+ { 0x80800040, 0xa4402d29, 0x00b18440, 0x00800080 },
+ { 0x80800040, 0xa4602d29, 0x00b18460, 0x00800080 },
+ { 0x00600008, 0x45c03d29, 0x00ae8800, 0x00010001 },
+ { 0x00600008, 0x45e03d29, 0x00ae8820, 0x00010001 },
+ { 0x00600008, 0x46003d29, 0x00ae8840, 0x00010001 },
+ { 0x00600008, 0x46203d29, 0x00ae8860, 0x00010001 },
+ { 0x00600040, 0x45c02529, 0x00ae05c0, 0x00ae05e0 },
+ { 0x80600040, 0xc8002d29, 0x00ae05c0, 0x00800080 },
+ { 0x00600040, 0x46002529, 0x00ae0600, 0x00ae0620 },
+ { 0x80600040, 0xc8402d29, 0x00ae0600, 0x00800080 },
+ { 0x00600008, 0x46403d29, 0x00ae8000, 0x00010001 },
+ { 0x00600008, 0x46603d29, 0x00ae8020, 0x00010001 },
+ { 0x00600008, 0x46803d29, 0x00ae8040, 0x00010001 },
+ { 0x00600008, 0x46a03d29, 0x00ae8060, 0x00010001 },
+ { 0x00600040, 0x46402529, 0x00ae0640, 0x00ae0660 },
+ { 0x80600040, 0xc0002d29, 0x00ae0640, 0x00800080 },
+ { 0x00600040, 0x46802529, 0x00ae0680, 0x00ae06a0 },
+ { 0x80600040, 0xc0402d29, 0x00ae0680, 0x00800080 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x04000400 },
+ { 0x80800040, 0xb4002d29, 0x00b19400, 0x00800080 },
+ { 0x80800040, 0xb4202d29, 0x00b19420, 0x00800080 },
+ { 0x80800040, 0xb4402d29, 0x00b19440, 0x00800080 },
+ { 0x80800040, 0xb4602d29, 0x00b19460, 0x00800080 },
+ { 0x00600008, 0x45c03d29, 0x00ae9800, 0x00010001 },
+ { 0x00600008, 0x45e03d29, 0x00ae9820, 0x00010001 },
+ { 0x00600008, 0x46003d29, 0x00ae9840, 0x00010001 },
+ { 0x00600008, 0x46203d29, 0x00ae9860, 0x00010001 },
+ { 0x00600040, 0x45c02529, 0x00ae05c0, 0x00ae05e0 },
+ { 0x80600040, 0xd8002d29, 0x00ae05c0, 0x00800080 },
+ { 0x00600040, 0x46002529, 0x00ae0600, 0x00ae0620 },
+ { 0x80600040, 0xd8402d29, 0x00ae0600, 0x00800080 },
+ { 0x00600008, 0x46403d29, 0x00ae9000, 0x00010001 },
+ { 0x00600008, 0x46603d29, 0x00ae9020, 0x00010001 },
+ { 0x00600008, 0x46803d29, 0x00ae9040, 0x00010001 },
+ { 0x00600008, 0x46a03d29, 0x00ae9060, 0x00010001 },
+ { 0x00600040, 0x46402529, 0x00ae0640, 0x00ae0660 },
+ { 0x80600040, 0xd0002d29, 0x00ae0640, 0x00800080 },
+ { 0x00600040, 0x46802529, 0x00ae0680, 0x00ae06a0 },
+ { 0x80600040, 0xd0402d29, 0x00ae0680, 0x00800080 },
+ { 0x00400040, 0x22082da8, 0x006902c0, 0x06000600 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x00000000 },
+ { 0x00400040, 0x22082da8, 0x006902c0, 0x02000200 },
+ { 0x00800401, 0x23a00231, 0x00d28401, 0x00000000 },
+ { 0x00800801, 0x23b00231, 0x00d28421, 0x00000000 },
+ { 0x00800401, 0x23c00231, 0x00d28441, 0x00000000 },
+ { 0x00800801, 0x23d00231, 0x00d28461, 0x00000000 },
+ { 0x00600401, 0x44c00231, 0x00cf8801, 0x00000000 },
+ { 0x00600c01, 0x44d00231, 0x00cf8841, 0x00000000 },
+ { 0x00600c01, 0x44c10231, 0x00cf8001, 0x00000000 },
+ { 0x00600801, 0x44d10231, 0x00cf8041, 0x00000000 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x04000400 },
+ { 0x00800401, 0x23e00231, 0x00d29401, 0x00000000 },
+ { 0x00800801, 0x23f00231, 0x00d29421, 0x00000000 },
+ { 0x00800401, 0x24000231, 0x00d29441, 0x00000000 },
+ { 0x00800801, 0x24100231, 0x00d29461, 0x00000000 },
+ { 0x00600401, 0x44e00231, 0x00cf9801, 0x00000000 },
+ { 0x00600c01, 0x44f00231, 0x00cf9841, 0x00000000 },
+ { 0x00600c01, 0x44e10231, 0x00cf9001, 0x00000000 },
+ { 0x00600801, 0x44f10231, 0x00cf9041, 0x00000000 },
+ { 0x00400040, 0x22082da8, 0x006902c0, 0x06000600 },
+ { 0x00800401, 0x24200231, 0x00d28401, 0x00000000 },
+ { 0x00800801, 0x24300231, 0x00d28421, 0x00000000 },
+ { 0x00800401, 0x24400231, 0x00d28441, 0x00000000 },
+ { 0x00800801, 0x24500231, 0x00d28461, 0x00000000 },
+ { 0x00600401, 0x45000231, 0x00cf8801, 0x00000000 },
+ { 0x00600c01, 0x45100231, 0x00cf8841, 0x00000000 },
+ { 0x00600c01, 0x45010231, 0x00cf8001, 0x00000000 },
+ { 0x00600801, 0x45110231, 0x00cf8041, 0x00000000 },
+ { 0x00800401, 0x24600231, 0x00d29401, 0x00000000 },
+ { 0x00800801, 0x24700231, 0x00d29421, 0x00000000 },
+ { 0x00800401, 0x24800231, 0x00d29441, 0x00000000 },
+ { 0x00800801, 0x24900231, 0x00d29461, 0x00000000 },
+ { 0x00600401, 0x45200231, 0x00cf9801, 0x00000000 },
+ { 0x00600c01, 0x45300231, 0x00cf9841, 0x00000000 },
+ { 0x00600c01, 0x45210231, 0x00cf9001, 0x00000000 },
+ { 0x00600801, 0x45310231, 0x00cf9041, 0x00000000 },
+ { 0x05000031, 0x20000e24, 0x00000380, 0x120a8018 },
+ { 0x05000031, 0x20000e24, 0x000004a0, 0x0a0a8019 },
+ { 0x00600001, 0x2fe00021, 0x008d0000, 0x00000000 },
+ { 0x07000031, 0x20001e24, 0x00000fe0, 0x82000010 },
diff --git a/src/shaders/post_processing/gen7/pa_to_pl2.g7b b/src/shaders/post_processing/gen7/pa_to_pl2.g7b
new file mode 100644
index 0000000..d3605bc
--- /dev/null
+++ b/src/shaders/post_processing/gen7/pa_to_pl2.g7b
@@ -0,0 +1,633 @@
+ { 0x00600001, 0x23600021, 0x008d0000, 0x00000000 },
+ { 0x00600001, 0x23200021, 0x008d0000, 0x00000000 },
+ { 0x00600001, 0x23400021, 0x008d0000, 0x00000000 },
+ { 0x00600041, 0x206077bd, 0x008d0060, 0x000000f0 },
+ { 0x00200001, 0x211401bd, 0x004500e0, 0x00000000 },
+ { 0x01600010, 0x20002e24, 0x0000005a, 0x00010001 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x00000000 },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+ { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c077bd, 0x00000060, 0x00000114 },
+ { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a077bd, 0x00000080, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c077bd, 0x00000060, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a077bd, 0x00000080, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c077bd, 0x00000060, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a077bd, 0x00000080, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c077bd, 0x00000060, 0x00000220 },
+ { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a077bd, 0x00000080, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x00020002 },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+ { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+ { 0x00110048, 0x20c477bd, 0x00000064, 0x00000114 },
+ { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+ { 0x00000048, 0x20a477bd, 0x00000084, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+ { 0x00110048, 0x20c477bd, 0x00000064, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+ { 0x00000048, 0x20a477bd, 0x00000084, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+ { 0x00110048, 0x20c477bd, 0x00000064, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+ { 0x00000048, 0x20a477bd, 0x00000084, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+ { 0x00110048, 0x20c477bd, 0x00000064, 0x00000220 },
+ { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+ { 0x00000048, 0x20a477bd, 0x00000084, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x00040004 },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+ { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+ { 0x00110048, 0x20c877bd, 0x00000068, 0x00000114 },
+ { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+ { 0x00000048, 0x20a877bd, 0x00000088, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+ { 0x00110048, 0x20c877bd, 0x00000068, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+ { 0x00000048, 0x20a877bd, 0x00000088, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+ { 0x00110048, 0x20c877bd, 0x00000068, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+ { 0x00000048, 0x20a877bd, 0x00000088, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+ { 0x00110048, 0x20c877bd, 0x00000068, 0x00000220 },
+ { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+ { 0x00000048, 0x20a877bd, 0x00000088, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x00060006 },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+ { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+ { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000114 },
+ { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+ { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+ { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+ { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+ { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+ { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+ { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000220 },
+ { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+ { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x00080008 },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+ { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+ { 0x00110048, 0x20d077bd, 0x00000070, 0x00000114 },
+ { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+ { 0x00000048, 0x20b077bd, 0x00000090, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+ { 0x00110048, 0x20d077bd, 0x00000070, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+ { 0x00000048, 0x20b077bd, 0x00000090, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+ { 0x00110048, 0x20d077bd, 0x00000070, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+ { 0x00000048, 0x20b077bd, 0x00000090, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+ { 0x00110048, 0x20d077bd, 0x00000070, 0x00000220 },
+ { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+ { 0x00000048, 0x20b077bd, 0x00000090, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x000a000a },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+ { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+ { 0x00110048, 0x20d477bd, 0x00000074, 0x00000114 },
+ { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+ { 0x00000048, 0x20b477bd, 0x00000094, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+ { 0x00110048, 0x20d477bd, 0x00000074, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+ { 0x00000048, 0x20b477bd, 0x00000094, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+ { 0x00110048, 0x20d477bd, 0x00000074, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+ { 0x00000048, 0x20b477bd, 0x00000094, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+ { 0x00110048, 0x20d477bd, 0x00000074, 0x00000220 },
+ { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+ { 0x00000048, 0x20b477bd, 0x00000094, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x000c000c },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+ { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+ { 0x00110048, 0x20d877bd, 0x00000078, 0x00000114 },
+ { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+ { 0x00000048, 0x20b877bd, 0x00000098, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+ { 0x00110048, 0x20d877bd, 0x00000078, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+ { 0x00000048, 0x20b877bd, 0x00000098, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+ { 0x00110048, 0x20d877bd, 0x00000078, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+ { 0x00000048, 0x20b877bd, 0x00000098, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+ { 0x00110048, 0x20d877bd, 0x00000078, 0x00000220 },
+ { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+ { 0x00000048, 0x20b877bd, 0x00000098, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x000e000e },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+ { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+ { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000114 },
+ { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+ { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+ { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+ { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+ { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+ { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+ { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000220 },
+ { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+ { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00800001, 0x22a00169, 0x00000000, 0xffffffff },
+ { 0x02000010, 0x20002d24, 0x020000e4, 0x00000000 },
+ { 0x02010010, 0x20002d24, 0x020000e6, 0x00000000 },
+ { 0x00110220, 0x34001c00, 0x02001400, 0x0000001a },
+ { 0x00000005, 0x23022d31, 0x00000044, 0x00030003 },
+ { 0x00000401, 0x233803bd, 0x000000f4, 0x00000000 },
+ { 0x00000c01, 0x233003bd, 0x00000060, 0x00000000 },
+ { 0x00000c01, 0x233403bd, 0x00000080, 0x00000000 },
+ { 0x00000c01, 0x232803bd, 0x000000c0, 0x00000000 },
+ { 0x00000801, 0x232c03bd, 0x000000a0, 0x00000000 },
+ { 0x00000001, 0x240803bc, 0x000000c0, 0x00000000 },
+ { 0x00000048, 0x24087fbc, 0x00000060, 0x41000000 },
+ { 0x00000448, 0x22e87fbd, 0x000000f4, 0x41e00000 },
+ { 0x00000c41, 0x22e47fbd, 0x00000080, 0x40800000 },
+ { 0x00000001, 0x241003bc, 0x00000060, 0x00000000 },
+ { 0x00000c48, 0x22f07fbd, 0x000000f4, 0x41000000 },
+ { 0x00000801, 0x22f40061, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00400001, 0x2400036c, 0x00000000, 0x000062ea },
+ { 0x00400040, 0x24002d8c, 0x00690400, 0x00460046 },
+ { 0x00400009, 0x22c02d8d, 0x00690400, 0x00050005 },
+ { 0x00400001, 0x2400036c, 0x00000000, 0x00006420 },
+ { 0x00400040, 0x24002d8c, 0x00690400, 0x00400040 },
+ { 0x00400409, 0x22402d8d, 0x00690400, 0x00050005 },
+ { 0x00000801, 0x22500061, 0x00000000, 0x01000100 },
+ { 0x00000001, 0x22d00061, 0x00000000, 0x00400040 },
+ { 0x00110220, 0x34001c00, 0x02001400, 0x00000046 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x050eb400 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x00000000 },
+ { 0x00000401, 0x233c0021, 0x000000fc, 0x00000000 },
+ { 0x00000801, 0x23240121, 0x000000f8, 0x00000000 },
+ { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+ { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+ { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+ { 0x00110220, 0x34001c00, 0x00001400, 0x00000032 },
+ { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+ { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+ { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+ { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+ { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+ { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+ { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+ { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+ { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+ { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+ { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+ { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+ { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+ { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+ { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+ { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+ { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+ { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+ { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+ { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+ { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+ { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+ { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+ { 0x02000031, 0x28000229, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00110220, 0x34001c00, 0x02001400, 0x00000046 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x050eb400 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x00000000 },
+ { 0x00000040, 0x23240d21, 0x000000f8, 0x00000001 },
+ { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+ { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+ { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+ { 0x00110220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+ { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+ { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+ { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+ { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+ { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+ { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+ { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+ { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+ { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+ { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+ { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+ { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+ { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+ { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+ { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+ { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+ { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+ { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+ { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+ { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+ { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+ { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+ { 0x02000031, 0x2a000229, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00110220, 0x34001c00, 0x02001400, 0x00000046 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x050eb400 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x00000000 },
+ { 0x00000040, 0x23240d21, 0x000000f8, 0x00000002 },
+ { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+ { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+ { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+ { 0x00110220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+ { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+ { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+ { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+ { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+ { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+ { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+ { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+ { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+ { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+ { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+ { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+ { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+ { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+ { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+ { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+ { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+ { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+ { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+ { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+ { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+ { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+ { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+ { 0x02000031, 0x2c000229, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00110220, 0x34001c00, 0x02001400, 0x00000046 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x050eb400 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x00000000 },
+ { 0x00000040, 0x23240d21, 0x000000f8, 0x00000003 },
+ { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+ { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+ { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+ { 0x00110220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+ { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+ { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+ { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+ { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+ { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+ { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+ { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+ { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+ { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+ { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+ { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+ { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+ { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+ { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+ { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+ { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+ { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+ { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+ { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+ { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+ { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+ { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+ { 0x02000031, 0x2e000229, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x00000000 },
+ { 0x00400040, 0x22082da8, 0x006902c0, 0x02000200 },
+ { 0x00600001, 0x23800021, 0x008d0360, 0x00000000 },
+ { 0x00600001, 0x24a00021, 0x008d0360, 0x00000000 },
+ { 0x00200401, 0x238001a5, 0x004500e0, 0x00000000 },
+ { 0x00000401, 0x24a001a5, 0x000000e0, 0x00000000 },
+ { 0x00000c08, 0x24a43da5, 0x000000e2, 0x00010001 },
+ { 0x00000801, 0x23880061, 0x00000000, 0x000f000f },
+ { 0x00000801, 0x24a80061, 0x00000000, 0x0007000f },
+ { 0x80800040, 0xa4002d29, 0x00b18400, 0x00800080 },
+ { 0x80800040, 0xa4202d29, 0x00b18420, 0x00800080 },
+ { 0x80800040, 0xa4402d29, 0x00b18440, 0x00800080 },
+ { 0x80800040, 0xa4602d29, 0x00b18460, 0x00800080 },
+ { 0x00600008, 0x45c03d29, 0x00ae8800, 0x00010001 },
+ { 0x00600008, 0x45e03d29, 0x00ae8820, 0x00010001 },
+ { 0x00600008, 0x46003d29, 0x00ae8840, 0x00010001 },
+ { 0x00600008, 0x46203d29, 0x00ae8860, 0x00010001 },
+ { 0x00600040, 0x45c02529, 0x00ae05c0, 0x00ae05e0 },
+ { 0x80600040, 0xc8002d29, 0x00ae05c0, 0x00800080 },
+ { 0x00600040, 0x46002529, 0x00ae0600, 0x00ae0620 },
+ { 0x80600040, 0xc8402d29, 0x00ae0600, 0x00800080 },
+ { 0x00600008, 0x46403d29, 0x00ae8000, 0x00010001 },
+ { 0x00600008, 0x46603d29, 0x00ae8020, 0x00010001 },
+ { 0x00600008, 0x46803d29, 0x00ae8040, 0x00010001 },
+ { 0x00600008, 0x46a03d29, 0x00ae8060, 0x00010001 },
+ { 0x00600040, 0x46402529, 0x00ae0640, 0x00ae0660 },
+ { 0x80600040, 0xc0002d29, 0x00ae0640, 0x00800080 },
+ { 0x00600040, 0x46802529, 0x00ae0680, 0x00ae06a0 },
+ { 0x80600040, 0xc0402d29, 0x00ae0680, 0x00800080 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x04000400 },
+ { 0x80800040, 0xb4002d29, 0x00b19400, 0x00800080 },
+ { 0x80800040, 0xb4202d29, 0x00b19420, 0x00800080 },
+ { 0x80800040, 0xb4402d29, 0x00b19440, 0x00800080 },
+ { 0x80800040, 0xb4602d29, 0x00b19460, 0x00800080 },
+ { 0x00600008, 0x45c03d29, 0x00ae9800, 0x00010001 },
+ { 0x00600008, 0x45e03d29, 0x00ae9820, 0x00010001 },
+ { 0x00600008, 0x46003d29, 0x00ae9840, 0x00010001 },
+ { 0x00600008, 0x46203d29, 0x00ae9860, 0x00010001 },
+ { 0x00600040, 0x45c02529, 0x00ae05c0, 0x00ae05e0 },
+ { 0x80600040, 0xd8002d29, 0x00ae05c0, 0x00800080 },
+ { 0x00600040, 0x46002529, 0x00ae0600, 0x00ae0620 },
+ { 0x80600040, 0xd8402d29, 0x00ae0600, 0x00800080 },
+ { 0x00600008, 0x46403d29, 0x00ae9000, 0x00010001 },
+ { 0x00600008, 0x46603d29, 0x00ae9020, 0x00010001 },
+ { 0x00600008, 0x46803d29, 0x00ae9040, 0x00010001 },
+ { 0x00600008, 0x46a03d29, 0x00ae9060, 0x00010001 },
+ { 0x00600040, 0x46402529, 0x00ae0640, 0x00ae0660 },
+ { 0x80600040, 0xd0002d29, 0x00ae0640, 0x00800080 },
+ { 0x00600040, 0x46802529, 0x00ae0680, 0x00ae06a0 },
+ { 0x80600040, 0xd0402d29, 0x00ae0680, 0x00800080 },
+ { 0x00400040, 0x22082da8, 0x006902c0, 0x06000600 },
+ { 0x80800040, 0xa4002d29, 0x00b18400, 0x00800080 },
+ { 0x80800040, 0xa4202d29, 0x00b18420, 0x00800080 },
+ { 0x80800040, 0xa4402d29, 0x00b18440, 0x00800080 },
+ { 0x80800040, 0xa4602d29, 0x00b18460, 0x00800080 },
+ { 0x00600008, 0x45c03d29, 0x00ae8800, 0x00010001 },
+ { 0x00600008, 0x45e03d29, 0x00ae8820, 0x00010001 },
+ { 0x00600008, 0x46003d29, 0x00ae8840, 0x00010001 },
+ { 0x00600008, 0x46203d29, 0x00ae8860, 0x00010001 },
+ { 0x00600040, 0x45c02529, 0x00ae05c0, 0x00ae05e0 },
+ { 0x80600040, 0xc8002d29, 0x00ae05c0, 0x00800080 },
+ { 0x00600040, 0x46002529, 0x00ae0600, 0x00ae0620 },
+ { 0x80600040, 0xc8402d29, 0x00ae0600, 0x00800080 },
+ { 0x00600008, 0x46403d29, 0x00ae8000, 0x00010001 },
+ { 0x00600008, 0x46603d29, 0x00ae8020, 0x00010001 },
+ { 0x00600008, 0x46803d29, 0x00ae8040, 0x00010001 },
+ { 0x00600008, 0x46a03d29, 0x00ae8060, 0x00010001 },
+ { 0x00600040, 0x46402529, 0x00ae0640, 0x00ae0660 },
+ { 0x80600040, 0xc0002d29, 0x00ae0640, 0x00800080 },
+ { 0x00600040, 0x46802529, 0x00ae0680, 0x00ae06a0 },
+ { 0x80600040, 0xc0402d29, 0x00ae0680, 0x00800080 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x04000400 },
+ { 0x80800040, 0xb4002d29, 0x00b19400, 0x00800080 },
+ { 0x80800040, 0xb4202d29, 0x00b19420, 0x00800080 },
+ { 0x80800040, 0xb4402d29, 0x00b19440, 0x00800080 },
+ { 0x80800040, 0xb4602d29, 0x00b19460, 0x00800080 },
+ { 0x00600008, 0x45c03d29, 0x00ae9800, 0x00010001 },
+ { 0x00600008, 0x45e03d29, 0x00ae9820, 0x00010001 },
+ { 0x00600008, 0x46003d29, 0x00ae9840, 0x00010001 },
+ { 0x00600008, 0x46203d29, 0x00ae9860, 0x00010001 },
+ { 0x00600040, 0x45c02529, 0x00ae05c0, 0x00ae05e0 },
+ { 0x80600040, 0xd8002d29, 0x00ae05c0, 0x00800080 },
+ { 0x00600040, 0x46002529, 0x00ae0600, 0x00ae0620 },
+ { 0x80600040, 0xd8402d29, 0x00ae0600, 0x00800080 },
+ { 0x00600008, 0x46403d29, 0x00ae9000, 0x00010001 },
+ { 0x00600008, 0x46603d29, 0x00ae9020, 0x00010001 },
+ { 0x00600008, 0x46803d29, 0x00ae9040, 0x00010001 },
+ { 0x00600008, 0x46a03d29, 0x00ae9060, 0x00010001 },
+ { 0x00600040, 0x46402529, 0x00ae0640, 0x00ae0660 },
+ { 0x80600040, 0xd0002d29, 0x00ae0640, 0x00800080 },
+ { 0x00600040, 0x46802529, 0x00ae0680, 0x00ae06a0 },
+ { 0x80600040, 0xd0402d29, 0x00ae0680, 0x00800080 },
+ { 0x00400040, 0x22082da8, 0x006902c0, 0x06000600 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x00000000 },
+ { 0x00400040, 0x22082da8, 0x006902c0, 0x02000200 },
+ { 0x00800401, 0x23a00231, 0x00d28401, 0x00000000 },
+ { 0x00800801, 0x23b00231, 0x00d28421, 0x00000000 },
+ { 0x00800401, 0x23c00231, 0x00d28441, 0x00000000 },
+ { 0x00800801, 0x23d00231, 0x00d28461, 0x00000000 },
+ { 0x00600401, 0x44c00231, 0x00cf8801, 0x00000000 },
+ { 0x00600c01, 0x44d00231, 0x00cf8841, 0x00000000 },
+ { 0x00600c01, 0x44c10231, 0x00cf8001, 0x00000000 },
+ { 0x00600801, 0x44d10231, 0x00cf8041, 0x00000000 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x04000400 },
+ { 0x00800401, 0x23e00231, 0x00d29401, 0x00000000 },
+ { 0x00800801, 0x23f00231, 0x00d29421, 0x00000000 },
+ { 0x00800401, 0x24000231, 0x00d29441, 0x00000000 },
+ { 0x00800801, 0x24100231, 0x00d29461, 0x00000000 },
+ { 0x00600401, 0x44e00231, 0x00cf9801, 0x00000000 },
+ { 0x00600c01, 0x44f00231, 0x00cf9841, 0x00000000 },
+ { 0x00600c01, 0x44e10231, 0x00cf9001, 0x00000000 },
+ { 0x00600801, 0x44f10231, 0x00cf9041, 0x00000000 },
+ { 0x00400040, 0x22082da8, 0x006902c0, 0x06000600 },
+ { 0x00800401, 0x24200231, 0x00d28401, 0x00000000 },
+ { 0x00800801, 0x24300231, 0x00d28421, 0x00000000 },
+ { 0x00800401, 0x24400231, 0x00d28441, 0x00000000 },
+ { 0x00800801, 0x24500231, 0x00d28461, 0x00000000 },
+ { 0x00600401, 0x45000231, 0x00cf8801, 0x00000000 },
+ { 0x00600c01, 0x45100231, 0x00cf8841, 0x00000000 },
+ { 0x00600c01, 0x45010231, 0x00cf8001, 0x00000000 },
+ { 0x00600801, 0x45110231, 0x00cf8041, 0x00000000 },
+ { 0x00800401, 0x24600231, 0x00d29401, 0x00000000 },
+ { 0x00800801, 0x24700231, 0x00d29421, 0x00000000 },
+ { 0x00800401, 0x24800231, 0x00d29441, 0x00000000 },
+ { 0x00800801, 0x24900231, 0x00d29461, 0x00000000 },
+ { 0x00600401, 0x45200231, 0x00cf9801, 0x00000000 },
+ { 0x00600c01, 0x45300231, 0x00cf9841, 0x00000000 },
+ { 0x00600c01, 0x45210231, 0x00cf9001, 0x00000000 },
+ { 0x00600801, 0x45310231, 0x00cf9041, 0x00000000 },
+ { 0x05000031, 0x20000e24, 0x00000380, 0x120a8018 },
+ { 0x05000031, 0x20000e24, 0x000004a0, 0x0a0a8019 },
+ { 0x00600001, 0x2fe00021, 0x008d0000, 0x00000000 },
+ { 0x07000031, 0x20001e24, 0x00000fe0, 0x82000010 },
diff --git a/src/shaders/post_processing/gen7/pa_to_pl3.asm b/src/shaders/post_processing/gen7/pa_to_pl3.asm
new file mode 100644
index 0000000..d149f48
--- /dev/null
+++ b/src/shaders/post_processing/gen7/pa_to_pl3.asm
@@ -0,0 +1,17 @@
+// Module name: AVS
+.kernel PA_TO_PL3
+.code
+
+#include "VP_Setup.g4a"
+#include "Set_Layer_0.g4a"
+#include "Set_AVS_Buf_0123_VYUA.g4a"
+#include "PA_AVS_Buf_0.g4a"
+#include "PA_AVS_Buf_1.g4a"
+#include "PA_AVS_Buf_2.g4a"
+#include "PA_AVS_Buf_3.g4a"
+#include "Save_AVS_PL3.g4a"
+#include "EOT.g4a"
+
+.end_code
+
+.end_kernel
diff --git a/src/shaders/post_processing/gen7/pa_to_pl3.g75b b/src/shaders/post_processing/gen7/pa_to_pl3.g75b
new file mode 100644
index 0000000..bf68443
--- /dev/null
+++ b/src/shaders/post_processing/gen7/pa_to_pl3.g75b
@@ -0,0 +1,586 @@
+ { 0x00600001, 0x23600021, 0x008d0000, 0x00000000 },
+ { 0x00600001, 0x23200021, 0x008d0000, 0x00000000 },
+ { 0x00600001, 0x23400021, 0x008d0000, 0x00000000 },
+ { 0x00600041, 0x206077bd, 0x008d0060, 0x000000f0 },
+ { 0x00200001, 0x211401bd, 0x004500e0, 0x00000000 },
+ { 0x01600010, 0x20002e24, 0x0000005a, 0x00010001 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x00000000 },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+ { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c077bd, 0x00000060, 0x00000114 },
+ { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a077bd, 0x00000080, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c077bd, 0x00000060, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a077bd, 0x00000080, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c077bd, 0x00000060, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a077bd, 0x00000080, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c077bd, 0x00000060, 0x00000220 },
+ { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a077bd, 0x00000080, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x00020002 },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+ { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+ { 0x00110048, 0x20c477bd, 0x00000064, 0x00000114 },
+ { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+ { 0x00000048, 0x20a477bd, 0x00000084, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+ { 0x00110048, 0x20c477bd, 0x00000064, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+ { 0x00000048, 0x20a477bd, 0x00000084, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+ { 0x00110048, 0x20c477bd, 0x00000064, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+ { 0x00000048, 0x20a477bd, 0x00000084, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+ { 0x00110048, 0x20c477bd, 0x00000064, 0x00000220 },
+ { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+ { 0x00000048, 0x20a477bd, 0x00000084, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x00040004 },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+ { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+ { 0x00110048, 0x20c877bd, 0x00000068, 0x00000114 },
+ { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+ { 0x00000048, 0x20a877bd, 0x00000088, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+ { 0x00110048, 0x20c877bd, 0x00000068, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+ { 0x00000048, 0x20a877bd, 0x00000088, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+ { 0x00110048, 0x20c877bd, 0x00000068, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+ { 0x00000048, 0x20a877bd, 0x00000088, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+ { 0x00110048, 0x20c877bd, 0x00000068, 0x00000220 },
+ { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+ { 0x00000048, 0x20a877bd, 0x00000088, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x00060006 },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+ { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+ { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000114 },
+ { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+ { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+ { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+ { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+ { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+ { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+ { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000220 },
+ { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+ { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x00080008 },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+ { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+ { 0x00110048, 0x20d077bd, 0x00000070, 0x00000114 },
+ { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+ { 0x00000048, 0x20b077bd, 0x00000090, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+ { 0x00110048, 0x20d077bd, 0x00000070, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+ { 0x00000048, 0x20b077bd, 0x00000090, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+ { 0x00110048, 0x20d077bd, 0x00000070, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+ { 0x00000048, 0x20b077bd, 0x00000090, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+ { 0x00110048, 0x20d077bd, 0x00000070, 0x00000220 },
+ { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+ { 0x00000048, 0x20b077bd, 0x00000090, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x000a000a },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+ { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+ { 0x00110048, 0x20d477bd, 0x00000074, 0x00000114 },
+ { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+ { 0x00000048, 0x20b477bd, 0x00000094, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+ { 0x00110048, 0x20d477bd, 0x00000074, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+ { 0x00000048, 0x20b477bd, 0x00000094, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+ { 0x00110048, 0x20d477bd, 0x00000074, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+ { 0x00000048, 0x20b477bd, 0x00000094, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+ { 0x00110048, 0x20d477bd, 0x00000074, 0x00000220 },
+ { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+ { 0x00000048, 0x20b477bd, 0x00000094, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x000c000c },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+ { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+ { 0x00110048, 0x20d877bd, 0x00000078, 0x00000114 },
+ { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+ { 0x00000048, 0x20b877bd, 0x00000098, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+ { 0x00110048, 0x20d877bd, 0x00000078, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+ { 0x00000048, 0x20b877bd, 0x00000098, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+ { 0x00110048, 0x20d877bd, 0x00000078, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+ { 0x00000048, 0x20b877bd, 0x00000098, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+ { 0x00110048, 0x20d877bd, 0x00000078, 0x00000220 },
+ { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+ { 0x00000048, 0x20b877bd, 0x00000098, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x000e000e },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+ { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+ { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000114 },
+ { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+ { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+ { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+ { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+ { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+ { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+ { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000220 },
+ { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+ { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00800001, 0x22a00169, 0x00000000, 0xffffffff },
+ { 0x02000010, 0x20002d24, 0x020000e4, 0x00000000 },
+ { 0x02010010, 0x20002d24, 0x020000e6, 0x00000000 },
+ { 0x00110220, 0x34001c00, 0x02001400, 0x000000d0 },
+ { 0x00000005, 0x23022d31, 0x00000044, 0x00030003 },
+ { 0x00000401, 0x233803bd, 0x000000f4, 0x00000000 },
+ { 0x00000c01, 0x233003bd, 0x00000060, 0x00000000 },
+ { 0x00000c01, 0x233403bd, 0x00000080, 0x00000000 },
+ { 0x00000c01, 0x232803bd, 0x000000c0, 0x00000000 },
+ { 0x00000801, 0x232c03bd, 0x000000a0, 0x00000000 },
+ { 0x00000001, 0x240803bc, 0x000000c0, 0x00000000 },
+ { 0x00000048, 0x24087fbc, 0x00000060, 0x41000000 },
+ { 0x00000448, 0x22e87fbd, 0x000000f4, 0x41e00000 },
+ { 0x00000c41, 0x22e47fbd, 0x00000080, 0x40800000 },
+ { 0x00000001, 0x241003bc, 0x00000060, 0x00000000 },
+ { 0x00000c48, 0x22f07fbd, 0x000000f4, 0x41000000 },
+ { 0x00000801, 0x22f40061, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00400001, 0x2400036c, 0x00000000, 0x000062ea },
+ { 0x00400040, 0x24002d8c, 0x00690400, 0x00460046 },
+ { 0x00400009, 0x22c02d8d, 0x00690400, 0x00050005 },
+ { 0x00400001, 0x2400036c, 0x00000000, 0x00006420 },
+ { 0x00400040, 0x24002d8c, 0x00690400, 0x00400040 },
+ { 0x00400409, 0x22402d8d, 0x00690400, 0x00050005 },
+ { 0x00000801, 0x22500061, 0x00000000, 0x01000100 },
+ { 0x00000001, 0x22d00061, 0x00000000, 0x00400040 },
+ { 0x00110220, 0x34001c00, 0x02001400, 0x00000230 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x050eb400 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x00000000 },
+ { 0x00000401, 0x233c0021, 0x000000fc, 0x00000000 },
+ { 0x00000801, 0x23240121, 0x000000f8, 0x00000000 },
+ { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+ { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+ { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+ { 0x00110220, 0x34001c00, 0x00001400, 0x00000190 },
+ { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+ { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+ { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+ { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+ { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+ { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+ { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+ { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+ { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+ { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+ { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+ { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+ { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+ { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+ { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+ { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+ { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+ { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+ { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+ { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+ { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+ { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+ { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+ { 0x02000031, 0x28000229, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00110220, 0x34001c00, 0x02001400, 0x00000230 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x050eb400 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x00000000 },
+ { 0x00000040, 0x23240d21, 0x000000f8, 0x00000001 },
+ { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+ { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+ { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+ { 0x00110220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+ { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+ { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+ { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+ { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+ { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+ { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+ { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+ { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+ { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+ { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+ { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+ { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+ { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+ { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+ { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+ { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+ { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+ { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+ { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+ { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+ { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+ { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+ { 0x02000031, 0x2a000229, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00110220, 0x34001c00, 0x02001400, 0x00000230 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x050eb400 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x00000000 },
+ { 0x00000040, 0x23240d21, 0x000000f8, 0x00000002 },
+ { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+ { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+ { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+ { 0x00110220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+ { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+ { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+ { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+ { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+ { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+ { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+ { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+ { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+ { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+ { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+ { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+ { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+ { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+ { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+ { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+ { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+ { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+ { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+ { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+ { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+ { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+ { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+ { 0x02000031, 0x2c000229, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00110220, 0x34001c00, 0x02001400, 0x00000230 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x050eb400 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x00000000 },
+ { 0x00000040, 0x23240d21, 0x000000f8, 0x00000003 },
+ { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+ { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+ { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+ { 0x00110220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+ { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+ { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+ { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+ { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+ { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+ { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+ { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+ { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+ { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+ { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+ { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+ { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+ { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+ { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+ { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+ { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+ { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+ { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+ { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+ { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+ { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+ { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+ { 0x02000031, 0x2e000229, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x00000000 },
+ { 0x00400040, 0x22082da8, 0x006902c0, 0x02000200 },
+ { 0x00600001, 0x23800021, 0x008d0360, 0x00000000 },
+ { 0x00600001, 0x24a00021, 0x008d0360, 0x00000000 },
+ { 0x00600001, 0x25c00021, 0x008d0360, 0x00000000 },
+ { 0x00200401, 0x238001a5, 0x004500e0, 0x00000000 },
+ { 0x00200408, 0x24a03da5, 0x004500e0, 0x00010001 },
+ { 0x00200408, 0x25c03da5, 0x004500e0, 0x00010001 },
+ { 0x00000801, 0x23880061, 0x00000000, 0x000f000f },
+ { 0x00000801, 0x24a80061, 0x00000000, 0x00070007 },
+ { 0x00000801, 0x25c80061, 0x00000000, 0x00070007 },
+ { 0x80800040, 0xa4002d29, 0x00b18400, 0x00800080 },
+ { 0x80800040, 0xa4202d29, 0x00b18420, 0x00800080 },
+ { 0x80800040, 0xa4402d29, 0x00b18440, 0x00800080 },
+ { 0x80800040, 0xa4602d29, 0x00b18460, 0x00800080 },
+ { 0x80600040, 0xc8002d29, 0x00ae8800, 0x00800080 },
+ { 0x80600040, 0xc8402d29, 0x00ae8840, 0x00800080 },
+ { 0x80600040, 0xc0002d29, 0x00ae8000, 0x00800080 },
+ { 0x80600040, 0xc0402d29, 0x00ae8040, 0x00800080 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x04000400 },
+ { 0x80800040, 0xb4002d29, 0x00b19400, 0x00800080 },
+ { 0x80800040, 0xb4202d29, 0x00b19420, 0x00800080 },
+ { 0x80800040, 0xb4402d29, 0x00b19440, 0x00800080 },
+ { 0x80800040, 0xb4602d29, 0x00b19460, 0x00800080 },
+ { 0x80600040, 0xd8002d29, 0x00ae9800, 0x00800080 },
+ { 0x80600040, 0xd8402d29, 0x00ae9840, 0x00800080 },
+ { 0x80600040, 0xd0002d29, 0x00ae9000, 0x00800080 },
+ { 0x80600040, 0xd0402d29, 0x00ae9040, 0x00800080 },
+ { 0x00400040, 0x22082da8, 0x006902c0, 0x06000600 },
+ { 0x80800040, 0xa4002d29, 0x00b18400, 0x00800080 },
+ { 0x80800040, 0xa4202d29, 0x00b18420, 0x00800080 },
+ { 0x80800040, 0xa4402d29, 0x00b18440, 0x00800080 },
+ { 0x80800040, 0xa4602d29, 0x00b18460, 0x00800080 },
+ { 0x80600040, 0xc8002d29, 0x00ae8800, 0x00800080 },
+ { 0x80600040, 0xc8402d29, 0x00ae8840, 0x00800080 },
+ { 0x80600040, 0xc0002d29, 0x00ae8000, 0x00800080 },
+ { 0x80600040, 0xc0402d29, 0x00ae8040, 0x00800080 },
+ { 0x80800040, 0xb4002d29, 0x00b19400, 0x00800080 },
+ { 0x80800040, 0xb4202d29, 0x00b19420, 0x00800080 },
+ { 0x80800040, 0xb4402d29, 0x00b19440, 0x00800080 },
+ { 0x80800040, 0xb4602d29, 0x00b19460, 0x00800080 },
+ { 0x80600040, 0xd8002d29, 0x00ae9800, 0x00800080 },
+ { 0x80600040, 0xd8402d29, 0x00ae9840, 0x00800080 },
+ { 0x80600040, 0xd0002d29, 0x00ae9000, 0x00800080 },
+ { 0x80600040, 0xd0402d29, 0x00ae9040, 0x00800080 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x00000000 },
+ { 0x00400040, 0x22082da8, 0x006902c0, 0x02000200 },
+ { 0x00800401, 0x23a00231, 0x00d28401, 0x00000000 },
+ { 0x00800801, 0x23b00231, 0x00d28421, 0x00000000 },
+ { 0x00800401, 0x23c00231, 0x00d28441, 0x00000000 },
+ { 0x00800801, 0x23d00231, 0x00d28461, 0x00000000 },
+ { 0x00600401, 0x24c00231, 0x00cf8801, 0x00000000 },
+ { 0x00600c01, 0x24c80231, 0x00cf8841, 0x00000000 },
+ { 0x00600401, 0x25e00231, 0x00cf8001, 0x00000000 },
+ { 0x00600c01, 0x25e80231, 0x00cf8041, 0x00000000 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x04000400 },
+ { 0x00800401, 0x23e00231, 0x00d29401, 0x00000000 },
+ { 0x00800801, 0x23f00231, 0x00d29421, 0x00000000 },
+ { 0x00800401, 0x24000231, 0x00d29441, 0x00000000 },
+ { 0x00800801, 0x24100231, 0x00d29461, 0x00000000 },
+ { 0x00600c01, 0x24d00231, 0x00cf9801, 0x00000000 },
+ { 0x00600801, 0x24d80231, 0x00cf9841, 0x00000000 },
+ { 0x00600c01, 0x25f00231, 0x00cf9001, 0x00000000 },
+ { 0x00600801, 0x25f80231, 0x00cf9041, 0x00000000 },
+ { 0x00400040, 0x22082da8, 0x006902c0, 0x06000600 },
+ { 0x00800401, 0x24200231, 0x00d28401, 0x00000000 },
+ { 0x00800801, 0x24300231, 0x00d28421, 0x00000000 },
+ { 0x00800401, 0x24400231, 0x00d28441, 0x00000000 },
+ { 0x00800801, 0x24500231, 0x00d28461, 0x00000000 },
+ { 0x00600401, 0x24e00231, 0x00cf8801, 0x00000000 },
+ { 0x00600c01, 0x24e80231, 0x00cf8841, 0x00000000 },
+ { 0x00600401, 0x26000231, 0x00cf8001, 0x00000000 },
+ { 0x00600c01, 0x26080231, 0x00cf8041, 0x00000000 },
+ { 0x00800401, 0x24600231, 0x00d29401, 0x00000000 },
+ { 0x00800801, 0x24700231, 0x00d29421, 0x00000000 },
+ { 0x00800401, 0x24800231, 0x00d29441, 0x00000000 },
+ { 0x00800801, 0x24900231, 0x00d29461, 0x00000000 },
+ { 0x00600c01, 0x24f00231, 0x00cf9801, 0x00000000 },
+ { 0x00600801, 0x24f80231, 0x00cf9841, 0x00000000 },
+ { 0x00600c01, 0x26100231, 0x00cf9001, 0x00000000 },
+ { 0x00600801, 0x26180231, 0x00cf9041, 0x00000000 },
+ { 0x05000031, 0x20000e24, 0x00000380, 0x120a8018 },
+ { 0x05000031, 0x20000e24, 0x000004a0, 0x060a8019 },
+ { 0x05000031, 0x20000e24, 0x000005c0, 0x060a801a },
+ { 0x00600001, 0x2fe00021, 0x008d0000, 0x00000000 },
+ { 0x07000031, 0x20001e24, 0x00000fe0, 0x82000010 },
diff --git a/src/shaders/post_processing/gen7/pa_to_pl3.g7b b/src/shaders/post_processing/gen7/pa_to_pl3.g7b
new file mode 100644
index 0000000..63c6c7a
--- /dev/null
+++ b/src/shaders/post_processing/gen7/pa_to_pl3.g7b
@@ -0,0 +1,586 @@
+ { 0x00600001, 0x23600021, 0x008d0000, 0x00000000 },
+ { 0x00600001, 0x23200021, 0x008d0000, 0x00000000 },
+ { 0x00600001, 0x23400021, 0x008d0000, 0x00000000 },
+ { 0x00600041, 0x206077bd, 0x008d0060, 0x000000f0 },
+ { 0x00200001, 0x211401bd, 0x004500e0, 0x00000000 },
+ { 0x01600010, 0x20002e24, 0x0000005a, 0x00010001 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x00000000 },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+ { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c077bd, 0x00000060, 0x00000114 },
+ { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a077bd, 0x00000080, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c077bd, 0x00000060, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a077bd, 0x00000080, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c077bd, 0x00000060, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a077bd, 0x00000080, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c077bd, 0x00000060, 0x00000220 },
+ { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a077bd, 0x00000080, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x00020002 },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+ { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+ { 0x00110048, 0x20c477bd, 0x00000064, 0x00000114 },
+ { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+ { 0x00000048, 0x20a477bd, 0x00000084, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+ { 0x00110048, 0x20c477bd, 0x00000064, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+ { 0x00000048, 0x20a477bd, 0x00000084, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+ { 0x00110048, 0x20c477bd, 0x00000064, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+ { 0x00000048, 0x20a477bd, 0x00000084, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+ { 0x00110048, 0x20c477bd, 0x00000064, 0x00000220 },
+ { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+ { 0x00000048, 0x20a477bd, 0x00000084, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x00040004 },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+ { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+ { 0x00110048, 0x20c877bd, 0x00000068, 0x00000114 },
+ { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+ { 0x00000048, 0x20a877bd, 0x00000088, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+ { 0x00110048, 0x20c877bd, 0x00000068, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+ { 0x00000048, 0x20a877bd, 0x00000088, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+ { 0x00110048, 0x20c877bd, 0x00000068, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+ { 0x00000048, 0x20a877bd, 0x00000088, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+ { 0x00110048, 0x20c877bd, 0x00000068, 0x00000220 },
+ { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+ { 0x00000048, 0x20a877bd, 0x00000088, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x00060006 },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+ { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+ { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000114 },
+ { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+ { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+ { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+ { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+ { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+ { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+ { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000220 },
+ { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+ { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x00080008 },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+ { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+ { 0x00110048, 0x20d077bd, 0x00000070, 0x00000114 },
+ { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+ { 0x00000048, 0x20b077bd, 0x00000090, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+ { 0x00110048, 0x20d077bd, 0x00000070, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+ { 0x00000048, 0x20b077bd, 0x00000090, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+ { 0x00110048, 0x20d077bd, 0x00000070, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+ { 0x00000048, 0x20b077bd, 0x00000090, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+ { 0x00110048, 0x20d077bd, 0x00000070, 0x00000220 },
+ { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+ { 0x00000048, 0x20b077bd, 0x00000090, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x000a000a },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+ { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+ { 0x00110048, 0x20d477bd, 0x00000074, 0x00000114 },
+ { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+ { 0x00000048, 0x20b477bd, 0x00000094, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+ { 0x00110048, 0x20d477bd, 0x00000074, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+ { 0x00000048, 0x20b477bd, 0x00000094, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+ { 0x00110048, 0x20d477bd, 0x00000074, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+ { 0x00000048, 0x20b477bd, 0x00000094, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+ { 0x00110048, 0x20d477bd, 0x00000074, 0x00000220 },
+ { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+ { 0x00000048, 0x20b477bd, 0x00000094, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x000c000c },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+ { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+ { 0x00110048, 0x20d877bd, 0x00000078, 0x00000114 },
+ { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+ { 0x00000048, 0x20b877bd, 0x00000098, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+ { 0x00110048, 0x20d877bd, 0x00000078, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+ { 0x00000048, 0x20b877bd, 0x00000098, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+ { 0x00110048, 0x20d877bd, 0x00000078, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+ { 0x00000048, 0x20b877bd, 0x00000098, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+ { 0x00110048, 0x20d877bd, 0x00000078, 0x00000220 },
+ { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+ { 0x00000048, 0x20b877bd, 0x00000098, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x000e000e },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+ { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+ { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000114 },
+ { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+ { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+ { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+ { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+ { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+ { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+ { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000220 },
+ { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+ { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00800001, 0x22a00169, 0x00000000, 0xffffffff },
+ { 0x02000010, 0x20002d24, 0x020000e4, 0x00000000 },
+ { 0x02010010, 0x20002d24, 0x020000e6, 0x00000000 },
+ { 0x00110220, 0x34001c00, 0x02001400, 0x0000001a },
+ { 0x00000005, 0x23022d31, 0x00000044, 0x00030003 },
+ { 0x00000401, 0x233803bd, 0x000000f4, 0x00000000 },
+ { 0x00000c01, 0x233003bd, 0x00000060, 0x00000000 },
+ { 0x00000c01, 0x233403bd, 0x00000080, 0x00000000 },
+ { 0x00000c01, 0x232803bd, 0x000000c0, 0x00000000 },
+ { 0x00000801, 0x232c03bd, 0x000000a0, 0x00000000 },
+ { 0x00000001, 0x240803bc, 0x000000c0, 0x00000000 },
+ { 0x00000048, 0x24087fbc, 0x00000060, 0x41000000 },
+ { 0x00000448, 0x22e87fbd, 0x000000f4, 0x41e00000 },
+ { 0x00000c41, 0x22e47fbd, 0x00000080, 0x40800000 },
+ { 0x00000001, 0x241003bc, 0x00000060, 0x00000000 },
+ { 0x00000c48, 0x22f07fbd, 0x000000f4, 0x41000000 },
+ { 0x00000801, 0x22f40061, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00400001, 0x2400036c, 0x00000000, 0x000062ea },
+ { 0x00400040, 0x24002d8c, 0x00690400, 0x00460046 },
+ { 0x00400009, 0x22c02d8d, 0x00690400, 0x00050005 },
+ { 0x00400001, 0x2400036c, 0x00000000, 0x00006420 },
+ { 0x00400040, 0x24002d8c, 0x00690400, 0x00400040 },
+ { 0x00400409, 0x22402d8d, 0x00690400, 0x00050005 },
+ { 0x00000801, 0x22500061, 0x00000000, 0x01000100 },
+ { 0x00000001, 0x22d00061, 0x00000000, 0x00400040 },
+ { 0x00110220, 0x34001c00, 0x02001400, 0x00000046 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x050eb400 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x00000000 },
+ { 0x00000401, 0x233c0021, 0x000000fc, 0x00000000 },
+ { 0x00000801, 0x23240121, 0x000000f8, 0x00000000 },
+ { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+ { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+ { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+ { 0x00110220, 0x34001c00, 0x00001400, 0x00000032 },
+ { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+ { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+ { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+ { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+ { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+ { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+ { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+ { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+ { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+ { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+ { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+ { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+ { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+ { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+ { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+ { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+ { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+ { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+ { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+ { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+ { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+ { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+ { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+ { 0x02000031, 0x28000229, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00110220, 0x34001c00, 0x02001400, 0x00000046 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x050eb400 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x00000000 },
+ { 0x00000040, 0x23240d21, 0x000000f8, 0x00000001 },
+ { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+ { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+ { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+ { 0x00110220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+ { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+ { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+ { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+ { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+ { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+ { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+ { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+ { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+ { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+ { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+ { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+ { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+ { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+ { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+ { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+ { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+ { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+ { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+ { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+ { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+ { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+ { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+ { 0x02000031, 0x2a000229, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00110220, 0x34001c00, 0x02001400, 0x00000046 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x050eb400 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x00000000 },
+ { 0x00000040, 0x23240d21, 0x000000f8, 0x00000002 },
+ { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+ { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+ { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+ { 0x00110220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+ { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+ { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+ { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+ { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+ { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+ { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+ { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+ { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+ { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+ { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+ { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+ { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+ { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+ { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+ { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+ { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+ { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+ { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+ { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+ { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+ { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+ { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+ { 0x02000031, 0x2c000229, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00110220, 0x34001c00, 0x02001400, 0x00000046 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x050eb400 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x00000000 },
+ { 0x00000040, 0x23240d21, 0x000000f8, 0x00000003 },
+ { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+ { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+ { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+ { 0x00110220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+ { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+ { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+ { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+ { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+ { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+ { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+ { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+ { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+ { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+ { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+ { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+ { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+ { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+ { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+ { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+ { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+ { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+ { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+ { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+ { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+ { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+ { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+ { 0x02000031, 0x2e000229, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x00000000 },
+ { 0x00400040, 0x22082da8, 0x006902c0, 0x02000200 },
+ { 0x00600001, 0x23800021, 0x008d0360, 0x00000000 },
+ { 0x00600001, 0x24a00021, 0x008d0360, 0x00000000 },
+ { 0x00600001, 0x25c00021, 0x008d0360, 0x00000000 },
+ { 0x00200401, 0x238001a5, 0x004500e0, 0x00000000 },
+ { 0x00200408, 0x24a03da5, 0x004500e0, 0x00010001 },
+ { 0x00200408, 0x25c03da5, 0x004500e0, 0x00010001 },
+ { 0x00000801, 0x23880061, 0x00000000, 0x000f000f },
+ { 0x00000801, 0x24a80061, 0x00000000, 0x00070007 },
+ { 0x00000801, 0x25c80061, 0x00000000, 0x00070007 },
+ { 0x80800040, 0xa4002d29, 0x00b18400, 0x00800080 },
+ { 0x80800040, 0xa4202d29, 0x00b18420, 0x00800080 },
+ { 0x80800040, 0xa4402d29, 0x00b18440, 0x00800080 },
+ { 0x80800040, 0xa4602d29, 0x00b18460, 0x00800080 },
+ { 0x80600040, 0xc8002d29, 0x00ae8800, 0x00800080 },
+ { 0x80600040, 0xc8402d29, 0x00ae8840, 0x00800080 },
+ { 0x80600040, 0xc0002d29, 0x00ae8000, 0x00800080 },
+ { 0x80600040, 0xc0402d29, 0x00ae8040, 0x00800080 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x04000400 },
+ { 0x80800040, 0xb4002d29, 0x00b19400, 0x00800080 },
+ { 0x80800040, 0xb4202d29, 0x00b19420, 0x00800080 },
+ { 0x80800040, 0xb4402d29, 0x00b19440, 0x00800080 },
+ { 0x80800040, 0xb4602d29, 0x00b19460, 0x00800080 },
+ { 0x80600040, 0xd8002d29, 0x00ae9800, 0x00800080 },
+ { 0x80600040, 0xd8402d29, 0x00ae9840, 0x00800080 },
+ { 0x80600040, 0xd0002d29, 0x00ae9000, 0x00800080 },
+ { 0x80600040, 0xd0402d29, 0x00ae9040, 0x00800080 },
+ { 0x00400040, 0x22082da8, 0x006902c0, 0x06000600 },
+ { 0x80800040, 0xa4002d29, 0x00b18400, 0x00800080 },
+ { 0x80800040, 0xa4202d29, 0x00b18420, 0x00800080 },
+ { 0x80800040, 0xa4402d29, 0x00b18440, 0x00800080 },
+ { 0x80800040, 0xa4602d29, 0x00b18460, 0x00800080 },
+ { 0x80600040, 0xc8002d29, 0x00ae8800, 0x00800080 },
+ { 0x80600040, 0xc8402d29, 0x00ae8840, 0x00800080 },
+ { 0x80600040, 0xc0002d29, 0x00ae8000, 0x00800080 },
+ { 0x80600040, 0xc0402d29, 0x00ae8040, 0x00800080 },
+ { 0x80800040, 0xb4002d29, 0x00b19400, 0x00800080 },
+ { 0x80800040, 0xb4202d29, 0x00b19420, 0x00800080 },
+ { 0x80800040, 0xb4402d29, 0x00b19440, 0x00800080 },
+ { 0x80800040, 0xb4602d29, 0x00b19460, 0x00800080 },
+ { 0x80600040, 0xd8002d29, 0x00ae9800, 0x00800080 },
+ { 0x80600040, 0xd8402d29, 0x00ae9840, 0x00800080 },
+ { 0x80600040, 0xd0002d29, 0x00ae9000, 0x00800080 },
+ { 0x80600040, 0xd0402d29, 0x00ae9040, 0x00800080 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x00000000 },
+ { 0x00400040, 0x22082da8, 0x006902c0, 0x02000200 },
+ { 0x00800401, 0x23a00231, 0x00d28401, 0x00000000 },
+ { 0x00800801, 0x23b00231, 0x00d28421, 0x00000000 },
+ { 0x00800401, 0x23c00231, 0x00d28441, 0x00000000 },
+ { 0x00800801, 0x23d00231, 0x00d28461, 0x00000000 },
+ { 0x00600401, 0x24c00231, 0x00cf8801, 0x00000000 },
+ { 0x00600c01, 0x24c80231, 0x00cf8841, 0x00000000 },
+ { 0x00600401, 0x25e00231, 0x00cf8001, 0x00000000 },
+ { 0x00600c01, 0x25e80231, 0x00cf8041, 0x00000000 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x04000400 },
+ { 0x00800401, 0x23e00231, 0x00d29401, 0x00000000 },
+ { 0x00800801, 0x23f00231, 0x00d29421, 0x00000000 },
+ { 0x00800401, 0x24000231, 0x00d29441, 0x00000000 },
+ { 0x00800801, 0x24100231, 0x00d29461, 0x00000000 },
+ { 0x00600c01, 0x24d00231, 0x00cf9801, 0x00000000 },
+ { 0x00600801, 0x24d80231, 0x00cf9841, 0x00000000 },
+ { 0x00600c01, 0x25f00231, 0x00cf9001, 0x00000000 },
+ { 0x00600801, 0x25f80231, 0x00cf9041, 0x00000000 },
+ { 0x00400040, 0x22082da8, 0x006902c0, 0x06000600 },
+ { 0x00800401, 0x24200231, 0x00d28401, 0x00000000 },
+ { 0x00800801, 0x24300231, 0x00d28421, 0x00000000 },
+ { 0x00800401, 0x24400231, 0x00d28441, 0x00000000 },
+ { 0x00800801, 0x24500231, 0x00d28461, 0x00000000 },
+ { 0x00600401, 0x24e00231, 0x00cf8801, 0x00000000 },
+ { 0x00600c01, 0x24e80231, 0x00cf8841, 0x00000000 },
+ { 0x00600401, 0x26000231, 0x00cf8001, 0x00000000 },
+ { 0x00600c01, 0x26080231, 0x00cf8041, 0x00000000 },
+ { 0x00800401, 0x24600231, 0x00d29401, 0x00000000 },
+ { 0x00800801, 0x24700231, 0x00d29421, 0x00000000 },
+ { 0x00800401, 0x24800231, 0x00d29441, 0x00000000 },
+ { 0x00800801, 0x24900231, 0x00d29461, 0x00000000 },
+ { 0x00600c01, 0x24f00231, 0x00cf9801, 0x00000000 },
+ { 0x00600801, 0x24f80231, 0x00cf9841, 0x00000000 },
+ { 0x00600c01, 0x26100231, 0x00cf9001, 0x00000000 },
+ { 0x00600801, 0x26180231, 0x00cf9041, 0x00000000 },
+ { 0x05000031, 0x20000e24, 0x00000380, 0x120a8018 },
+ { 0x05000031, 0x20000e24, 0x000004a0, 0x060a8019 },
+ { 0x05000031, 0x20000e24, 0x000005c0, 0x060a801a },
+ { 0x00600001, 0x2fe00021, 0x008d0000, 0x00000000 },
+ { 0x07000031, 0x20001e24, 0x00000fe0, 0x82000010 },
diff --git a/src/shaders/post_processing/gen7/pl2_to_pa.asm b/src/shaders/post_processing/gen7/pl2_to_pa.asm
new file mode 100644
index 0000000..5f737ee
--- /dev/null
+++ b/src/shaders/post_processing/gen7/pl2_to_pa.asm
@@ -0,0 +1,17 @@
+// Module name: AVS
+.kernel PL2_TO_PA
+.code
+
+#include "VP_Setup.g4a"
+#include "Set_Layer_0.g4a"
+#include "Set_AVS_Buf_0123_PL2.g4a"
+#include "PL2_AVS_Buf_0.g4a"
+#include "PL2_AVS_Buf_1.g4a"
+#include "PL2_AVS_Buf_2.g4a"
+#include "PL2_AVS_Buf_3.g4a"
+#include "Save_AVS_PA.g4a"
+#include "EOT.g4a"
+
+.end_code
+
+.end_kernel
diff --git a/src/shaders/post_processing/gen7/pl2_to_pa.g75b b/src/shaders/post_processing/gen7/pl2_to_pa.g75b
new file mode 100644
index 0000000..5074bc2
--- /dev/null
+++ b/src/shaders/post_processing/gen7/pl2_to_pa.g75b
@@ -0,0 +1,697 @@
+ { 0x00600001, 0x23600021, 0x008d0000, 0x00000000 },
+ { 0x00600001, 0x23200021, 0x008d0000, 0x00000000 },
+ { 0x00600001, 0x23400021, 0x008d0000, 0x00000000 },
+ { 0x00600041, 0x206077bd, 0x008d0060, 0x000000f0 },
+ { 0x00200001, 0x211401bd, 0x004500e0, 0x00000000 },
+ { 0x01600010, 0x20002e24, 0x0000005a, 0x00010001 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x00000000 },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+ { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c077bd, 0x00000060, 0x00000114 },
+ { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a077bd, 0x00000080, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c077bd, 0x00000060, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a077bd, 0x00000080, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c077bd, 0x00000060, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a077bd, 0x00000080, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c077bd, 0x00000060, 0x00000220 },
+ { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a077bd, 0x00000080, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x00020002 },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+ { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+ { 0x00110048, 0x20c477bd, 0x00000064, 0x00000114 },
+ { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+ { 0x00000048, 0x20a477bd, 0x00000084, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+ { 0x00110048, 0x20c477bd, 0x00000064, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+ { 0x00000048, 0x20a477bd, 0x00000084, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+ { 0x00110048, 0x20c477bd, 0x00000064, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+ { 0x00000048, 0x20a477bd, 0x00000084, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+ { 0x00110048, 0x20c477bd, 0x00000064, 0x00000220 },
+ { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+ { 0x00000048, 0x20a477bd, 0x00000084, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x00040004 },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+ { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+ { 0x00110048, 0x20c877bd, 0x00000068, 0x00000114 },
+ { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+ { 0x00000048, 0x20a877bd, 0x00000088, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+ { 0x00110048, 0x20c877bd, 0x00000068, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+ { 0x00000048, 0x20a877bd, 0x00000088, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+ { 0x00110048, 0x20c877bd, 0x00000068, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+ { 0x00000048, 0x20a877bd, 0x00000088, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+ { 0x00110048, 0x20c877bd, 0x00000068, 0x00000220 },
+ { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+ { 0x00000048, 0x20a877bd, 0x00000088, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x00060006 },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+ { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+ { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000114 },
+ { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+ { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+ { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+ { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+ { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+ { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+ { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000220 },
+ { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+ { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x00080008 },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+ { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+ { 0x00110048, 0x20d077bd, 0x00000070, 0x00000114 },
+ { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+ { 0x00000048, 0x20b077bd, 0x00000090, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+ { 0x00110048, 0x20d077bd, 0x00000070, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+ { 0x00000048, 0x20b077bd, 0x00000090, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+ { 0x00110048, 0x20d077bd, 0x00000070, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+ { 0x00000048, 0x20b077bd, 0x00000090, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+ { 0x00110048, 0x20d077bd, 0x00000070, 0x00000220 },
+ { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+ { 0x00000048, 0x20b077bd, 0x00000090, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x000a000a },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+ { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+ { 0x00110048, 0x20d477bd, 0x00000074, 0x00000114 },
+ { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+ { 0x00000048, 0x20b477bd, 0x00000094, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+ { 0x00110048, 0x20d477bd, 0x00000074, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+ { 0x00000048, 0x20b477bd, 0x00000094, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+ { 0x00110048, 0x20d477bd, 0x00000074, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+ { 0x00000048, 0x20b477bd, 0x00000094, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+ { 0x00110048, 0x20d477bd, 0x00000074, 0x00000220 },
+ { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+ { 0x00000048, 0x20b477bd, 0x00000094, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x000c000c },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+ { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+ { 0x00110048, 0x20d877bd, 0x00000078, 0x00000114 },
+ { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+ { 0x00000048, 0x20b877bd, 0x00000098, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+ { 0x00110048, 0x20d877bd, 0x00000078, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+ { 0x00000048, 0x20b877bd, 0x00000098, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+ { 0x00110048, 0x20d877bd, 0x00000078, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+ { 0x00000048, 0x20b877bd, 0x00000098, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+ { 0x00110048, 0x20d877bd, 0x00000078, 0x00000220 },
+ { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+ { 0x00000048, 0x20b877bd, 0x00000098, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x000e000e },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+ { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+ { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000114 },
+ { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+ { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+ { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+ { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+ { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+ { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+ { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000220 },
+ { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+ { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00800001, 0x22a00169, 0x00000000, 0xffffffff },
+ { 0x02000010, 0x20002d24, 0x020000e4, 0x00000000 },
+ { 0x02010010, 0x20002d24, 0x020000e6, 0x00000000 },
+ { 0x00110220, 0x34001c00, 0x02001400, 0x000000d0 },
+ { 0x00000005, 0x23022d31, 0x00000044, 0x00030003 },
+ { 0x00000401, 0x233803bd, 0x000000f4, 0x00000000 },
+ { 0x00000c01, 0x233003bd, 0x00000060, 0x00000000 },
+ { 0x00000c01, 0x233403bd, 0x00000080, 0x00000000 },
+ { 0x00000c01, 0x232803bd, 0x000000c0, 0x00000000 },
+ { 0x00000801, 0x232c03bd, 0x000000a0, 0x00000000 },
+ { 0x00000001, 0x240803bc, 0x000000c0, 0x00000000 },
+ { 0x00000048, 0x24087fbc, 0x00000060, 0x41000000 },
+ { 0x00000448, 0x22e87fbd, 0x000000f4, 0x41e00000 },
+ { 0x00000c41, 0x22e47fbd, 0x00000080, 0x40800000 },
+ { 0x00000001, 0x241003bc, 0x00000060, 0x00000000 },
+ { 0x00000c48, 0x22f07fbd, 0x000000f4, 0x41000000 },
+ { 0x00000801, 0x22f40061, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00400001, 0x2400036c, 0x00000000, 0x00006ea2 },
+ { 0x00400040, 0x24002d8c, 0x00690400, 0x00460046 },
+ { 0x00400009, 0x22c02d8d, 0x00690400, 0x00050005 },
+ { 0x00400001, 0x2400036c, 0x00000000, 0x00006204 },
+ { 0x00400040, 0x24002d8c, 0x00690400, 0x00400040 },
+ { 0x00400409, 0x22402d8d, 0x00690400, 0x00050005 },
+ { 0x00000801, 0x22500061, 0x00000000, 0x01000100 },
+ { 0x00000001, 0x22d00061, 0x00000000, 0x00400040 },
+ { 0x00110220, 0x34001c00, 0x02001400, 0x00000290 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+ { 0x00000401, 0x233c0021, 0x000000fc, 0x00000000 },
+ { 0x00000801, 0x23240121, 0x000000f8, 0x00000000 },
+ { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+ { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+ { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+ { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+ { 0x00110220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+ { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+ { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+ { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+ { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+ { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+ { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+ { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+ { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+ { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+ { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+ { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+ { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+ { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+ { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+ { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+ { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+ { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+ { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+ { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+ { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+ { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+ { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+ { 0x02000031, 0x28000229, 0x00000200, 0x00000200 },
+ { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x048eb801 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000a000 },
+ { 0x02000031, 0x28800229, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00110220, 0x34001c00, 0x02001400, 0x00000280 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+ { 0x00000040, 0x23240d21, 0x000000f8, 0x00000001 },
+ { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+ { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+ { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+ { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+ { 0x00110220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+ { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+ { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+ { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+ { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+ { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+ { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+ { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+ { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+ { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+ { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+ { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+ { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+ { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+ { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+ { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+ { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+ { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+ { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+ { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+ { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+ { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+ { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+ { 0x02000031, 0x2a000229, 0x00000200, 0x00000200 },
+ { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x048eb801 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000a000 },
+ { 0x02000031, 0x2a800229, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00110220, 0x34001c00, 0x02001400, 0x00000280 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+ { 0x00000040, 0x23240d21, 0x000000f8, 0x00000002 },
+ { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+ { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+ { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+ { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+ { 0x00110220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+ { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+ { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+ { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+ { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+ { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+ { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+ { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+ { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+ { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+ { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+ { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+ { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+ { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+ { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+ { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+ { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+ { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+ { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+ { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+ { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+ { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+ { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+ { 0x02000031, 0x2c000229, 0x00000200, 0x00000200 },
+ { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x048eb801 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000a000 },
+ { 0x02000031, 0x2c800229, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00110220, 0x34001c00, 0x02001400, 0x00000280 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+ { 0x00000040, 0x23240d21, 0x000000f8, 0x00000003 },
+ { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+ { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+ { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+ { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+ { 0x00110220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+ { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+ { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+ { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+ { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+ { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+ { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+ { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+ { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+ { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+ { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+ { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+ { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+ { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+ { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+ { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+ { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+ { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+ { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+ { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+ { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+ { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+ { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+ { 0x02000031, 0x2e000229, 0x00000200, 0x00000200 },
+ { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x048eb801 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000a000 },
+ { 0x02000031, 0x2e800229, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x00000000 },
+ { 0x00400040, 0x22082e2c, 0x0069005c, 0x03a003a0 },
+ { 0x00000409, 0x23603da5, 0x000000e0, 0x00010001 },
+ { 0x00000c01, 0x236401a5, 0x000000e2, 0x00000000 },
+ { 0x00000801, 0x23680061, 0x00000000, 0x0001001f },
+ { 0x80400040, 0xc0002d29, 0x008a8000, 0x00800080 },
+ { 0x80400040, 0xc0202d29, 0x008a8020, 0x00800080 },
+ { 0x80400040, 0xc0402d29, 0x008a8040, 0x00800080 },
+ { 0x80400040, 0xc0602d29, 0x008a8060, 0x00800080 },
+ { 0x80600040, 0xa4002d29, 0x008d8400, 0x00800080 },
+ { 0x80600040, 0xa4202d29, 0x008d8420, 0x00800080 },
+ { 0x80600040, 0xa4402d29, 0x008d8440, 0x00800080 },
+ { 0x80600040, 0xa4602d29, 0x008d8460, 0x00800080 },
+ { 0x80400040, 0xc8002d29, 0x008a8800, 0x00800080 },
+ { 0x80400040, 0xc8202d29, 0x008a8820, 0x00800080 },
+ { 0x80400040, 0xc8402d29, 0x008a8840, 0x00800080 },
+ { 0x80400040, 0xc8602d29, 0x008a8860, 0x00800080 },
+ { 0x80400040, 0xc0102d29, 0x008a8010, 0x00800080 },
+ { 0x80400040, 0xc0302d29, 0x008a8030, 0x00800080 },
+ { 0x80400040, 0xc0502d29, 0x008a8050, 0x00800080 },
+ { 0x80400040, 0xc0702d29, 0x008a8070, 0x00800080 },
+ { 0x80600040, 0xa4102d29, 0x008d8410, 0x00800080 },
+ { 0x80600040, 0xa4302d29, 0x008d8430, 0x00800080 },
+ { 0x80600040, 0xa4502d29, 0x008d8450, 0x00800080 },
+ { 0x80600040, 0xa4702d29, 0x008d8470, 0x00800080 },
+ { 0x80400040, 0xc8102d29, 0x008a8810, 0x00800080 },
+ { 0x80400040, 0xc8302d29, 0x008a8830, 0x00800080 },
+ { 0x80400040, 0xc8502d29, 0x008a8850, 0x00800080 },
+ { 0x80400040, 0xc8702d29, 0x008a8870, 0x00800080 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x02000200 },
+ { 0x80400040, 0xc0002d29, 0x008a8000, 0x00800080 },
+ { 0x80400040, 0xc0202d29, 0x008a8020, 0x00800080 },
+ { 0x80400040, 0xc0402d29, 0x008a8040, 0x00800080 },
+ { 0x80400040, 0xc0602d29, 0x008a8060, 0x00800080 },
+ { 0x80600040, 0xa4002d29, 0x008d8400, 0x00800080 },
+ { 0x80600040, 0xa4202d29, 0x008d8420, 0x00800080 },
+ { 0x80600040, 0xa4402d29, 0x008d8440, 0x00800080 },
+ { 0x80600040, 0xa4602d29, 0x008d8460, 0x00800080 },
+ { 0x80400040, 0xc8002d29, 0x008a8800, 0x00800080 },
+ { 0x80400040, 0xc8202d29, 0x008a8820, 0x00800080 },
+ { 0x80400040, 0xc8402d29, 0x008a8840, 0x00800080 },
+ { 0x80400040, 0xc8602d29, 0x008a8860, 0x00800080 },
+ { 0x80400040, 0xc0102d29, 0x008a8010, 0x00800080 },
+ { 0x80400040, 0xc0302d29, 0x008a8030, 0x00800080 },
+ { 0x80400040, 0xc0502d29, 0x008a8050, 0x00800080 },
+ { 0x80400040, 0xc0702d29, 0x008a8070, 0x00800080 },
+ { 0x80600040, 0xa4102d29, 0x008d8410, 0x00800080 },
+ { 0x80600040, 0xa4302d29, 0x008d8430, 0x00800080 },
+ { 0x80600040, 0xa4502d29, 0x008d8450, 0x00800080 },
+ { 0x80600040, 0xa4702d29, 0x008d8470, 0x00800080 },
+ { 0x80400040, 0xc8102d29, 0x008a8810, 0x00800080 },
+ { 0x80400040, 0xc8302d29, 0x008a8830, 0x00800080 },
+ { 0x80400040, 0xc8502d29, 0x008a8850, 0x00800080 },
+ { 0x80400040, 0xc8702d29, 0x008a8870, 0x00800080 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x04000400 },
+ { 0x80400040, 0xc0002d29, 0x008a8000, 0x00800080 },
+ { 0x80400040, 0xc0202d29, 0x008a8020, 0x00800080 },
+ { 0x80400040, 0xc0402d29, 0x008a8040, 0x00800080 },
+ { 0x80400040, 0xc0602d29, 0x008a8060, 0x00800080 },
+ { 0x80600040, 0xa4002d29, 0x008d8400, 0x00800080 },
+ { 0x80600040, 0xa4202d29, 0x008d8420, 0x00800080 },
+ { 0x80600040, 0xa4402d29, 0x008d8440, 0x00800080 },
+ { 0x80600040, 0xa4602d29, 0x008d8460, 0x00800080 },
+ { 0x80400040, 0xc8002d29, 0x008a8800, 0x00800080 },
+ { 0x80400040, 0xc8202d29, 0x008a8820, 0x00800080 },
+ { 0x80400040, 0xc8402d29, 0x008a8840, 0x00800080 },
+ { 0x80400040, 0xc8602d29, 0x008a8860, 0x00800080 },
+ { 0x80400040, 0xc0102d29, 0x008a8010, 0x00800080 },
+ { 0x80400040, 0xc0302d29, 0x008a8030, 0x00800080 },
+ { 0x80400040, 0xc0502d29, 0x008a8050, 0x00800080 },
+ { 0x80400040, 0xc0702d29, 0x008a8070, 0x00800080 },
+ { 0x80600040, 0xa4102d29, 0x008d8410, 0x00800080 },
+ { 0x80600040, 0xa4302d29, 0x008d8430, 0x00800080 },
+ { 0x80600040, 0xa4502d29, 0x008d8450, 0x00800080 },
+ { 0x80600040, 0xa4702d29, 0x008d8470, 0x00800080 },
+ { 0x80400040, 0xc8102d29, 0x008a8810, 0x00800080 },
+ { 0x80400040, 0xc8302d29, 0x008a8830, 0x00800080 },
+ { 0x80400040, 0xc8502d29, 0x008a8850, 0x00800080 },
+ { 0x80400040, 0xc8702d29, 0x008a8870, 0x00800080 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x06000600 },
+ { 0x80400040, 0xc0002d29, 0x008a8000, 0x00800080 },
+ { 0x80400040, 0xc0202d29, 0x008a8020, 0x00800080 },
+ { 0x80400040, 0xc0402d29, 0x008a8040, 0x00800080 },
+ { 0x80400040, 0xc0602d29, 0x008a8060, 0x00800080 },
+ { 0x80600040, 0xa4002d29, 0x008d8400, 0x00800080 },
+ { 0x80600040, 0xa4202d29, 0x008d8420, 0x00800080 },
+ { 0x80600040, 0xa4402d29, 0x008d8440, 0x00800080 },
+ { 0x80600040, 0xa4602d29, 0x008d8460, 0x00800080 },
+ { 0x80400040, 0xc8002d29, 0x008a8800, 0x00800080 },
+ { 0x80400040, 0xc8202d29, 0x008a8820, 0x00800080 },
+ { 0x80400040, 0xc8402d29, 0x008a8840, 0x00800080 },
+ { 0x80400040, 0xc8602d29, 0x008a8860, 0x00800080 },
+ { 0x80400040, 0xc0102d29, 0x008a8010, 0x00800080 },
+ { 0x80400040, 0xc0302d29, 0x008a8030, 0x00800080 },
+ { 0x80400040, 0xc0502d29, 0x008a8050, 0x00800080 },
+ { 0x80400040, 0xc0702d29, 0x008a8070, 0x00800080 },
+ { 0x80600040, 0xa4102d29, 0x008d8410, 0x00800080 },
+ { 0x80600040, 0xa4302d29, 0x008d8430, 0x00800080 },
+ { 0x80600040, 0xa4502d29, 0x008d8450, 0x00800080 },
+ { 0x80600040, 0xa4702d29, 0x008d8470, 0x00800080 },
+ { 0x80400040, 0xc8102d29, 0x008a8810, 0x00800080 },
+ { 0x80400040, 0xc8302d29, 0x008a8830, 0x00800080 },
+ { 0x80400040, 0xc8502d29, 0x008a8850, 0x00800080 },
+ { 0x80400040, 0xc8702d29, 0x008a8870, 0x00800080 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x08000800 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x00000000 },
+ { 0x00600001, 0x23800021, 0x008d0360, 0x00000000 },
+ { 0x00600001, 0x24a00021, 0x008d0360, 0x00000000 },
+ { 0x00000040, 0x24a41ca5, 0x00000364, 0x00000002 },
+ { 0x00600401, 0xf8000231, 0x00cf8001, 0x00000000 },
+ { 0x00600401, 0xf8200231, 0x00cf8021, 0x00000000 },
+ { 0x00800c01, 0xd0000231, 0x00d28401, 0x00000000 },
+ { 0x00800c01, 0xd0200231, 0x00d28421, 0x00000000 },
+ { 0x00600801, 0xf4000231, 0x00cf8801, 0x00000000 },
+ { 0x00600801, 0xf4200231, 0x00cf8821, 0x00000000 },
+ { 0x00600401, 0xf9200231, 0x00cf8041, 0x00000000 },
+ { 0x00600401, 0xf9400231, 0x00cf8061, 0x00000000 },
+ { 0x00800c01, 0xd1200231, 0x00d28441, 0x00000000 },
+ { 0x00800c01, 0xd1400231, 0x00d28461, 0x00000000 },
+ { 0x00600801, 0xf5200231, 0x00cf8841, 0x00000000 },
+ { 0x00600801, 0xf5400231, 0x00cf8861, 0x00000000 },
+ { 0x05000031, 0x20000e24, 0x00000380, 0x060a8018 },
+ { 0x05000031, 0x20000e24, 0x000004a0, 0x060a8018 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x02000200 },
+ { 0x00000040, 0x23841ca5, 0x00000364, 0x00000004 },
+ { 0x00000040, 0x24a41ca5, 0x00000364, 0x00000006 },
+ { 0x00600401, 0xf8000231, 0x00cf8001, 0x00000000 },
+ { 0x00600401, 0xf8200231, 0x00cf8021, 0x00000000 },
+ { 0x00800c01, 0xd0000231, 0x00d28401, 0x00000000 },
+ { 0x00800c01, 0xd0200231, 0x00d28421, 0x00000000 },
+ { 0x00600801, 0xf4000231, 0x00cf8801, 0x00000000 },
+ { 0x00600801, 0xf4200231, 0x00cf8821, 0x00000000 },
+ { 0x00600401, 0xf9200231, 0x00cf8041, 0x00000000 },
+ { 0x00600401, 0xf9400231, 0x00cf8061, 0x00000000 },
+ { 0x00800c01, 0xd1200231, 0x00d28441, 0x00000000 },
+ { 0x00800c01, 0xd1400231, 0x00d28461, 0x00000000 },
+ { 0x00600801, 0xf5200231, 0x00cf8841, 0x00000000 },
+ { 0x00600801, 0xf5400231, 0x00cf8861, 0x00000000 },
+ { 0x05000031, 0x20000e24, 0x00000380, 0x060a8018 },
+ { 0x05000031, 0x20000e24, 0x000004a0, 0x060a8018 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x04000400 },
+ { 0x00000040, 0x23841ca5, 0x00000364, 0x00000008 },
+ { 0x00000040, 0x24a41ca5, 0x00000364, 0x0000000a },
+ { 0x00600401, 0xf8000231, 0x00cf8001, 0x00000000 },
+ { 0x00600401, 0xf8200231, 0x00cf8021, 0x00000000 },
+ { 0x00800c01, 0xd0000231, 0x00d28401, 0x00000000 },
+ { 0x00800c01, 0xd0200231, 0x00d28421, 0x00000000 },
+ { 0x00600801, 0xf4000231, 0x00cf8801, 0x00000000 },
+ { 0x00600801, 0xf4200231, 0x00cf8821, 0x00000000 },
+ { 0x00600401, 0xf9200231, 0x00cf8041, 0x00000000 },
+ { 0x00600401, 0xf9400231, 0x00cf8061, 0x00000000 },
+ { 0x00800c01, 0xd1200231, 0x00d28441, 0x00000000 },
+ { 0x00800c01, 0xd1400231, 0x00d28461, 0x00000000 },
+ { 0x00600801, 0xf5200231, 0x00cf8841, 0x00000000 },
+ { 0x00600801, 0xf5400231, 0x00cf8861, 0x00000000 },
+ { 0x05000031, 0x20000e24, 0x00000380, 0x060a8018 },
+ { 0x05000031, 0x20000e24, 0x000004a0, 0x060a8018 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x06000600 },
+ { 0x00000040, 0x23841ca5, 0x00000364, 0x0000000c },
+ { 0x00000040, 0x24a41ca5, 0x00000364, 0x0000000e },
+ { 0x00600401, 0xf8000231, 0x00cf8001, 0x00000000 },
+ { 0x00600401, 0xf8200231, 0x00cf8021, 0x00000000 },
+ { 0x00800c01, 0xd0000231, 0x00d28401, 0x00000000 },
+ { 0x00800c01, 0xd0200231, 0x00d28421, 0x00000000 },
+ { 0x00600801, 0xf4000231, 0x00cf8801, 0x00000000 },
+ { 0x00600801, 0xf4200231, 0x00cf8821, 0x00000000 },
+ { 0x00600401, 0xf9200231, 0x00cf8041, 0x00000000 },
+ { 0x00600401, 0xf9400231, 0x00cf8061, 0x00000000 },
+ { 0x00800c01, 0xd1200231, 0x00d28441, 0x00000000 },
+ { 0x00800c01, 0xd1400231, 0x00d28461, 0x00000000 },
+ { 0x00600801, 0xf5200231, 0x00cf8841, 0x00000000 },
+ { 0x00600801, 0xf5400231, 0x00cf8861, 0x00000000 },
+ { 0x05000031, 0x20000e24, 0x00000380, 0x060a8018 },
+ { 0x05000031, 0x20000e24, 0x000004a0, 0x060a8018 },
+ { 0x00600001, 0x2fe00021, 0x008d0000, 0x00000000 },
+ { 0x07000031, 0x20001e24, 0x00000fe0, 0x82000010 },
diff --git a/src/shaders/post_processing/gen7/pl2_to_pa.g7b b/src/shaders/post_processing/gen7/pl2_to_pa.g7b
new file mode 100644
index 0000000..053b837
--- /dev/null
+++ b/src/shaders/post_processing/gen7/pl2_to_pa.g7b
@@ -0,0 +1,697 @@
+ { 0x00600001, 0x23600021, 0x008d0000, 0x00000000 },
+ { 0x00600001, 0x23200021, 0x008d0000, 0x00000000 },
+ { 0x00600001, 0x23400021, 0x008d0000, 0x00000000 },
+ { 0x00600041, 0x206077bd, 0x008d0060, 0x000000f0 },
+ { 0x00200001, 0x211401bd, 0x004500e0, 0x00000000 },
+ { 0x01600010, 0x20002e24, 0x0000005a, 0x00010001 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x00000000 },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+ { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c077bd, 0x00000060, 0x00000114 },
+ { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a077bd, 0x00000080, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c077bd, 0x00000060, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a077bd, 0x00000080, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c077bd, 0x00000060, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a077bd, 0x00000080, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c077bd, 0x00000060, 0x00000220 },
+ { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a077bd, 0x00000080, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x00020002 },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+ { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+ { 0x00110048, 0x20c477bd, 0x00000064, 0x00000114 },
+ { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+ { 0x00000048, 0x20a477bd, 0x00000084, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+ { 0x00110048, 0x20c477bd, 0x00000064, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+ { 0x00000048, 0x20a477bd, 0x00000084, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+ { 0x00110048, 0x20c477bd, 0x00000064, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+ { 0x00000048, 0x20a477bd, 0x00000084, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+ { 0x00110048, 0x20c477bd, 0x00000064, 0x00000220 },
+ { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+ { 0x00000048, 0x20a477bd, 0x00000084, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x00040004 },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+ { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+ { 0x00110048, 0x20c877bd, 0x00000068, 0x00000114 },
+ { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+ { 0x00000048, 0x20a877bd, 0x00000088, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+ { 0x00110048, 0x20c877bd, 0x00000068, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+ { 0x00000048, 0x20a877bd, 0x00000088, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+ { 0x00110048, 0x20c877bd, 0x00000068, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+ { 0x00000048, 0x20a877bd, 0x00000088, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+ { 0x00110048, 0x20c877bd, 0x00000068, 0x00000220 },
+ { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+ { 0x00000048, 0x20a877bd, 0x00000088, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x00060006 },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+ { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+ { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000114 },
+ { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+ { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+ { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+ { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+ { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+ { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+ { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000220 },
+ { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+ { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x00080008 },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+ { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+ { 0x00110048, 0x20d077bd, 0x00000070, 0x00000114 },
+ { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+ { 0x00000048, 0x20b077bd, 0x00000090, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+ { 0x00110048, 0x20d077bd, 0x00000070, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+ { 0x00000048, 0x20b077bd, 0x00000090, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+ { 0x00110048, 0x20d077bd, 0x00000070, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+ { 0x00000048, 0x20b077bd, 0x00000090, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+ { 0x00110048, 0x20d077bd, 0x00000070, 0x00000220 },
+ { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+ { 0x00000048, 0x20b077bd, 0x00000090, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x000a000a },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+ { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+ { 0x00110048, 0x20d477bd, 0x00000074, 0x00000114 },
+ { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+ { 0x00000048, 0x20b477bd, 0x00000094, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+ { 0x00110048, 0x20d477bd, 0x00000074, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+ { 0x00000048, 0x20b477bd, 0x00000094, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+ { 0x00110048, 0x20d477bd, 0x00000074, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+ { 0x00000048, 0x20b477bd, 0x00000094, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+ { 0x00110048, 0x20d477bd, 0x00000074, 0x00000220 },
+ { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+ { 0x00000048, 0x20b477bd, 0x00000094, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x000c000c },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+ { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+ { 0x00110048, 0x20d877bd, 0x00000078, 0x00000114 },
+ { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+ { 0x00000048, 0x20b877bd, 0x00000098, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+ { 0x00110048, 0x20d877bd, 0x00000078, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+ { 0x00000048, 0x20b877bd, 0x00000098, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+ { 0x00110048, 0x20d877bd, 0x00000078, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+ { 0x00000048, 0x20b877bd, 0x00000098, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+ { 0x00110048, 0x20d877bd, 0x00000078, 0x00000220 },
+ { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+ { 0x00000048, 0x20b877bd, 0x00000098, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x000e000e },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+ { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+ { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000114 },
+ { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+ { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+ { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+ { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+ { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+ { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+ { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000220 },
+ { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+ { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00800001, 0x22a00169, 0x00000000, 0xffffffff },
+ { 0x02000010, 0x20002d24, 0x020000e4, 0x00000000 },
+ { 0x02010010, 0x20002d24, 0x020000e6, 0x00000000 },
+ { 0x00110220, 0x34001c00, 0x02001400, 0x0000001a },
+ { 0x00000005, 0x23022d31, 0x00000044, 0x00030003 },
+ { 0x00000401, 0x233803bd, 0x000000f4, 0x00000000 },
+ { 0x00000c01, 0x233003bd, 0x00000060, 0x00000000 },
+ { 0x00000c01, 0x233403bd, 0x00000080, 0x00000000 },
+ { 0x00000c01, 0x232803bd, 0x000000c0, 0x00000000 },
+ { 0x00000801, 0x232c03bd, 0x000000a0, 0x00000000 },
+ { 0x00000001, 0x240803bc, 0x000000c0, 0x00000000 },
+ { 0x00000048, 0x24087fbc, 0x00000060, 0x41000000 },
+ { 0x00000448, 0x22e87fbd, 0x000000f4, 0x41e00000 },
+ { 0x00000c41, 0x22e47fbd, 0x00000080, 0x40800000 },
+ { 0x00000001, 0x241003bc, 0x00000060, 0x00000000 },
+ { 0x00000c48, 0x22f07fbd, 0x000000f4, 0x41000000 },
+ { 0x00000801, 0x22f40061, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00400001, 0x2400036c, 0x00000000, 0x00006ea2 },
+ { 0x00400040, 0x24002d8c, 0x00690400, 0x00460046 },
+ { 0x00400009, 0x22c02d8d, 0x00690400, 0x00050005 },
+ { 0x00400001, 0x2400036c, 0x00000000, 0x00006204 },
+ { 0x00400040, 0x24002d8c, 0x00690400, 0x00400040 },
+ { 0x00400409, 0x22402d8d, 0x00690400, 0x00050005 },
+ { 0x00000801, 0x22500061, 0x00000000, 0x01000100 },
+ { 0x00000001, 0x22d00061, 0x00000000, 0x00400040 },
+ { 0x00110220, 0x34001c00, 0x02001400, 0x00000052 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+ { 0x00000401, 0x233c0021, 0x000000fc, 0x00000000 },
+ { 0x00000801, 0x23240121, 0x000000f8, 0x00000000 },
+ { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+ { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+ { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+ { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+ { 0x00110220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+ { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+ { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+ { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+ { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+ { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+ { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+ { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+ { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+ { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+ { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+ { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+ { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+ { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+ { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+ { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+ { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+ { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+ { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+ { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+ { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+ { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+ { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+ { 0x02000031, 0x28000229, 0x00000200, 0x00000200 },
+ { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x048eb801 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000a000 },
+ { 0x02000031, 0x28800229, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00110220, 0x34001c00, 0x02001400, 0x00000050 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+ { 0x00000040, 0x23240d21, 0x000000f8, 0x00000001 },
+ { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+ { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+ { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+ { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+ { 0x00110220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+ { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+ { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+ { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+ { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+ { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+ { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+ { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+ { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+ { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+ { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+ { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+ { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+ { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+ { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+ { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+ { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+ { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+ { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+ { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+ { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+ { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+ { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+ { 0x02000031, 0x2a000229, 0x00000200, 0x00000200 },
+ { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x048eb801 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000a000 },
+ { 0x02000031, 0x2a800229, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00110220, 0x34001c00, 0x02001400, 0x00000050 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+ { 0x00000040, 0x23240d21, 0x000000f8, 0x00000002 },
+ { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+ { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+ { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+ { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+ { 0x00110220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+ { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+ { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+ { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+ { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+ { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+ { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+ { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+ { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+ { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+ { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+ { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+ { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+ { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+ { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+ { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+ { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+ { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+ { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+ { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+ { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+ { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+ { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+ { 0x02000031, 0x2c000229, 0x00000200, 0x00000200 },
+ { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x048eb801 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000a000 },
+ { 0x02000031, 0x2c800229, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00110220, 0x34001c00, 0x02001400, 0x00000050 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+ { 0x00000040, 0x23240d21, 0x000000f8, 0x00000003 },
+ { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+ { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+ { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+ { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+ { 0x00110220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+ { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+ { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+ { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+ { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+ { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+ { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+ { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+ { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+ { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+ { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+ { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+ { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+ { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+ { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+ { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+ { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+ { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+ { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+ { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+ { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+ { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+ { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+ { 0x02000031, 0x2e000229, 0x00000200, 0x00000200 },
+ { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x048eb801 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000a000 },
+ { 0x02000031, 0x2e800229, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x00000000 },
+ { 0x00400040, 0x22082e2c, 0x0069005c, 0x03a003a0 },
+ { 0x00000409, 0x23603da5, 0x000000e0, 0x00010001 },
+ { 0x00000c01, 0x236401a5, 0x000000e2, 0x00000000 },
+ { 0x00000801, 0x23680061, 0x00000000, 0x0001001f },
+ { 0x80400040, 0xc0002d29, 0x008a8000, 0x00800080 },
+ { 0x80400040, 0xc0202d29, 0x008a8020, 0x00800080 },
+ { 0x80400040, 0xc0402d29, 0x008a8040, 0x00800080 },
+ { 0x80400040, 0xc0602d29, 0x008a8060, 0x00800080 },
+ { 0x80600040, 0xa4002d29, 0x008d8400, 0x00800080 },
+ { 0x80600040, 0xa4202d29, 0x008d8420, 0x00800080 },
+ { 0x80600040, 0xa4402d29, 0x008d8440, 0x00800080 },
+ { 0x80600040, 0xa4602d29, 0x008d8460, 0x00800080 },
+ { 0x80400040, 0xc8002d29, 0x008a8800, 0x00800080 },
+ { 0x80400040, 0xc8202d29, 0x008a8820, 0x00800080 },
+ { 0x80400040, 0xc8402d29, 0x008a8840, 0x00800080 },
+ { 0x80400040, 0xc8602d29, 0x008a8860, 0x00800080 },
+ { 0x80400040, 0xc0102d29, 0x008a8010, 0x00800080 },
+ { 0x80400040, 0xc0302d29, 0x008a8030, 0x00800080 },
+ { 0x80400040, 0xc0502d29, 0x008a8050, 0x00800080 },
+ { 0x80400040, 0xc0702d29, 0x008a8070, 0x00800080 },
+ { 0x80600040, 0xa4102d29, 0x008d8410, 0x00800080 },
+ { 0x80600040, 0xa4302d29, 0x008d8430, 0x00800080 },
+ { 0x80600040, 0xa4502d29, 0x008d8450, 0x00800080 },
+ { 0x80600040, 0xa4702d29, 0x008d8470, 0x00800080 },
+ { 0x80400040, 0xc8102d29, 0x008a8810, 0x00800080 },
+ { 0x80400040, 0xc8302d29, 0x008a8830, 0x00800080 },
+ { 0x80400040, 0xc8502d29, 0x008a8850, 0x00800080 },
+ { 0x80400040, 0xc8702d29, 0x008a8870, 0x00800080 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x02000200 },
+ { 0x80400040, 0xc0002d29, 0x008a8000, 0x00800080 },
+ { 0x80400040, 0xc0202d29, 0x008a8020, 0x00800080 },
+ { 0x80400040, 0xc0402d29, 0x008a8040, 0x00800080 },
+ { 0x80400040, 0xc0602d29, 0x008a8060, 0x00800080 },
+ { 0x80600040, 0xa4002d29, 0x008d8400, 0x00800080 },
+ { 0x80600040, 0xa4202d29, 0x008d8420, 0x00800080 },
+ { 0x80600040, 0xa4402d29, 0x008d8440, 0x00800080 },
+ { 0x80600040, 0xa4602d29, 0x008d8460, 0x00800080 },
+ { 0x80400040, 0xc8002d29, 0x008a8800, 0x00800080 },
+ { 0x80400040, 0xc8202d29, 0x008a8820, 0x00800080 },
+ { 0x80400040, 0xc8402d29, 0x008a8840, 0x00800080 },
+ { 0x80400040, 0xc8602d29, 0x008a8860, 0x00800080 },
+ { 0x80400040, 0xc0102d29, 0x008a8010, 0x00800080 },
+ { 0x80400040, 0xc0302d29, 0x008a8030, 0x00800080 },
+ { 0x80400040, 0xc0502d29, 0x008a8050, 0x00800080 },
+ { 0x80400040, 0xc0702d29, 0x008a8070, 0x00800080 },
+ { 0x80600040, 0xa4102d29, 0x008d8410, 0x00800080 },
+ { 0x80600040, 0xa4302d29, 0x008d8430, 0x00800080 },
+ { 0x80600040, 0xa4502d29, 0x008d8450, 0x00800080 },
+ { 0x80600040, 0xa4702d29, 0x008d8470, 0x00800080 },
+ { 0x80400040, 0xc8102d29, 0x008a8810, 0x00800080 },
+ { 0x80400040, 0xc8302d29, 0x008a8830, 0x00800080 },
+ { 0x80400040, 0xc8502d29, 0x008a8850, 0x00800080 },
+ { 0x80400040, 0xc8702d29, 0x008a8870, 0x00800080 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x04000400 },
+ { 0x80400040, 0xc0002d29, 0x008a8000, 0x00800080 },
+ { 0x80400040, 0xc0202d29, 0x008a8020, 0x00800080 },
+ { 0x80400040, 0xc0402d29, 0x008a8040, 0x00800080 },
+ { 0x80400040, 0xc0602d29, 0x008a8060, 0x00800080 },
+ { 0x80600040, 0xa4002d29, 0x008d8400, 0x00800080 },
+ { 0x80600040, 0xa4202d29, 0x008d8420, 0x00800080 },
+ { 0x80600040, 0xa4402d29, 0x008d8440, 0x00800080 },
+ { 0x80600040, 0xa4602d29, 0x008d8460, 0x00800080 },
+ { 0x80400040, 0xc8002d29, 0x008a8800, 0x00800080 },
+ { 0x80400040, 0xc8202d29, 0x008a8820, 0x00800080 },
+ { 0x80400040, 0xc8402d29, 0x008a8840, 0x00800080 },
+ { 0x80400040, 0xc8602d29, 0x008a8860, 0x00800080 },
+ { 0x80400040, 0xc0102d29, 0x008a8010, 0x00800080 },
+ { 0x80400040, 0xc0302d29, 0x008a8030, 0x00800080 },
+ { 0x80400040, 0xc0502d29, 0x008a8050, 0x00800080 },
+ { 0x80400040, 0xc0702d29, 0x008a8070, 0x00800080 },
+ { 0x80600040, 0xa4102d29, 0x008d8410, 0x00800080 },
+ { 0x80600040, 0xa4302d29, 0x008d8430, 0x00800080 },
+ { 0x80600040, 0xa4502d29, 0x008d8450, 0x00800080 },
+ { 0x80600040, 0xa4702d29, 0x008d8470, 0x00800080 },
+ { 0x80400040, 0xc8102d29, 0x008a8810, 0x00800080 },
+ { 0x80400040, 0xc8302d29, 0x008a8830, 0x00800080 },
+ { 0x80400040, 0xc8502d29, 0x008a8850, 0x00800080 },
+ { 0x80400040, 0xc8702d29, 0x008a8870, 0x00800080 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x06000600 },
+ { 0x80400040, 0xc0002d29, 0x008a8000, 0x00800080 },
+ { 0x80400040, 0xc0202d29, 0x008a8020, 0x00800080 },
+ { 0x80400040, 0xc0402d29, 0x008a8040, 0x00800080 },
+ { 0x80400040, 0xc0602d29, 0x008a8060, 0x00800080 },
+ { 0x80600040, 0xa4002d29, 0x008d8400, 0x00800080 },
+ { 0x80600040, 0xa4202d29, 0x008d8420, 0x00800080 },
+ { 0x80600040, 0xa4402d29, 0x008d8440, 0x00800080 },
+ { 0x80600040, 0xa4602d29, 0x008d8460, 0x00800080 },
+ { 0x80400040, 0xc8002d29, 0x008a8800, 0x00800080 },
+ { 0x80400040, 0xc8202d29, 0x008a8820, 0x00800080 },
+ { 0x80400040, 0xc8402d29, 0x008a8840, 0x00800080 },
+ { 0x80400040, 0xc8602d29, 0x008a8860, 0x00800080 },
+ { 0x80400040, 0xc0102d29, 0x008a8010, 0x00800080 },
+ { 0x80400040, 0xc0302d29, 0x008a8030, 0x00800080 },
+ { 0x80400040, 0xc0502d29, 0x008a8050, 0x00800080 },
+ { 0x80400040, 0xc0702d29, 0x008a8070, 0x00800080 },
+ { 0x80600040, 0xa4102d29, 0x008d8410, 0x00800080 },
+ { 0x80600040, 0xa4302d29, 0x008d8430, 0x00800080 },
+ { 0x80600040, 0xa4502d29, 0x008d8450, 0x00800080 },
+ { 0x80600040, 0xa4702d29, 0x008d8470, 0x00800080 },
+ { 0x80400040, 0xc8102d29, 0x008a8810, 0x00800080 },
+ { 0x80400040, 0xc8302d29, 0x008a8830, 0x00800080 },
+ { 0x80400040, 0xc8502d29, 0x008a8850, 0x00800080 },
+ { 0x80400040, 0xc8702d29, 0x008a8870, 0x00800080 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x08000800 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x00000000 },
+ { 0x00600001, 0x23800021, 0x008d0360, 0x00000000 },
+ { 0x00600001, 0x24a00021, 0x008d0360, 0x00000000 },
+ { 0x00000040, 0x24a41ca5, 0x00000364, 0x00000002 },
+ { 0x00600401, 0xf8000231, 0x00cf8001, 0x00000000 },
+ { 0x00600401, 0xf8200231, 0x00cf8021, 0x00000000 },
+ { 0x00800c01, 0xd0000231, 0x00d28401, 0x00000000 },
+ { 0x00800c01, 0xd0200231, 0x00d28421, 0x00000000 },
+ { 0x00600801, 0xf4000231, 0x00cf8801, 0x00000000 },
+ { 0x00600801, 0xf4200231, 0x00cf8821, 0x00000000 },
+ { 0x00600401, 0xf9200231, 0x00cf8041, 0x00000000 },
+ { 0x00600401, 0xf9400231, 0x00cf8061, 0x00000000 },
+ { 0x00800c01, 0xd1200231, 0x00d28441, 0x00000000 },
+ { 0x00800c01, 0xd1400231, 0x00d28461, 0x00000000 },
+ { 0x00600801, 0xf5200231, 0x00cf8841, 0x00000000 },
+ { 0x00600801, 0xf5400231, 0x00cf8861, 0x00000000 },
+ { 0x05000031, 0x20000e24, 0x00000380, 0x060a8018 },
+ { 0x05000031, 0x20000e24, 0x000004a0, 0x060a8018 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x02000200 },
+ { 0x00000040, 0x23841ca5, 0x00000364, 0x00000004 },
+ { 0x00000040, 0x24a41ca5, 0x00000364, 0x00000006 },
+ { 0x00600401, 0xf8000231, 0x00cf8001, 0x00000000 },
+ { 0x00600401, 0xf8200231, 0x00cf8021, 0x00000000 },
+ { 0x00800c01, 0xd0000231, 0x00d28401, 0x00000000 },
+ { 0x00800c01, 0xd0200231, 0x00d28421, 0x00000000 },
+ { 0x00600801, 0xf4000231, 0x00cf8801, 0x00000000 },
+ { 0x00600801, 0xf4200231, 0x00cf8821, 0x00000000 },
+ { 0x00600401, 0xf9200231, 0x00cf8041, 0x00000000 },
+ { 0x00600401, 0xf9400231, 0x00cf8061, 0x00000000 },
+ { 0x00800c01, 0xd1200231, 0x00d28441, 0x00000000 },
+ { 0x00800c01, 0xd1400231, 0x00d28461, 0x00000000 },
+ { 0x00600801, 0xf5200231, 0x00cf8841, 0x00000000 },
+ { 0x00600801, 0xf5400231, 0x00cf8861, 0x00000000 },
+ { 0x05000031, 0x20000e24, 0x00000380, 0x060a8018 },
+ { 0x05000031, 0x20000e24, 0x000004a0, 0x060a8018 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x04000400 },
+ { 0x00000040, 0x23841ca5, 0x00000364, 0x00000008 },
+ { 0x00000040, 0x24a41ca5, 0x00000364, 0x0000000a },
+ { 0x00600401, 0xf8000231, 0x00cf8001, 0x00000000 },
+ { 0x00600401, 0xf8200231, 0x00cf8021, 0x00000000 },
+ { 0x00800c01, 0xd0000231, 0x00d28401, 0x00000000 },
+ { 0x00800c01, 0xd0200231, 0x00d28421, 0x00000000 },
+ { 0x00600801, 0xf4000231, 0x00cf8801, 0x00000000 },
+ { 0x00600801, 0xf4200231, 0x00cf8821, 0x00000000 },
+ { 0x00600401, 0xf9200231, 0x00cf8041, 0x00000000 },
+ { 0x00600401, 0xf9400231, 0x00cf8061, 0x00000000 },
+ { 0x00800c01, 0xd1200231, 0x00d28441, 0x00000000 },
+ { 0x00800c01, 0xd1400231, 0x00d28461, 0x00000000 },
+ { 0x00600801, 0xf5200231, 0x00cf8841, 0x00000000 },
+ { 0x00600801, 0xf5400231, 0x00cf8861, 0x00000000 },
+ { 0x05000031, 0x20000e24, 0x00000380, 0x060a8018 },
+ { 0x05000031, 0x20000e24, 0x000004a0, 0x060a8018 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x06000600 },
+ { 0x00000040, 0x23841ca5, 0x00000364, 0x0000000c },
+ { 0x00000040, 0x24a41ca5, 0x00000364, 0x0000000e },
+ { 0x00600401, 0xf8000231, 0x00cf8001, 0x00000000 },
+ { 0x00600401, 0xf8200231, 0x00cf8021, 0x00000000 },
+ { 0x00800c01, 0xd0000231, 0x00d28401, 0x00000000 },
+ { 0x00800c01, 0xd0200231, 0x00d28421, 0x00000000 },
+ { 0x00600801, 0xf4000231, 0x00cf8801, 0x00000000 },
+ { 0x00600801, 0xf4200231, 0x00cf8821, 0x00000000 },
+ { 0x00600401, 0xf9200231, 0x00cf8041, 0x00000000 },
+ { 0x00600401, 0xf9400231, 0x00cf8061, 0x00000000 },
+ { 0x00800c01, 0xd1200231, 0x00d28441, 0x00000000 },
+ { 0x00800c01, 0xd1400231, 0x00d28461, 0x00000000 },
+ { 0x00600801, 0xf5200231, 0x00cf8841, 0x00000000 },
+ { 0x00600801, 0xf5400231, 0x00cf8861, 0x00000000 },
+ { 0x05000031, 0x20000e24, 0x00000380, 0x060a8018 },
+ { 0x05000031, 0x20000e24, 0x000004a0, 0x060a8018 },
+ { 0x00600001, 0x2fe00021, 0x008d0000, 0x00000000 },
+ { 0x07000031, 0x20001e24, 0x00000fe0, 0x82000010 },
diff --git a/src/shaders/post_processing/gen7/pl2_to_pl2.asm b/src/shaders/post_processing/gen7/pl2_to_pl2.asm
new file mode 100644
index 0000000..6e840d5
--- /dev/null
+++ b/src/shaders/post_processing/gen7/pl2_to_pl2.asm
@@ -0,0 +1,17 @@
+// Module name: AVS
+.kernel PL2_TO_PL2
+.code
+
+#include "VP_Setup.g4a"
+#include "Set_Layer_0.g4a"
+#include "Set_AVS_Buf_0123_PL2.g4a"
+#include "PL2_AVS_Buf_0.g4a"
+#include "PL2_AVS_Buf_1.g4a"
+#include "PL2_AVS_Buf_2.g4a"
+#include "PL2_AVS_Buf_3.g4a"
+#include "Save_AVS_NV12.g4a"
+#include "EOT.g4a"
+
+.end_code
+
+.end_kernel
diff --git a/src/shaders/post_processing/gen7/pl2_to_pl2.g75b b/src/shaders/post_processing/gen7/pl2_to_pl2.g75b
new file mode 100644
index 0000000..c25432e
--- /dev/null
+++ b/src/shaders/post_processing/gen7/pl2_to_pl2.g75b
@@ -0,0 +1,654 @@
+ { 0x00600001, 0x23600021, 0x008d0000, 0x00000000 },
+ { 0x00600001, 0x23200021, 0x008d0000, 0x00000000 },
+ { 0x00600001, 0x23400021, 0x008d0000, 0x00000000 },
+ { 0x00600041, 0x206077bd, 0x008d0060, 0x000000f0 },
+ { 0x00200001, 0x211401bd, 0x004500e0, 0x00000000 },
+ { 0x01600010, 0x20002e24, 0x0000005a, 0x00010001 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x00000000 },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+ { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c077bd, 0x00000060, 0x00000114 },
+ { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a077bd, 0x00000080, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c077bd, 0x00000060, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a077bd, 0x00000080, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c077bd, 0x00000060, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a077bd, 0x00000080, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c077bd, 0x00000060, 0x00000220 },
+ { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a077bd, 0x00000080, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x00020002 },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+ { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+ { 0x00110048, 0x20c477bd, 0x00000064, 0x00000114 },
+ { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+ { 0x00000048, 0x20a477bd, 0x00000084, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+ { 0x00110048, 0x20c477bd, 0x00000064, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+ { 0x00000048, 0x20a477bd, 0x00000084, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+ { 0x00110048, 0x20c477bd, 0x00000064, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+ { 0x00000048, 0x20a477bd, 0x00000084, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+ { 0x00110048, 0x20c477bd, 0x00000064, 0x00000220 },
+ { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+ { 0x00000048, 0x20a477bd, 0x00000084, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x00040004 },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+ { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+ { 0x00110048, 0x20c877bd, 0x00000068, 0x00000114 },
+ { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+ { 0x00000048, 0x20a877bd, 0x00000088, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+ { 0x00110048, 0x20c877bd, 0x00000068, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+ { 0x00000048, 0x20a877bd, 0x00000088, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+ { 0x00110048, 0x20c877bd, 0x00000068, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+ { 0x00000048, 0x20a877bd, 0x00000088, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+ { 0x00110048, 0x20c877bd, 0x00000068, 0x00000220 },
+ { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+ { 0x00000048, 0x20a877bd, 0x00000088, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x00060006 },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+ { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+ { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000114 },
+ { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+ { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+ { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+ { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+ { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+ { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+ { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000220 },
+ { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+ { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x00080008 },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+ { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+ { 0x00110048, 0x20d077bd, 0x00000070, 0x00000114 },
+ { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+ { 0x00000048, 0x20b077bd, 0x00000090, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+ { 0x00110048, 0x20d077bd, 0x00000070, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+ { 0x00000048, 0x20b077bd, 0x00000090, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+ { 0x00110048, 0x20d077bd, 0x00000070, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+ { 0x00000048, 0x20b077bd, 0x00000090, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+ { 0x00110048, 0x20d077bd, 0x00000070, 0x00000220 },
+ { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+ { 0x00000048, 0x20b077bd, 0x00000090, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x000a000a },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+ { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+ { 0x00110048, 0x20d477bd, 0x00000074, 0x00000114 },
+ { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+ { 0x00000048, 0x20b477bd, 0x00000094, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+ { 0x00110048, 0x20d477bd, 0x00000074, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+ { 0x00000048, 0x20b477bd, 0x00000094, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+ { 0x00110048, 0x20d477bd, 0x00000074, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+ { 0x00000048, 0x20b477bd, 0x00000094, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+ { 0x00110048, 0x20d477bd, 0x00000074, 0x00000220 },
+ { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+ { 0x00000048, 0x20b477bd, 0x00000094, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x000c000c },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+ { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+ { 0x00110048, 0x20d877bd, 0x00000078, 0x00000114 },
+ { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+ { 0x00000048, 0x20b877bd, 0x00000098, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+ { 0x00110048, 0x20d877bd, 0x00000078, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+ { 0x00000048, 0x20b877bd, 0x00000098, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+ { 0x00110048, 0x20d877bd, 0x00000078, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+ { 0x00000048, 0x20b877bd, 0x00000098, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+ { 0x00110048, 0x20d877bd, 0x00000078, 0x00000220 },
+ { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+ { 0x00000048, 0x20b877bd, 0x00000098, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x000e000e },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+ { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+ { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000114 },
+ { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+ { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+ { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+ { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+ { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+ { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+ { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000220 },
+ { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+ { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00800001, 0x22a00169, 0x00000000, 0xffffffff },
+ { 0x02000010, 0x20002d24, 0x020000e4, 0x00000000 },
+ { 0x02010010, 0x20002d24, 0x020000e6, 0x00000000 },
+ { 0x00110220, 0x34001c00, 0x02001400, 0x000000d0 },
+ { 0x00000005, 0x23022d31, 0x00000044, 0x00030003 },
+ { 0x00000401, 0x233803bd, 0x000000f4, 0x00000000 },
+ { 0x00000c01, 0x233003bd, 0x00000060, 0x00000000 },
+ { 0x00000c01, 0x233403bd, 0x00000080, 0x00000000 },
+ { 0x00000c01, 0x232803bd, 0x000000c0, 0x00000000 },
+ { 0x00000801, 0x232c03bd, 0x000000a0, 0x00000000 },
+ { 0x00000001, 0x240803bc, 0x000000c0, 0x00000000 },
+ { 0x00000048, 0x24087fbc, 0x00000060, 0x41000000 },
+ { 0x00000448, 0x22e87fbd, 0x000000f4, 0x41e00000 },
+ { 0x00000c41, 0x22e47fbd, 0x00000080, 0x40800000 },
+ { 0x00000001, 0x241003bc, 0x00000060, 0x00000000 },
+ { 0x00000c48, 0x22f07fbd, 0x000000f4, 0x41000000 },
+ { 0x00000801, 0x22f40061, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00400001, 0x2400036c, 0x00000000, 0x00006ea2 },
+ { 0x00400040, 0x24002d8c, 0x00690400, 0x00460046 },
+ { 0x00400009, 0x22c02d8d, 0x00690400, 0x00050005 },
+ { 0x00400001, 0x2400036c, 0x00000000, 0x00006204 },
+ { 0x00400040, 0x24002d8c, 0x00690400, 0x00400040 },
+ { 0x00400409, 0x22402d8d, 0x00690400, 0x00050005 },
+ { 0x00000801, 0x22500061, 0x00000000, 0x01000100 },
+ { 0x00000001, 0x22d00061, 0x00000000, 0x00400040 },
+ { 0x00110220, 0x34001c00, 0x02001400, 0x00000290 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+ { 0x00000401, 0x233c0021, 0x000000fc, 0x00000000 },
+ { 0x00000801, 0x23240121, 0x000000f8, 0x00000000 },
+ { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+ { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+ { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+ { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+ { 0x00110220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+ { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+ { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+ { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+ { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+ { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+ { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+ { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+ { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+ { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+ { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+ { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+ { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+ { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+ { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+ { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+ { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+ { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+ { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+ { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+ { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+ { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+ { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+ { 0x02000031, 0x28000229, 0x00000200, 0x00000200 },
+ { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x048eb801 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000a000 },
+ { 0x02000031, 0x28800229, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00110220, 0x34001c00, 0x02001400, 0x00000280 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+ { 0x00000040, 0x23240d21, 0x000000f8, 0x00000001 },
+ { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+ { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+ { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+ { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+ { 0x00110220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+ { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+ { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+ { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+ { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+ { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+ { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+ { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+ { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+ { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+ { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+ { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+ { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+ { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+ { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+ { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+ { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+ { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+ { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+ { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+ { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+ { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+ { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+ { 0x02000031, 0x2a000229, 0x00000200, 0x00000200 },
+ { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x048eb801 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000a000 },
+ { 0x02000031, 0x2a800229, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00110220, 0x34001c00, 0x02001400, 0x00000280 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+ { 0x00000040, 0x23240d21, 0x000000f8, 0x00000002 },
+ { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+ { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+ { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+ { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+ { 0x00110220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+ { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+ { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+ { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+ { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+ { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+ { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+ { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+ { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+ { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+ { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+ { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+ { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+ { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+ { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+ { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+ { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+ { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+ { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+ { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+ { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+ { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+ { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+ { 0x02000031, 0x2c000229, 0x00000200, 0x00000200 },
+ { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x048eb801 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000a000 },
+ { 0x02000031, 0x2c800229, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00110220, 0x34001c00, 0x02001400, 0x00000280 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+ { 0x00000040, 0x23240d21, 0x000000f8, 0x00000003 },
+ { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+ { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+ { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+ { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+ { 0x00110220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+ { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+ { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+ { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+ { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+ { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+ { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+ { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+ { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+ { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+ { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+ { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+ { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+ { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+ { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+ { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+ { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+ { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+ { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+ { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+ { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+ { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+ { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+ { 0x02000031, 0x2e000229, 0x00000200, 0x00000200 },
+ { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x048eb801 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000a000 },
+ { 0x02000031, 0x2e800229, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x00000000 },
+ { 0x00400040, 0x22082da8, 0x006902c0, 0x02000200 },
+ { 0x00600001, 0x23800021, 0x008d0360, 0x00000000 },
+ { 0x00600001, 0x24a00021, 0x008d0360, 0x00000000 },
+ { 0x00200401, 0x238001a5, 0x004500e0, 0x00000000 },
+ { 0x00000401, 0x24a001a5, 0x000000e0, 0x00000000 },
+ { 0x00000c08, 0x24a43da5, 0x000000e2, 0x00010001 },
+ { 0x00000801, 0x23880061, 0x00000000, 0x000f000f },
+ { 0x00000801, 0x24a80061, 0x00000000, 0x0007000f },
+ { 0x80800040, 0xa4002d29, 0x00b18400, 0x00800080 },
+ { 0x80800040, 0xa4202d29, 0x00b18420, 0x00800080 },
+ { 0x80800040, 0xa4402d29, 0x00b18440, 0x00800080 },
+ { 0x80800040, 0xa4602d29, 0x00b18460, 0x00800080 },
+ { 0x00600008, 0x45c03d29, 0x00ae8800, 0x00010001 },
+ { 0x00600008, 0x45e03d29, 0x00ae8820, 0x00010001 },
+ { 0x00600008, 0x46003d29, 0x00ae8840, 0x00010001 },
+ { 0x00600008, 0x46203d29, 0x00ae8860, 0x00010001 },
+ { 0x00600040, 0x45c02529, 0x00ae05c0, 0x00ae05e0 },
+ { 0x80600040, 0xc8002d29, 0x00ae05c0, 0x00800080 },
+ { 0x00600040, 0x46002529, 0x00ae0600, 0x00ae0620 },
+ { 0x80600040, 0xc8402d29, 0x00ae0600, 0x00800080 },
+ { 0x00600008, 0x46403d29, 0x00ae8000, 0x00010001 },
+ { 0x00600008, 0x46603d29, 0x00ae8020, 0x00010001 },
+ { 0x00600008, 0x46803d29, 0x00ae8040, 0x00010001 },
+ { 0x00600008, 0x46a03d29, 0x00ae8060, 0x00010001 },
+ { 0x00600040, 0x46402529, 0x00ae0640, 0x00ae0660 },
+ { 0x80600040, 0xc0002d29, 0x00ae0640, 0x00800080 },
+ { 0x00600040, 0x46802529, 0x00ae0680, 0x00ae06a0 },
+ { 0x80600040, 0xc0402d29, 0x00ae0680, 0x00800080 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x04000400 },
+ { 0x80800040, 0xb4002d29, 0x00b19400, 0x00800080 },
+ { 0x80800040, 0xb4202d29, 0x00b19420, 0x00800080 },
+ { 0x80800040, 0xb4402d29, 0x00b19440, 0x00800080 },
+ { 0x80800040, 0xb4602d29, 0x00b19460, 0x00800080 },
+ { 0x00600008, 0x45c03d29, 0x00ae9800, 0x00010001 },
+ { 0x00600008, 0x45e03d29, 0x00ae9820, 0x00010001 },
+ { 0x00600008, 0x46003d29, 0x00ae9840, 0x00010001 },
+ { 0x00600008, 0x46203d29, 0x00ae9860, 0x00010001 },
+ { 0x00600040, 0x45c02529, 0x00ae05c0, 0x00ae05e0 },
+ { 0x80600040, 0xd8002d29, 0x00ae05c0, 0x00800080 },
+ { 0x00600040, 0x46002529, 0x00ae0600, 0x00ae0620 },
+ { 0x80600040, 0xd8402d29, 0x00ae0600, 0x00800080 },
+ { 0x00600008, 0x46403d29, 0x00ae9000, 0x00010001 },
+ { 0x00600008, 0x46603d29, 0x00ae9020, 0x00010001 },
+ { 0x00600008, 0x46803d29, 0x00ae9040, 0x00010001 },
+ { 0x00600008, 0x46a03d29, 0x00ae9060, 0x00010001 },
+ { 0x00600040, 0x46402529, 0x00ae0640, 0x00ae0660 },
+ { 0x80600040, 0xd0002d29, 0x00ae0640, 0x00800080 },
+ { 0x00600040, 0x46802529, 0x00ae0680, 0x00ae06a0 },
+ { 0x80600040, 0xd0402d29, 0x00ae0680, 0x00800080 },
+ { 0x00400040, 0x22082da8, 0x006902c0, 0x06000600 },
+ { 0x80800040, 0xa4002d29, 0x00b18400, 0x00800080 },
+ { 0x80800040, 0xa4202d29, 0x00b18420, 0x00800080 },
+ { 0x80800040, 0xa4402d29, 0x00b18440, 0x00800080 },
+ { 0x80800040, 0xa4602d29, 0x00b18460, 0x00800080 },
+ { 0x00600008, 0x45c03d29, 0x00ae8800, 0x00010001 },
+ { 0x00600008, 0x45e03d29, 0x00ae8820, 0x00010001 },
+ { 0x00600008, 0x46003d29, 0x00ae8840, 0x00010001 },
+ { 0x00600008, 0x46203d29, 0x00ae8860, 0x00010001 },
+ { 0x00600040, 0x45c02529, 0x00ae05c0, 0x00ae05e0 },
+ { 0x80600040, 0xc8002d29, 0x00ae05c0, 0x00800080 },
+ { 0x00600040, 0x46002529, 0x00ae0600, 0x00ae0620 },
+ { 0x80600040, 0xc8402d29, 0x00ae0600, 0x00800080 },
+ { 0x00600008, 0x46403d29, 0x00ae8000, 0x00010001 },
+ { 0x00600008, 0x46603d29, 0x00ae8020, 0x00010001 },
+ { 0x00600008, 0x46803d29, 0x00ae8040, 0x00010001 },
+ { 0x00600008, 0x46a03d29, 0x00ae8060, 0x00010001 },
+ { 0x00600040, 0x46402529, 0x00ae0640, 0x00ae0660 },
+ { 0x80600040, 0xc0002d29, 0x00ae0640, 0x00800080 },
+ { 0x00600040, 0x46802529, 0x00ae0680, 0x00ae06a0 },
+ { 0x80600040, 0xc0402d29, 0x00ae0680, 0x00800080 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x04000400 },
+ { 0x80800040, 0xb4002d29, 0x00b19400, 0x00800080 },
+ { 0x80800040, 0xb4202d29, 0x00b19420, 0x00800080 },
+ { 0x80800040, 0xb4402d29, 0x00b19440, 0x00800080 },
+ { 0x80800040, 0xb4602d29, 0x00b19460, 0x00800080 },
+ { 0x00600008, 0x45c03d29, 0x00ae9800, 0x00010001 },
+ { 0x00600008, 0x45e03d29, 0x00ae9820, 0x00010001 },
+ { 0x00600008, 0x46003d29, 0x00ae9840, 0x00010001 },
+ { 0x00600008, 0x46203d29, 0x00ae9860, 0x00010001 },
+ { 0x00600040, 0x45c02529, 0x00ae05c0, 0x00ae05e0 },
+ { 0x80600040, 0xd8002d29, 0x00ae05c0, 0x00800080 },
+ { 0x00600040, 0x46002529, 0x00ae0600, 0x00ae0620 },
+ { 0x80600040, 0xd8402d29, 0x00ae0600, 0x00800080 },
+ { 0x00600008, 0x46403d29, 0x00ae9000, 0x00010001 },
+ { 0x00600008, 0x46603d29, 0x00ae9020, 0x00010001 },
+ { 0x00600008, 0x46803d29, 0x00ae9040, 0x00010001 },
+ { 0x00600008, 0x46a03d29, 0x00ae9060, 0x00010001 },
+ { 0x00600040, 0x46402529, 0x00ae0640, 0x00ae0660 },
+ { 0x80600040, 0xd0002d29, 0x00ae0640, 0x00800080 },
+ { 0x00600040, 0x46802529, 0x00ae0680, 0x00ae06a0 },
+ { 0x80600040, 0xd0402d29, 0x00ae0680, 0x00800080 },
+ { 0x00400040, 0x22082da8, 0x006902c0, 0x06000600 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x00000000 },
+ { 0x00400040, 0x22082da8, 0x006902c0, 0x02000200 },
+ { 0x00800401, 0x23a00231, 0x00d28401, 0x00000000 },
+ { 0x00800801, 0x23b00231, 0x00d28421, 0x00000000 },
+ { 0x00800401, 0x23c00231, 0x00d28441, 0x00000000 },
+ { 0x00800801, 0x23d00231, 0x00d28461, 0x00000000 },
+ { 0x00600401, 0x44c00231, 0x00cf8801, 0x00000000 },
+ { 0x00600c01, 0x44d00231, 0x00cf8841, 0x00000000 },
+ { 0x00600c01, 0x44c10231, 0x00cf8001, 0x00000000 },
+ { 0x00600801, 0x44d10231, 0x00cf8041, 0x00000000 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x04000400 },
+ { 0x00800401, 0x23e00231, 0x00d29401, 0x00000000 },
+ { 0x00800801, 0x23f00231, 0x00d29421, 0x00000000 },
+ { 0x00800401, 0x24000231, 0x00d29441, 0x00000000 },
+ { 0x00800801, 0x24100231, 0x00d29461, 0x00000000 },
+ { 0x00600401, 0x44e00231, 0x00cf9801, 0x00000000 },
+ { 0x00600c01, 0x44f00231, 0x00cf9841, 0x00000000 },
+ { 0x00600c01, 0x44e10231, 0x00cf9001, 0x00000000 },
+ { 0x00600801, 0x44f10231, 0x00cf9041, 0x00000000 },
+ { 0x00400040, 0x22082da8, 0x006902c0, 0x06000600 },
+ { 0x00800401, 0x24200231, 0x00d28401, 0x00000000 },
+ { 0x00800801, 0x24300231, 0x00d28421, 0x00000000 },
+ { 0x00800401, 0x24400231, 0x00d28441, 0x00000000 },
+ { 0x00800801, 0x24500231, 0x00d28461, 0x00000000 },
+ { 0x00600401, 0x45000231, 0x00cf8801, 0x00000000 },
+ { 0x00600c01, 0x45100231, 0x00cf8841, 0x00000000 },
+ { 0x00600c01, 0x45010231, 0x00cf8001, 0x00000000 },
+ { 0x00600801, 0x45110231, 0x00cf8041, 0x00000000 },
+ { 0x00800401, 0x24600231, 0x00d29401, 0x00000000 },
+ { 0x00800801, 0x24700231, 0x00d29421, 0x00000000 },
+ { 0x00800401, 0x24800231, 0x00d29441, 0x00000000 },
+ { 0x00800801, 0x24900231, 0x00d29461, 0x00000000 },
+ { 0x00600401, 0x45200231, 0x00cf9801, 0x00000000 },
+ { 0x00600c01, 0x45300231, 0x00cf9841, 0x00000000 },
+ { 0x00600c01, 0x45210231, 0x00cf9001, 0x00000000 },
+ { 0x00600801, 0x45310231, 0x00cf9041, 0x00000000 },
+ { 0x05000031, 0x20000e24, 0x00000380, 0x120a8018 },
+ { 0x05000031, 0x20000e24, 0x000004a0, 0x0a0a8019 },
+ { 0x00600001, 0x2fe00021, 0x008d0000, 0x00000000 },
+ { 0x07000031, 0x20001e24, 0x00000fe0, 0x82000010 },
diff --git a/src/shaders/post_processing/gen7/pl2_to_pl2.g7b b/src/shaders/post_processing/gen7/pl2_to_pl2.g7b
new file mode 100644
index 0000000..445ae01
--- /dev/null
+++ b/src/shaders/post_processing/gen7/pl2_to_pl2.g7b
@@ -0,0 +1,654 @@
+ { 0x00600001, 0x23600021, 0x008d0000, 0x00000000 },
+ { 0x00600001, 0x23200021, 0x008d0000, 0x00000000 },
+ { 0x00600001, 0x23400021, 0x008d0000, 0x00000000 },
+ { 0x00600041, 0x206077bd, 0x008d0060, 0x000000f0 },
+ { 0x00200001, 0x211401bd, 0x004500e0, 0x00000000 },
+ { 0x01600010, 0x20002e24, 0x0000005a, 0x00010001 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x00000000 },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+ { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c077bd, 0x00000060, 0x00000114 },
+ { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a077bd, 0x00000080, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c077bd, 0x00000060, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a077bd, 0x00000080, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c077bd, 0x00000060, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a077bd, 0x00000080, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c077bd, 0x00000060, 0x00000220 },
+ { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a077bd, 0x00000080, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x00020002 },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+ { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+ { 0x00110048, 0x20c477bd, 0x00000064, 0x00000114 },
+ { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+ { 0x00000048, 0x20a477bd, 0x00000084, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+ { 0x00110048, 0x20c477bd, 0x00000064, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+ { 0x00000048, 0x20a477bd, 0x00000084, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+ { 0x00110048, 0x20c477bd, 0x00000064, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+ { 0x00000048, 0x20a477bd, 0x00000084, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+ { 0x00110048, 0x20c477bd, 0x00000064, 0x00000220 },
+ { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+ { 0x00000048, 0x20a477bd, 0x00000084, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x00040004 },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+ { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+ { 0x00110048, 0x20c877bd, 0x00000068, 0x00000114 },
+ { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+ { 0x00000048, 0x20a877bd, 0x00000088, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+ { 0x00110048, 0x20c877bd, 0x00000068, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+ { 0x00000048, 0x20a877bd, 0x00000088, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+ { 0x00110048, 0x20c877bd, 0x00000068, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+ { 0x00000048, 0x20a877bd, 0x00000088, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+ { 0x00110048, 0x20c877bd, 0x00000068, 0x00000220 },
+ { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+ { 0x00000048, 0x20a877bd, 0x00000088, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x00060006 },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+ { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+ { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000114 },
+ { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+ { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+ { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+ { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+ { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+ { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+ { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000220 },
+ { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+ { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x00080008 },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+ { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+ { 0x00110048, 0x20d077bd, 0x00000070, 0x00000114 },
+ { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+ { 0x00000048, 0x20b077bd, 0x00000090, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+ { 0x00110048, 0x20d077bd, 0x00000070, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+ { 0x00000048, 0x20b077bd, 0x00000090, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+ { 0x00110048, 0x20d077bd, 0x00000070, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+ { 0x00000048, 0x20b077bd, 0x00000090, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+ { 0x00110048, 0x20d077bd, 0x00000070, 0x00000220 },
+ { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+ { 0x00000048, 0x20b077bd, 0x00000090, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x000a000a },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+ { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+ { 0x00110048, 0x20d477bd, 0x00000074, 0x00000114 },
+ { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+ { 0x00000048, 0x20b477bd, 0x00000094, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+ { 0x00110048, 0x20d477bd, 0x00000074, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+ { 0x00000048, 0x20b477bd, 0x00000094, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+ { 0x00110048, 0x20d477bd, 0x00000074, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+ { 0x00000048, 0x20b477bd, 0x00000094, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+ { 0x00110048, 0x20d477bd, 0x00000074, 0x00000220 },
+ { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+ { 0x00000048, 0x20b477bd, 0x00000094, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x000c000c },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+ { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+ { 0x00110048, 0x20d877bd, 0x00000078, 0x00000114 },
+ { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+ { 0x00000048, 0x20b877bd, 0x00000098, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+ { 0x00110048, 0x20d877bd, 0x00000078, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+ { 0x00000048, 0x20b877bd, 0x00000098, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+ { 0x00110048, 0x20d877bd, 0x00000078, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+ { 0x00000048, 0x20b877bd, 0x00000098, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+ { 0x00110048, 0x20d877bd, 0x00000078, 0x00000220 },
+ { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+ { 0x00000048, 0x20b877bd, 0x00000098, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x000e000e },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+ { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+ { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000114 },
+ { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+ { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+ { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+ { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+ { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+ { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+ { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000220 },
+ { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+ { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00800001, 0x22a00169, 0x00000000, 0xffffffff },
+ { 0x02000010, 0x20002d24, 0x020000e4, 0x00000000 },
+ { 0x02010010, 0x20002d24, 0x020000e6, 0x00000000 },
+ { 0x00110220, 0x34001c00, 0x02001400, 0x0000001a },
+ { 0x00000005, 0x23022d31, 0x00000044, 0x00030003 },
+ { 0x00000401, 0x233803bd, 0x000000f4, 0x00000000 },
+ { 0x00000c01, 0x233003bd, 0x00000060, 0x00000000 },
+ { 0x00000c01, 0x233403bd, 0x00000080, 0x00000000 },
+ { 0x00000c01, 0x232803bd, 0x000000c0, 0x00000000 },
+ { 0x00000801, 0x232c03bd, 0x000000a0, 0x00000000 },
+ { 0x00000001, 0x240803bc, 0x000000c0, 0x00000000 },
+ { 0x00000048, 0x24087fbc, 0x00000060, 0x41000000 },
+ { 0x00000448, 0x22e87fbd, 0x000000f4, 0x41e00000 },
+ { 0x00000c41, 0x22e47fbd, 0x00000080, 0x40800000 },
+ { 0x00000001, 0x241003bc, 0x00000060, 0x00000000 },
+ { 0x00000c48, 0x22f07fbd, 0x000000f4, 0x41000000 },
+ { 0x00000801, 0x22f40061, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00400001, 0x2400036c, 0x00000000, 0x00006ea2 },
+ { 0x00400040, 0x24002d8c, 0x00690400, 0x00460046 },
+ { 0x00400009, 0x22c02d8d, 0x00690400, 0x00050005 },
+ { 0x00400001, 0x2400036c, 0x00000000, 0x00006204 },
+ { 0x00400040, 0x24002d8c, 0x00690400, 0x00400040 },
+ { 0x00400409, 0x22402d8d, 0x00690400, 0x00050005 },
+ { 0x00000801, 0x22500061, 0x00000000, 0x01000100 },
+ { 0x00000001, 0x22d00061, 0x00000000, 0x00400040 },
+ { 0x00110220, 0x34001c00, 0x02001400, 0x00000052 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+ { 0x00000401, 0x233c0021, 0x000000fc, 0x00000000 },
+ { 0x00000801, 0x23240121, 0x000000f8, 0x00000000 },
+ { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+ { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+ { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+ { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+ { 0x00110220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+ { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+ { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+ { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+ { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+ { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+ { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+ { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+ { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+ { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+ { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+ { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+ { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+ { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+ { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+ { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+ { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+ { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+ { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+ { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+ { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+ { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+ { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+ { 0x02000031, 0x28000229, 0x00000200, 0x00000200 },
+ { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x048eb801 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000a000 },
+ { 0x02000031, 0x28800229, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00110220, 0x34001c00, 0x02001400, 0x00000050 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+ { 0x00000040, 0x23240d21, 0x000000f8, 0x00000001 },
+ { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+ { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+ { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+ { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+ { 0x00110220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+ { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+ { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+ { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+ { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+ { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+ { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+ { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+ { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+ { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+ { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+ { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+ { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+ { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+ { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+ { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+ { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+ { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+ { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+ { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+ { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+ { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+ { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+ { 0x02000031, 0x2a000229, 0x00000200, 0x00000200 },
+ { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x048eb801 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000a000 },
+ { 0x02000031, 0x2a800229, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00110220, 0x34001c00, 0x02001400, 0x00000050 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+ { 0x00000040, 0x23240d21, 0x000000f8, 0x00000002 },
+ { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+ { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+ { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+ { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+ { 0x00110220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+ { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+ { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+ { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+ { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+ { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+ { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+ { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+ { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+ { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+ { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+ { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+ { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+ { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+ { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+ { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+ { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+ { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+ { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+ { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+ { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+ { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+ { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+ { 0x02000031, 0x2c000229, 0x00000200, 0x00000200 },
+ { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x048eb801 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000a000 },
+ { 0x02000031, 0x2c800229, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00110220, 0x34001c00, 0x02001400, 0x00000050 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+ { 0x00000040, 0x23240d21, 0x000000f8, 0x00000003 },
+ { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+ { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+ { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+ { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+ { 0x00110220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+ { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+ { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+ { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+ { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+ { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+ { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+ { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+ { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+ { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+ { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+ { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+ { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+ { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+ { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+ { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+ { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+ { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+ { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+ { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+ { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+ { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+ { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+ { 0x02000031, 0x2e000229, 0x00000200, 0x00000200 },
+ { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x048eb801 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000a000 },
+ { 0x02000031, 0x2e800229, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x00000000 },
+ { 0x00400040, 0x22082da8, 0x006902c0, 0x02000200 },
+ { 0x00600001, 0x23800021, 0x008d0360, 0x00000000 },
+ { 0x00600001, 0x24a00021, 0x008d0360, 0x00000000 },
+ { 0x00200401, 0x238001a5, 0x004500e0, 0x00000000 },
+ { 0x00000401, 0x24a001a5, 0x000000e0, 0x00000000 },
+ { 0x00000c08, 0x24a43da5, 0x000000e2, 0x00010001 },
+ { 0x00000801, 0x23880061, 0x00000000, 0x000f000f },
+ { 0x00000801, 0x24a80061, 0x00000000, 0x0007000f },
+ { 0x80800040, 0xa4002d29, 0x00b18400, 0x00800080 },
+ { 0x80800040, 0xa4202d29, 0x00b18420, 0x00800080 },
+ { 0x80800040, 0xa4402d29, 0x00b18440, 0x00800080 },
+ { 0x80800040, 0xa4602d29, 0x00b18460, 0x00800080 },
+ { 0x00600008, 0x45c03d29, 0x00ae8800, 0x00010001 },
+ { 0x00600008, 0x45e03d29, 0x00ae8820, 0x00010001 },
+ { 0x00600008, 0x46003d29, 0x00ae8840, 0x00010001 },
+ { 0x00600008, 0x46203d29, 0x00ae8860, 0x00010001 },
+ { 0x00600040, 0x45c02529, 0x00ae05c0, 0x00ae05e0 },
+ { 0x80600040, 0xc8002d29, 0x00ae05c0, 0x00800080 },
+ { 0x00600040, 0x46002529, 0x00ae0600, 0x00ae0620 },
+ { 0x80600040, 0xc8402d29, 0x00ae0600, 0x00800080 },
+ { 0x00600008, 0x46403d29, 0x00ae8000, 0x00010001 },
+ { 0x00600008, 0x46603d29, 0x00ae8020, 0x00010001 },
+ { 0x00600008, 0x46803d29, 0x00ae8040, 0x00010001 },
+ { 0x00600008, 0x46a03d29, 0x00ae8060, 0x00010001 },
+ { 0x00600040, 0x46402529, 0x00ae0640, 0x00ae0660 },
+ { 0x80600040, 0xc0002d29, 0x00ae0640, 0x00800080 },
+ { 0x00600040, 0x46802529, 0x00ae0680, 0x00ae06a0 },
+ { 0x80600040, 0xc0402d29, 0x00ae0680, 0x00800080 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x04000400 },
+ { 0x80800040, 0xb4002d29, 0x00b19400, 0x00800080 },
+ { 0x80800040, 0xb4202d29, 0x00b19420, 0x00800080 },
+ { 0x80800040, 0xb4402d29, 0x00b19440, 0x00800080 },
+ { 0x80800040, 0xb4602d29, 0x00b19460, 0x00800080 },
+ { 0x00600008, 0x45c03d29, 0x00ae9800, 0x00010001 },
+ { 0x00600008, 0x45e03d29, 0x00ae9820, 0x00010001 },
+ { 0x00600008, 0x46003d29, 0x00ae9840, 0x00010001 },
+ { 0x00600008, 0x46203d29, 0x00ae9860, 0x00010001 },
+ { 0x00600040, 0x45c02529, 0x00ae05c0, 0x00ae05e0 },
+ { 0x80600040, 0xd8002d29, 0x00ae05c0, 0x00800080 },
+ { 0x00600040, 0x46002529, 0x00ae0600, 0x00ae0620 },
+ { 0x80600040, 0xd8402d29, 0x00ae0600, 0x00800080 },
+ { 0x00600008, 0x46403d29, 0x00ae9000, 0x00010001 },
+ { 0x00600008, 0x46603d29, 0x00ae9020, 0x00010001 },
+ { 0x00600008, 0x46803d29, 0x00ae9040, 0x00010001 },
+ { 0x00600008, 0x46a03d29, 0x00ae9060, 0x00010001 },
+ { 0x00600040, 0x46402529, 0x00ae0640, 0x00ae0660 },
+ { 0x80600040, 0xd0002d29, 0x00ae0640, 0x00800080 },
+ { 0x00600040, 0x46802529, 0x00ae0680, 0x00ae06a0 },
+ { 0x80600040, 0xd0402d29, 0x00ae0680, 0x00800080 },
+ { 0x00400040, 0x22082da8, 0x006902c0, 0x06000600 },
+ { 0x80800040, 0xa4002d29, 0x00b18400, 0x00800080 },
+ { 0x80800040, 0xa4202d29, 0x00b18420, 0x00800080 },
+ { 0x80800040, 0xa4402d29, 0x00b18440, 0x00800080 },
+ { 0x80800040, 0xa4602d29, 0x00b18460, 0x00800080 },
+ { 0x00600008, 0x45c03d29, 0x00ae8800, 0x00010001 },
+ { 0x00600008, 0x45e03d29, 0x00ae8820, 0x00010001 },
+ { 0x00600008, 0x46003d29, 0x00ae8840, 0x00010001 },
+ { 0x00600008, 0x46203d29, 0x00ae8860, 0x00010001 },
+ { 0x00600040, 0x45c02529, 0x00ae05c0, 0x00ae05e0 },
+ { 0x80600040, 0xc8002d29, 0x00ae05c0, 0x00800080 },
+ { 0x00600040, 0x46002529, 0x00ae0600, 0x00ae0620 },
+ { 0x80600040, 0xc8402d29, 0x00ae0600, 0x00800080 },
+ { 0x00600008, 0x46403d29, 0x00ae8000, 0x00010001 },
+ { 0x00600008, 0x46603d29, 0x00ae8020, 0x00010001 },
+ { 0x00600008, 0x46803d29, 0x00ae8040, 0x00010001 },
+ { 0x00600008, 0x46a03d29, 0x00ae8060, 0x00010001 },
+ { 0x00600040, 0x46402529, 0x00ae0640, 0x00ae0660 },
+ { 0x80600040, 0xc0002d29, 0x00ae0640, 0x00800080 },
+ { 0x00600040, 0x46802529, 0x00ae0680, 0x00ae06a0 },
+ { 0x80600040, 0xc0402d29, 0x00ae0680, 0x00800080 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x04000400 },
+ { 0x80800040, 0xb4002d29, 0x00b19400, 0x00800080 },
+ { 0x80800040, 0xb4202d29, 0x00b19420, 0x00800080 },
+ { 0x80800040, 0xb4402d29, 0x00b19440, 0x00800080 },
+ { 0x80800040, 0xb4602d29, 0x00b19460, 0x00800080 },
+ { 0x00600008, 0x45c03d29, 0x00ae9800, 0x00010001 },
+ { 0x00600008, 0x45e03d29, 0x00ae9820, 0x00010001 },
+ { 0x00600008, 0x46003d29, 0x00ae9840, 0x00010001 },
+ { 0x00600008, 0x46203d29, 0x00ae9860, 0x00010001 },
+ { 0x00600040, 0x45c02529, 0x00ae05c0, 0x00ae05e0 },
+ { 0x80600040, 0xd8002d29, 0x00ae05c0, 0x00800080 },
+ { 0x00600040, 0x46002529, 0x00ae0600, 0x00ae0620 },
+ { 0x80600040, 0xd8402d29, 0x00ae0600, 0x00800080 },
+ { 0x00600008, 0x46403d29, 0x00ae9000, 0x00010001 },
+ { 0x00600008, 0x46603d29, 0x00ae9020, 0x00010001 },
+ { 0x00600008, 0x46803d29, 0x00ae9040, 0x00010001 },
+ { 0x00600008, 0x46a03d29, 0x00ae9060, 0x00010001 },
+ { 0x00600040, 0x46402529, 0x00ae0640, 0x00ae0660 },
+ { 0x80600040, 0xd0002d29, 0x00ae0640, 0x00800080 },
+ { 0x00600040, 0x46802529, 0x00ae0680, 0x00ae06a0 },
+ { 0x80600040, 0xd0402d29, 0x00ae0680, 0x00800080 },
+ { 0x00400040, 0x22082da8, 0x006902c0, 0x06000600 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x00000000 },
+ { 0x00400040, 0x22082da8, 0x006902c0, 0x02000200 },
+ { 0x00800401, 0x23a00231, 0x00d28401, 0x00000000 },
+ { 0x00800801, 0x23b00231, 0x00d28421, 0x00000000 },
+ { 0x00800401, 0x23c00231, 0x00d28441, 0x00000000 },
+ { 0x00800801, 0x23d00231, 0x00d28461, 0x00000000 },
+ { 0x00600401, 0x44c00231, 0x00cf8801, 0x00000000 },
+ { 0x00600c01, 0x44d00231, 0x00cf8841, 0x00000000 },
+ { 0x00600c01, 0x44c10231, 0x00cf8001, 0x00000000 },
+ { 0x00600801, 0x44d10231, 0x00cf8041, 0x00000000 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x04000400 },
+ { 0x00800401, 0x23e00231, 0x00d29401, 0x00000000 },
+ { 0x00800801, 0x23f00231, 0x00d29421, 0x00000000 },
+ { 0x00800401, 0x24000231, 0x00d29441, 0x00000000 },
+ { 0x00800801, 0x24100231, 0x00d29461, 0x00000000 },
+ { 0x00600401, 0x44e00231, 0x00cf9801, 0x00000000 },
+ { 0x00600c01, 0x44f00231, 0x00cf9841, 0x00000000 },
+ { 0x00600c01, 0x44e10231, 0x00cf9001, 0x00000000 },
+ { 0x00600801, 0x44f10231, 0x00cf9041, 0x00000000 },
+ { 0x00400040, 0x22082da8, 0x006902c0, 0x06000600 },
+ { 0x00800401, 0x24200231, 0x00d28401, 0x00000000 },
+ { 0x00800801, 0x24300231, 0x00d28421, 0x00000000 },
+ { 0x00800401, 0x24400231, 0x00d28441, 0x00000000 },
+ { 0x00800801, 0x24500231, 0x00d28461, 0x00000000 },
+ { 0x00600401, 0x45000231, 0x00cf8801, 0x00000000 },
+ { 0x00600c01, 0x45100231, 0x00cf8841, 0x00000000 },
+ { 0x00600c01, 0x45010231, 0x00cf8001, 0x00000000 },
+ { 0x00600801, 0x45110231, 0x00cf8041, 0x00000000 },
+ { 0x00800401, 0x24600231, 0x00d29401, 0x00000000 },
+ { 0x00800801, 0x24700231, 0x00d29421, 0x00000000 },
+ { 0x00800401, 0x24800231, 0x00d29441, 0x00000000 },
+ { 0x00800801, 0x24900231, 0x00d29461, 0x00000000 },
+ { 0x00600401, 0x45200231, 0x00cf9801, 0x00000000 },
+ { 0x00600c01, 0x45300231, 0x00cf9841, 0x00000000 },
+ { 0x00600c01, 0x45210231, 0x00cf9001, 0x00000000 },
+ { 0x00600801, 0x45310231, 0x00cf9041, 0x00000000 },
+ { 0x05000031, 0x20000e24, 0x00000380, 0x120a8018 },
+ { 0x05000031, 0x20000e24, 0x000004a0, 0x0a0a8019 },
+ { 0x00600001, 0x2fe00021, 0x008d0000, 0x00000000 },
+ { 0x07000031, 0x20001e24, 0x00000fe0, 0x82000010 },
diff --git a/src/shaders/post_processing/gen7/pl2_to_pl3.asm b/src/shaders/post_processing/gen7/pl2_to_pl3.asm
new file mode 100644
index 0000000..d48071b
--- /dev/null
+++ b/src/shaders/post_processing/gen7/pl2_to_pl3.asm
@@ -0,0 +1,17 @@
+// Module name: AVS
+.kernel PL2_TO_PL3
+.code
+
+#include "VP_Setup.g4a"
+#include "Set_Layer_0.g4a"
+#include "Set_AVS_Buf_0123_PL2.g4a"
+#include "PL2_AVS_Buf_0.g4a"
+#include "PL2_AVS_Buf_1.g4a"
+#include "PL2_AVS_Buf_2.g4a"
+#include "PL2_AVS_Buf_3.g4a"
+#include "Save_AVS_PL3.g4a"
+#include "EOT.g4a"
+
+.end_code
+
+.end_kernel
diff --git a/src/shaders/post_processing/gen7/pl2_to_pl3.g75b b/src/shaders/post_processing/gen7/pl2_to_pl3.g75b
new file mode 100644
index 0000000..77bcdcb
--- /dev/null
+++ b/src/shaders/post_processing/gen7/pl2_to_pl3.g75b
@@ -0,0 +1,607 @@
+ { 0x00600001, 0x23600021, 0x008d0000, 0x00000000 },
+ { 0x00600001, 0x23200021, 0x008d0000, 0x00000000 },
+ { 0x00600001, 0x23400021, 0x008d0000, 0x00000000 },
+ { 0x00600041, 0x206077bd, 0x008d0060, 0x000000f0 },
+ { 0x00200001, 0x211401bd, 0x004500e0, 0x00000000 },
+ { 0x01600010, 0x20002e24, 0x0000005a, 0x00010001 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x00000000 },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+ { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c077bd, 0x00000060, 0x00000114 },
+ { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a077bd, 0x00000080, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c077bd, 0x00000060, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a077bd, 0x00000080, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c077bd, 0x00000060, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a077bd, 0x00000080, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c077bd, 0x00000060, 0x00000220 },
+ { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a077bd, 0x00000080, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x00020002 },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+ { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+ { 0x00110048, 0x20c477bd, 0x00000064, 0x00000114 },
+ { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+ { 0x00000048, 0x20a477bd, 0x00000084, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+ { 0x00110048, 0x20c477bd, 0x00000064, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+ { 0x00000048, 0x20a477bd, 0x00000084, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+ { 0x00110048, 0x20c477bd, 0x00000064, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+ { 0x00000048, 0x20a477bd, 0x00000084, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+ { 0x00110048, 0x20c477bd, 0x00000064, 0x00000220 },
+ { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+ { 0x00000048, 0x20a477bd, 0x00000084, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x00040004 },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+ { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+ { 0x00110048, 0x20c877bd, 0x00000068, 0x00000114 },
+ { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+ { 0x00000048, 0x20a877bd, 0x00000088, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+ { 0x00110048, 0x20c877bd, 0x00000068, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+ { 0x00000048, 0x20a877bd, 0x00000088, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+ { 0x00110048, 0x20c877bd, 0x00000068, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+ { 0x00000048, 0x20a877bd, 0x00000088, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+ { 0x00110048, 0x20c877bd, 0x00000068, 0x00000220 },
+ { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+ { 0x00000048, 0x20a877bd, 0x00000088, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x00060006 },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+ { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+ { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000114 },
+ { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+ { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+ { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+ { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+ { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+ { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+ { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000220 },
+ { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+ { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x00080008 },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+ { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+ { 0x00110048, 0x20d077bd, 0x00000070, 0x00000114 },
+ { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+ { 0x00000048, 0x20b077bd, 0x00000090, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+ { 0x00110048, 0x20d077bd, 0x00000070, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+ { 0x00000048, 0x20b077bd, 0x00000090, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+ { 0x00110048, 0x20d077bd, 0x00000070, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+ { 0x00000048, 0x20b077bd, 0x00000090, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+ { 0x00110048, 0x20d077bd, 0x00000070, 0x00000220 },
+ { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+ { 0x00000048, 0x20b077bd, 0x00000090, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x000a000a },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+ { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+ { 0x00110048, 0x20d477bd, 0x00000074, 0x00000114 },
+ { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+ { 0x00000048, 0x20b477bd, 0x00000094, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+ { 0x00110048, 0x20d477bd, 0x00000074, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+ { 0x00000048, 0x20b477bd, 0x00000094, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+ { 0x00110048, 0x20d477bd, 0x00000074, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+ { 0x00000048, 0x20b477bd, 0x00000094, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+ { 0x00110048, 0x20d477bd, 0x00000074, 0x00000220 },
+ { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+ { 0x00000048, 0x20b477bd, 0x00000094, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x000c000c },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+ { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+ { 0x00110048, 0x20d877bd, 0x00000078, 0x00000114 },
+ { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+ { 0x00000048, 0x20b877bd, 0x00000098, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+ { 0x00110048, 0x20d877bd, 0x00000078, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+ { 0x00000048, 0x20b877bd, 0x00000098, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+ { 0x00110048, 0x20d877bd, 0x00000078, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+ { 0x00000048, 0x20b877bd, 0x00000098, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+ { 0x00110048, 0x20d877bd, 0x00000078, 0x00000220 },
+ { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+ { 0x00000048, 0x20b877bd, 0x00000098, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x000e000e },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+ { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+ { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000114 },
+ { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+ { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+ { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+ { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+ { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+ { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+ { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000220 },
+ { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+ { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00800001, 0x22a00169, 0x00000000, 0xffffffff },
+ { 0x02000010, 0x20002d24, 0x020000e4, 0x00000000 },
+ { 0x02010010, 0x20002d24, 0x020000e6, 0x00000000 },
+ { 0x00110220, 0x34001c00, 0x02001400, 0x000000d0 },
+ { 0x00000005, 0x23022d31, 0x00000044, 0x00030003 },
+ { 0x00000401, 0x233803bd, 0x000000f4, 0x00000000 },
+ { 0x00000c01, 0x233003bd, 0x00000060, 0x00000000 },
+ { 0x00000c01, 0x233403bd, 0x00000080, 0x00000000 },
+ { 0x00000c01, 0x232803bd, 0x000000c0, 0x00000000 },
+ { 0x00000801, 0x232c03bd, 0x000000a0, 0x00000000 },
+ { 0x00000001, 0x240803bc, 0x000000c0, 0x00000000 },
+ { 0x00000048, 0x24087fbc, 0x00000060, 0x41000000 },
+ { 0x00000448, 0x22e87fbd, 0x000000f4, 0x41e00000 },
+ { 0x00000c41, 0x22e47fbd, 0x00000080, 0x40800000 },
+ { 0x00000001, 0x241003bc, 0x00000060, 0x00000000 },
+ { 0x00000c48, 0x22f07fbd, 0x000000f4, 0x41000000 },
+ { 0x00000801, 0x22f40061, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00400001, 0x2400036c, 0x00000000, 0x00006ea2 },
+ { 0x00400040, 0x24002d8c, 0x00690400, 0x00460046 },
+ { 0x00400009, 0x22c02d8d, 0x00690400, 0x00050005 },
+ { 0x00400001, 0x2400036c, 0x00000000, 0x00006204 },
+ { 0x00400040, 0x24002d8c, 0x00690400, 0x00400040 },
+ { 0x00400409, 0x22402d8d, 0x00690400, 0x00050005 },
+ { 0x00000801, 0x22500061, 0x00000000, 0x01000100 },
+ { 0x00000001, 0x22d00061, 0x00000000, 0x00400040 },
+ { 0x00110220, 0x34001c00, 0x02001400, 0x00000290 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+ { 0x00000401, 0x233c0021, 0x000000fc, 0x00000000 },
+ { 0x00000801, 0x23240121, 0x000000f8, 0x00000000 },
+ { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+ { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+ { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+ { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+ { 0x00110220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+ { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+ { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+ { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+ { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+ { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+ { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+ { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+ { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+ { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+ { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+ { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+ { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+ { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+ { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+ { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+ { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+ { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+ { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+ { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+ { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+ { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+ { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+ { 0x02000031, 0x28000229, 0x00000200, 0x00000200 },
+ { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x048eb801 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000a000 },
+ { 0x02000031, 0x28800229, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00110220, 0x34001c00, 0x02001400, 0x00000280 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+ { 0x00000040, 0x23240d21, 0x000000f8, 0x00000001 },
+ { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+ { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+ { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+ { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+ { 0x00110220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+ { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+ { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+ { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+ { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+ { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+ { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+ { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+ { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+ { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+ { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+ { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+ { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+ { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+ { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+ { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+ { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+ { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+ { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+ { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+ { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+ { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+ { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+ { 0x02000031, 0x2a000229, 0x00000200, 0x00000200 },
+ { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x048eb801 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000a000 },
+ { 0x02000031, 0x2a800229, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00110220, 0x34001c00, 0x02001400, 0x00000280 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+ { 0x00000040, 0x23240d21, 0x000000f8, 0x00000002 },
+ { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+ { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+ { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+ { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+ { 0x00110220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+ { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+ { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+ { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+ { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+ { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+ { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+ { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+ { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+ { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+ { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+ { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+ { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+ { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+ { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+ { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+ { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+ { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+ { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+ { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+ { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+ { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+ { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+ { 0x02000031, 0x2c000229, 0x00000200, 0x00000200 },
+ { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x048eb801 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000a000 },
+ { 0x02000031, 0x2c800229, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00110220, 0x34001c00, 0x02001400, 0x00000280 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+ { 0x00000040, 0x23240d21, 0x000000f8, 0x00000003 },
+ { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+ { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+ { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+ { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+ { 0x00110220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+ { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+ { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+ { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+ { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+ { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+ { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+ { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+ { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+ { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+ { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+ { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+ { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+ { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+ { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+ { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+ { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+ { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+ { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+ { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+ { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+ { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+ { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+ { 0x02000031, 0x2e000229, 0x00000200, 0x00000200 },
+ { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x048eb801 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000a000 },
+ { 0x02000031, 0x2e800229, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x00000000 },
+ { 0x00400040, 0x22082da8, 0x006902c0, 0x02000200 },
+ { 0x00600001, 0x23800021, 0x008d0360, 0x00000000 },
+ { 0x00600001, 0x24a00021, 0x008d0360, 0x00000000 },
+ { 0x00600001, 0x25c00021, 0x008d0360, 0x00000000 },
+ { 0x00200401, 0x238001a5, 0x004500e0, 0x00000000 },
+ { 0x00200408, 0x24a03da5, 0x004500e0, 0x00010001 },
+ { 0x00200408, 0x25c03da5, 0x004500e0, 0x00010001 },
+ { 0x00000801, 0x23880061, 0x00000000, 0x000f000f },
+ { 0x00000801, 0x24a80061, 0x00000000, 0x00070007 },
+ { 0x00000801, 0x25c80061, 0x00000000, 0x00070007 },
+ { 0x80800040, 0xa4002d29, 0x00b18400, 0x00800080 },
+ { 0x80800040, 0xa4202d29, 0x00b18420, 0x00800080 },
+ { 0x80800040, 0xa4402d29, 0x00b18440, 0x00800080 },
+ { 0x80800040, 0xa4602d29, 0x00b18460, 0x00800080 },
+ { 0x80600040, 0xc8002d29, 0x00ae8800, 0x00800080 },
+ { 0x80600040, 0xc8402d29, 0x00ae8840, 0x00800080 },
+ { 0x80600040, 0xc0002d29, 0x00ae8000, 0x00800080 },
+ { 0x80600040, 0xc0402d29, 0x00ae8040, 0x00800080 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x04000400 },
+ { 0x80800040, 0xb4002d29, 0x00b19400, 0x00800080 },
+ { 0x80800040, 0xb4202d29, 0x00b19420, 0x00800080 },
+ { 0x80800040, 0xb4402d29, 0x00b19440, 0x00800080 },
+ { 0x80800040, 0xb4602d29, 0x00b19460, 0x00800080 },
+ { 0x80600040, 0xd8002d29, 0x00ae9800, 0x00800080 },
+ { 0x80600040, 0xd8402d29, 0x00ae9840, 0x00800080 },
+ { 0x80600040, 0xd0002d29, 0x00ae9000, 0x00800080 },
+ { 0x80600040, 0xd0402d29, 0x00ae9040, 0x00800080 },
+ { 0x00400040, 0x22082da8, 0x006902c0, 0x06000600 },
+ { 0x80800040, 0xa4002d29, 0x00b18400, 0x00800080 },
+ { 0x80800040, 0xa4202d29, 0x00b18420, 0x00800080 },
+ { 0x80800040, 0xa4402d29, 0x00b18440, 0x00800080 },
+ { 0x80800040, 0xa4602d29, 0x00b18460, 0x00800080 },
+ { 0x80600040, 0xc8002d29, 0x00ae8800, 0x00800080 },
+ { 0x80600040, 0xc8402d29, 0x00ae8840, 0x00800080 },
+ { 0x80600040, 0xc0002d29, 0x00ae8000, 0x00800080 },
+ { 0x80600040, 0xc0402d29, 0x00ae8040, 0x00800080 },
+ { 0x80800040, 0xb4002d29, 0x00b19400, 0x00800080 },
+ { 0x80800040, 0xb4202d29, 0x00b19420, 0x00800080 },
+ { 0x80800040, 0xb4402d29, 0x00b19440, 0x00800080 },
+ { 0x80800040, 0xb4602d29, 0x00b19460, 0x00800080 },
+ { 0x80600040, 0xd8002d29, 0x00ae9800, 0x00800080 },
+ { 0x80600040, 0xd8402d29, 0x00ae9840, 0x00800080 },
+ { 0x80600040, 0xd0002d29, 0x00ae9000, 0x00800080 },
+ { 0x80600040, 0xd0402d29, 0x00ae9040, 0x00800080 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x00000000 },
+ { 0x00400040, 0x22082da8, 0x006902c0, 0x02000200 },
+ { 0x00800401, 0x23a00231, 0x00d28401, 0x00000000 },
+ { 0x00800801, 0x23b00231, 0x00d28421, 0x00000000 },
+ { 0x00800401, 0x23c00231, 0x00d28441, 0x00000000 },
+ { 0x00800801, 0x23d00231, 0x00d28461, 0x00000000 },
+ { 0x00600401, 0x24c00231, 0x00cf8801, 0x00000000 },
+ { 0x00600c01, 0x24c80231, 0x00cf8841, 0x00000000 },
+ { 0x00600401, 0x25e00231, 0x00cf8001, 0x00000000 },
+ { 0x00600c01, 0x25e80231, 0x00cf8041, 0x00000000 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x04000400 },
+ { 0x00800401, 0x23e00231, 0x00d29401, 0x00000000 },
+ { 0x00800801, 0x23f00231, 0x00d29421, 0x00000000 },
+ { 0x00800401, 0x24000231, 0x00d29441, 0x00000000 },
+ { 0x00800801, 0x24100231, 0x00d29461, 0x00000000 },
+ { 0x00600c01, 0x24d00231, 0x00cf9801, 0x00000000 },
+ { 0x00600801, 0x24d80231, 0x00cf9841, 0x00000000 },
+ { 0x00600c01, 0x25f00231, 0x00cf9001, 0x00000000 },
+ { 0x00600801, 0x25f80231, 0x00cf9041, 0x00000000 },
+ { 0x00400040, 0x22082da8, 0x006902c0, 0x06000600 },
+ { 0x00800401, 0x24200231, 0x00d28401, 0x00000000 },
+ { 0x00800801, 0x24300231, 0x00d28421, 0x00000000 },
+ { 0x00800401, 0x24400231, 0x00d28441, 0x00000000 },
+ { 0x00800801, 0x24500231, 0x00d28461, 0x00000000 },
+ { 0x00600401, 0x24e00231, 0x00cf8801, 0x00000000 },
+ { 0x00600c01, 0x24e80231, 0x00cf8841, 0x00000000 },
+ { 0x00600401, 0x26000231, 0x00cf8001, 0x00000000 },
+ { 0x00600c01, 0x26080231, 0x00cf8041, 0x00000000 },
+ { 0x00800401, 0x24600231, 0x00d29401, 0x00000000 },
+ { 0x00800801, 0x24700231, 0x00d29421, 0x00000000 },
+ { 0x00800401, 0x24800231, 0x00d29441, 0x00000000 },
+ { 0x00800801, 0x24900231, 0x00d29461, 0x00000000 },
+ { 0x00600c01, 0x24f00231, 0x00cf9801, 0x00000000 },
+ { 0x00600801, 0x24f80231, 0x00cf9841, 0x00000000 },
+ { 0x00600c01, 0x26100231, 0x00cf9001, 0x00000000 },
+ { 0x00600801, 0x26180231, 0x00cf9041, 0x00000000 },
+ { 0x05000031, 0x20000e24, 0x00000380, 0x120a8018 },
+ { 0x05000031, 0x20000e24, 0x000004a0, 0x060a8019 },
+ { 0x05000031, 0x20000e24, 0x000005c0, 0x060a801a },
+ { 0x00600001, 0x2fe00021, 0x008d0000, 0x00000000 },
+ { 0x07000031, 0x20001e24, 0x00000fe0, 0x82000010 },
diff --git a/src/shaders/post_processing/gen7/pl2_to_pl3.g7b b/src/shaders/post_processing/gen7/pl2_to_pl3.g7b
new file mode 100644
index 0000000..bec05ac
--- /dev/null
+++ b/src/shaders/post_processing/gen7/pl2_to_pl3.g7b
@@ -0,0 +1,607 @@
+ { 0x00600001, 0x23600021, 0x008d0000, 0x00000000 },
+ { 0x00600001, 0x23200021, 0x008d0000, 0x00000000 },
+ { 0x00600001, 0x23400021, 0x008d0000, 0x00000000 },
+ { 0x00600041, 0x206077bd, 0x008d0060, 0x000000f0 },
+ { 0x00200001, 0x211401bd, 0x004500e0, 0x00000000 },
+ { 0x01600010, 0x20002e24, 0x0000005a, 0x00010001 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x00000000 },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+ { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c077bd, 0x00000060, 0x00000114 },
+ { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a077bd, 0x00000080, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c077bd, 0x00000060, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a077bd, 0x00000080, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c077bd, 0x00000060, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a077bd, 0x00000080, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c077bd, 0x00000060, 0x00000220 },
+ { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a077bd, 0x00000080, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x00020002 },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+ { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+ { 0x00110048, 0x20c477bd, 0x00000064, 0x00000114 },
+ { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+ { 0x00000048, 0x20a477bd, 0x00000084, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+ { 0x00110048, 0x20c477bd, 0x00000064, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+ { 0x00000048, 0x20a477bd, 0x00000084, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+ { 0x00110048, 0x20c477bd, 0x00000064, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+ { 0x00000048, 0x20a477bd, 0x00000084, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+ { 0x00110048, 0x20c477bd, 0x00000064, 0x00000220 },
+ { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+ { 0x00000048, 0x20a477bd, 0x00000084, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x00040004 },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+ { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+ { 0x00110048, 0x20c877bd, 0x00000068, 0x00000114 },
+ { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+ { 0x00000048, 0x20a877bd, 0x00000088, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+ { 0x00110048, 0x20c877bd, 0x00000068, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+ { 0x00000048, 0x20a877bd, 0x00000088, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+ { 0x00110048, 0x20c877bd, 0x00000068, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+ { 0x00000048, 0x20a877bd, 0x00000088, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+ { 0x00110048, 0x20c877bd, 0x00000068, 0x00000220 },
+ { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+ { 0x00000048, 0x20a877bd, 0x00000088, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x00060006 },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+ { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+ { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000114 },
+ { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+ { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+ { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+ { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+ { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+ { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+ { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000220 },
+ { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+ { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x00080008 },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+ { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+ { 0x00110048, 0x20d077bd, 0x00000070, 0x00000114 },
+ { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+ { 0x00000048, 0x20b077bd, 0x00000090, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+ { 0x00110048, 0x20d077bd, 0x00000070, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+ { 0x00000048, 0x20b077bd, 0x00000090, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+ { 0x00110048, 0x20d077bd, 0x00000070, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+ { 0x00000048, 0x20b077bd, 0x00000090, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+ { 0x00110048, 0x20d077bd, 0x00000070, 0x00000220 },
+ { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+ { 0x00000048, 0x20b077bd, 0x00000090, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x000a000a },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+ { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+ { 0x00110048, 0x20d477bd, 0x00000074, 0x00000114 },
+ { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+ { 0x00000048, 0x20b477bd, 0x00000094, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+ { 0x00110048, 0x20d477bd, 0x00000074, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+ { 0x00000048, 0x20b477bd, 0x00000094, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+ { 0x00110048, 0x20d477bd, 0x00000074, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+ { 0x00000048, 0x20b477bd, 0x00000094, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+ { 0x00110048, 0x20d477bd, 0x00000074, 0x00000220 },
+ { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+ { 0x00000048, 0x20b477bd, 0x00000094, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x000c000c },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+ { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+ { 0x00110048, 0x20d877bd, 0x00000078, 0x00000114 },
+ { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+ { 0x00000048, 0x20b877bd, 0x00000098, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+ { 0x00110048, 0x20d877bd, 0x00000078, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+ { 0x00000048, 0x20b877bd, 0x00000098, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+ { 0x00110048, 0x20d877bd, 0x00000078, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+ { 0x00000048, 0x20b877bd, 0x00000098, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+ { 0x00110048, 0x20d877bd, 0x00000078, 0x00000220 },
+ { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+ { 0x00000048, 0x20b877bd, 0x00000098, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x000e000e },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+ { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+ { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000114 },
+ { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+ { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+ { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+ { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+ { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+ { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+ { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000220 },
+ { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+ { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00800001, 0x22a00169, 0x00000000, 0xffffffff },
+ { 0x02000010, 0x20002d24, 0x020000e4, 0x00000000 },
+ { 0x02010010, 0x20002d24, 0x020000e6, 0x00000000 },
+ { 0x00110220, 0x34001c00, 0x02001400, 0x0000001a },
+ { 0x00000005, 0x23022d31, 0x00000044, 0x00030003 },
+ { 0x00000401, 0x233803bd, 0x000000f4, 0x00000000 },
+ { 0x00000c01, 0x233003bd, 0x00000060, 0x00000000 },
+ { 0x00000c01, 0x233403bd, 0x00000080, 0x00000000 },
+ { 0x00000c01, 0x232803bd, 0x000000c0, 0x00000000 },
+ { 0x00000801, 0x232c03bd, 0x000000a0, 0x00000000 },
+ { 0x00000001, 0x240803bc, 0x000000c0, 0x00000000 },
+ { 0x00000048, 0x24087fbc, 0x00000060, 0x41000000 },
+ { 0x00000448, 0x22e87fbd, 0x000000f4, 0x41e00000 },
+ { 0x00000c41, 0x22e47fbd, 0x00000080, 0x40800000 },
+ { 0x00000001, 0x241003bc, 0x00000060, 0x00000000 },
+ { 0x00000c48, 0x22f07fbd, 0x000000f4, 0x41000000 },
+ { 0x00000801, 0x22f40061, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00400001, 0x2400036c, 0x00000000, 0x00006ea2 },
+ { 0x00400040, 0x24002d8c, 0x00690400, 0x00460046 },
+ { 0x00400009, 0x22c02d8d, 0x00690400, 0x00050005 },
+ { 0x00400001, 0x2400036c, 0x00000000, 0x00006204 },
+ { 0x00400040, 0x24002d8c, 0x00690400, 0x00400040 },
+ { 0x00400409, 0x22402d8d, 0x00690400, 0x00050005 },
+ { 0x00000801, 0x22500061, 0x00000000, 0x01000100 },
+ { 0x00000001, 0x22d00061, 0x00000000, 0x00400040 },
+ { 0x00110220, 0x34001c00, 0x02001400, 0x00000052 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+ { 0x00000401, 0x233c0021, 0x000000fc, 0x00000000 },
+ { 0x00000801, 0x23240121, 0x000000f8, 0x00000000 },
+ { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+ { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+ { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+ { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+ { 0x00110220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+ { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+ { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+ { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+ { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+ { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+ { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+ { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+ { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+ { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+ { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+ { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+ { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+ { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+ { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+ { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+ { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+ { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+ { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+ { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+ { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+ { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+ { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+ { 0x02000031, 0x28000229, 0x00000200, 0x00000200 },
+ { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x048eb801 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000a000 },
+ { 0x02000031, 0x28800229, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00110220, 0x34001c00, 0x02001400, 0x00000050 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+ { 0x00000040, 0x23240d21, 0x000000f8, 0x00000001 },
+ { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+ { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+ { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+ { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+ { 0x00110220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+ { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+ { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+ { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+ { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+ { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+ { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+ { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+ { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+ { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+ { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+ { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+ { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+ { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+ { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+ { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+ { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+ { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+ { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+ { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+ { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+ { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+ { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+ { 0x02000031, 0x2a000229, 0x00000200, 0x00000200 },
+ { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x048eb801 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000a000 },
+ { 0x02000031, 0x2a800229, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00110220, 0x34001c00, 0x02001400, 0x00000050 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+ { 0x00000040, 0x23240d21, 0x000000f8, 0x00000002 },
+ { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+ { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+ { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+ { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+ { 0x00110220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+ { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+ { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+ { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+ { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+ { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+ { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+ { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+ { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+ { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+ { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+ { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+ { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+ { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+ { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+ { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+ { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+ { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+ { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+ { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+ { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+ { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+ { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+ { 0x02000031, 0x2c000229, 0x00000200, 0x00000200 },
+ { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x048eb801 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000a000 },
+ { 0x02000031, 0x2c800229, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00110220, 0x34001c00, 0x02001400, 0x00000050 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+ { 0x00000040, 0x23240d21, 0x000000f8, 0x00000003 },
+ { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+ { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+ { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+ { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+ { 0x00110220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+ { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+ { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+ { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+ { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+ { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+ { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+ { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+ { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+ { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+ { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+ { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+ { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+ { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+ { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+ { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+ { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+ { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+ { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+ { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+ { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+ { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+ { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+ { 0x02000031, 0x2e000229, 0x00000200, 0x00000200 },
+ { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x048eb801 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000a000 },
+ { 0x02000031, 0x2e800229, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x00000000 },
+ { 0x00400040, 0x22082da8, 0x006902c0, 0x02000200 },
+ { 0x00600001, 0x23800021, 0x008d0360, 0x00000000 },
+ { 0x00600001, 0x24a00021, 0x008d0360, 0x00000000 },
+ { 0x00600001, 0x25c00021, 0x008d0360, 0x00000000 },
+ { 0x00200401, 0x238001a5, 0x004500e0, 0x00000000 },
+ { 0x00200408, 0x24a03da5, 0x004500e0, 0x00010001 },
+ { 0x00200408, 0x25c03da5, 0x004500e0, 0x00010001 },
+ { 0x00000801, 0x23880061, 0x00000000, 0x000f000f },
+ { 0x00000801, 0x24a80061, 0x00000000, 0x00070007 },
+ { 0x00000801, 0x25c80061, 0x00000000, 0x00070007 },
+ { 0x80800040, 0xa4002d29, 0x00b18400, 0x00800080 },
+ { 0x80800040, 0xa4202d29, 0x00b18420, 0x00800080 },
+ { 0x80800040, 0xa4402d29, 0x00b18440, 0x00800080 },
+ { 0x80800040, 0xa4602d29, 0x00b18460, 0x00800080 },
+ { 0x80600040, 0xc8002d29, 0x00ae8800, 0x00800080 },
+ { 0x80600040, 0xc8402d29, 0x00ae8840, 0x00800080 },
+ { 0x80600040, 0xc0002d29, 0x00ae8000, 0x00800080 },
+ { 0x80600040, 0xc0402d29, 0x00ae8040, 0x00800080 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x04000400 },
+ { 0x80800040, 0xb4002d29, 0x00b19400, 0x00800080 },
+ { 0x80800040, 0xb4202d29, 0x00b19420, 0x00800080 },
+ { 0x80800040, 0xb4402d29, 0x00b19440, 0x00800080 },
+ { 0x80800040, 0xb4602d29, 0x00b19460, 0x00800080 },
+ { 0x80600040, 0xd8002d29, 0x00ae9800, 0x00800080 },
+ { 0x80600040, 0xd8402d29, 0x00ae9840, 0x00800080 },
+ { 0x80600040, 0xd0002d29, 0x00ae9000, 0x00800080 },
+ { 0x80600040, 0xd0402d29, 0x00ae9040, 0x00800080 },
+ { 0x00400040, 0x22082da8, 0x006902c0, 0x06000600 },
+ { 0x80800040, 0xa4002d29, 0x00b18400, 0x00800080 },
+ { 0x80800040, 0xa4202d29, 0x00b18420, 0x00800080 },
+ { 0x80800040, 0xa4402d29, 0x00b18440, 0x00800080 },
+ { 0x80800040, 0xa4602d29, 0x00b18460, 0x00800080 },
+ { 0x80600040, 0xc8002d29, 0x00ae8800, 0x00800080 },
+ { 0x80600040, 0xc8402d29, 0x00ae8840, 0x00800080 },
+ { 0x80600040, 0xc0002d29, 0x00ae8000, 0x00800080 },
+ { 0x80600040, 0xc0402d29, 0x00ae8040, 0x00800080 },
+ { 0x80800040, 0xb4002d29, 0x00b19400, 0x00800080 },
+ { 0x80800040, 0xb4202d29, 0x00b19420, 0x00800080 },
+ { 0x80800040, 0xb4402d29, 0x00b19440, 0x00800080 },
+ { 0x80800040, 0xb4602d29, 0x00b19460, 0x00800080 },
+ { 0x80600040, 0xd8002d29, 0x00ae9800, 0x00800080 },
+ { 0x80600040, 0xd8402d29, 0x00ae9840, 0x00800080 },
+ { 0x80600040, 0xd0002d29, 0x00ae9000, 0x00800080 },
+ { 0x80600040, 0xd0402d29, 0x00ae9040, 0x00800080 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x00000000 },
+ { 0x00400040, 0x22082da8, 0x006902c0, 0x02000200 },
+ { 0x00800401, 0x23a00231, 0x00d28401, 0x00000000 },
+ { 0x00800801, 0x23b00231, 0x00d28421, 0x00000000 },
+ { 0x00800401, 0x23c00231, 0x00d28441, 0x00000000 },
+ { 0x00800801, 0x23d00231, 0x00d28461, 0x00000000 },
+ { 0x00600401, 0x24c00231, 0x00cf8801, 0x00000000 },
+ { 0x00600c01, 0x24c80231, 0x00cf8841, 0x00000000 },
+ { 0x00600401, 0x25e00231, 0x00cf8001, 0x00000000 },
+ { 0x00600c01, 0x25e80231, 0x00cf8041, 0x00000000 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x04000400 },
+ { 0x00800401, 0x23e00231, 0x00d29401, 0x00000000 },
+ { 0x00800801, 0x23f00231, 0x00d29421, 0x00000000 },
+ { 0x00800401, 0x24000231, 0x00d29441, 0x00000000 },
+ { 0x00800801, 0x24100231, 0x00d29461, 0x00000000 },
+ { 0x00600c01, 0x24d00231, 0x00cf9801, 0x00000000 },
+ { 0x00600801, 0x24d80231, 0x00cf9841, 0x00000000 },
+ { 0x00600c01, 0x25f00231, 0x00cf9001, 0x00000000 },
+ { 0x00600801, 0x25f80231, 0x00cf9041, 0x00000000 },
+ { 0x00400040, 0x22082da8, 0x006902c0, 0x06000600 },
+ { 0x00800401, 0x24200231, 0x00d28401, 0x00000000 },
+ { 0x00800801, 0x24300231, 0x00d28421, 0x00000000 },
+ { 0x00800401, 0x24400231, 0x00d28441, 0x00000000 },
+ { 0x00800801, 0x24500231, 0x00d28461, 0x00000000 },
+ { 0x00600401, 0x24e00231, 0x00cf8801, 0x00000000 },
+ { 0x00600c01, 0x24e80231, 0x00cf8841, 0x00000000 },
+ { 0x00600401, 0x26000231, 0x00cf8001, 0x00000000 },
+ { 0x00600c01, 0x26080231, 0x00cf8041, 0x00000000 },
+ { 0x00800401, 0x24600231, 0x00d29401, 0x00000000 },
+ { 0x00800801, 0x24700231, 0x00d29421, 0x00000000 },
+ { 0x00800401, 0x24800231, 0x00d29441, 0x00000000 },
+ { 0x00800801, 0x24900231, 0x00d29461, 0x00000000 },
+ { 0x00600c01, 0x24f00231, 0x00cf9801, 0x00000000 },
+ { 0x00600801, 0x24f80231, 0x00cf9841, 0x00000000 },
+ { 0x00600c01, 0x26100231, 0x00cf9001, 0x00000000 },
+ { 0x00600801, 0x26180231, 0x00cf9041, 0x00000000 },
+ { 0x05000031, 0x20000e24, 0x00000380, 0x120a8018 },
+ { 0x05000031, 0x20000e24, 0x000004a0, 0x060a8019 },
+ { 0x05000031, 0x20000e24, 0x000005c0, 0x060a801a },
+ { 0x00600001, 0x2fe00021, 0x008d0000, 0x00000000 },
+ { 0x07000031, 0x20001e24, 0x00000fe0, 0x82000010 },
diff --git a/src/shaders/post_processing/gen7/pl3_to_pa.asm b/src/shaders/post_processing/gen7/pl3_to_pa.asm
new file mode 100644
index 0000000..b3be8f9
--- /dev/null
+++ b/src/shaders/post_processing/gen7/pl3_to_pa.asm
@@ -0,0 +1,17 @@
+// Module name: AVS
+.kernel PL3_TO_PA
+.code
+
+#include "VP_Setup.g4a"
+#include "Set_Layer_0.g4a"
+#include "Set_AVS_Buf_0123_PL3.g4a"
+#include "PL3_AVS_Buf_0.g4a"
+#include "PL3_AVS_Buf_1.g4a"
+#include "PL3_AVS_Buf_2.g4a"
+#include "PL3_AVS_Buf_3.g4a"
+#include "Save_AVS_PA.g4a"
+#include "EOT.g4a"
+
+.end_code
+
+.end_kernel
diff --git a/src/shaders/post_processing/gen7/pl3_to_pa.g75b b/src/shaders/post_processing/gen7/pl3_to_pa.g75b
new file mode 100644
index 0000000..d11ab6d
--- /dev/null
+++ b/src/shaders/post_processing/gen7/pl3_to_pa.g75b
@@ -0,0 +1,709 @@
+ { 0x00600001, 0x23600021, 0x008d0000, 0x00000000 },
+ { 0x00600001, 0x23200021, 0x008d0000, 0x00000000 },
+ { 0x00600001, 0x23400021, 0x008d0000, 0x00000000 },
+ { 0x00600041, 0x206077bd, 0x008d0060, 0x000000f0 },
+ { 0x00200001, 0x211401bd, 0x004500e0, 0x00000000 },
+ { 0x01600010, 0x20002e24, 0x0000005a, 0x00010001 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x00000000 },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+ { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c077bd, 0x00000060, 0x00000114 },
+ { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a077bd, 0x00000080, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c077bd, 0x00000060, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a077bd, 0x00000080, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c077bd, 0x00000060, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a077bd, 0x00000080, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c077bd, 0x00000060, 0x00000220 },
+ { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a077bd, 0x00000080, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x00020002 },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+ { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+ { 0x00110048, 0x20c477bd, 0x00000064, 0x00000114 },
+ { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+ { 0x00000048, 0x20a477bd, 0x00000084, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+ { 0x00110048, 0x20c477bd, 0x00000064, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+ { 0x00000048, 0x20a477bd, 0x00000084, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+ { 0x00110048, 0x20c477bd, 0x00000064, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+ { 0x00000048, 0x20a477bd, 0x00000084, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+ { 0x00110048, 0x20c477bd, 0x00000064, 0x00000220 },
+ { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+ { 0x00000048, 0x20a477bd, 0x00000084, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x00040004 },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+ { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+ { 0x00110048, 0x20c877bd, 0x00000068, 0x00000114 },
+ { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+ { 0x00000048, 0x20a877bd, 0x00000088, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+ { 0x00110048, 0x20c877bd, 0x00000068, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+ { 0x00000048, 0x20a877bd, 0x00000088, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+ { 0x00110048, 0x20c877bd, 0x00000068, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+ { 0x00000048, 0x20a877bd, 0x00000088, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+ { 0x00110048, 0x20c877bd, 0x00000068, 0x00000220 },
+ { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+ { 0x00000048, 0x20a877bd, 0x00000088, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x00060006 },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+ { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+ { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000114 },
+ { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+ { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+ { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+ { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+ { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+ { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+ { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000220 },
+ { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+ { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x00080008 },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+ { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+ { 0x00110048, 0x20d077bd, 0x00000070, 0x00000114 },
+ { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+ { 0x00000048, 0x20b077bd, 0x00000090, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+ { 0x00110048, 0x20d077bd, 0x00000070, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+ { 0x00000048, 0x20b077bd, 0x00000090, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+ { 0x00110048, 0x20d077bd, 0x00000070, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+ { 0x00000048, 0x20b077bd, 0x00000090, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+ { 0x00110048, 0x20d077bd, 0x00000070, 0x00000220 },
+ { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+ { 0x00000048, 0x20b077bd, 0x00000090, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x000a000a },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+ { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+ { 0x00110048, 0x20d477bd, 0x00000074, 0x00000114 },
+ { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+ { 0x00000048, 0x20b477bd, 0x00000094, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+ { 0x00110048, 0x20d477bd, 0x00000074, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+ { 0x00000048, 0x20b477bd, 0x00000094, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+ { 0x00110048, 0x20d477bd, 0x00000074, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+ { 0x00000048, 0x20b477bd, 0x00000094, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+ { 0x00110048, 0x20d477bd, 0x00000074, 0x00000220 },
+ { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+ { 0x00000048, 0x20b477bd, 0x00000094, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x000c000c },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+ { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+ { 0x00110048, 0x20d877bd, 0x00000078, 0x00000114 },
+ { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+ { 0x00000048, 0x20b877bd, 0x00000098, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+ { 0x00110048, 0x20d877bd, 0x00000078, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+ { 0x00000048, 0x20b877bd, 0x00000098, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+ { 0x00110048, 0x20d877bd, 0x00000078, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+ { 0x00000048, 0x20b877bd, 0x00000098, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+ { 0x00110048, 0x20d877bd, 0x00000078, 0x00000220 },
+ { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+ { 0x00000048, 0x20b877bd, 0x00000098, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x000e000e },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+ { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+ { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000114 },
+ { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+ { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+ { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+ { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+ { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+ { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+ { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000220 },
+ { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+ { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00800001, 0x22a00169, 0x00000000, 0xffffffff },
+ { 0x02000010, 0x20002d24, 0x020000e4, 0x00000000 },
+ { 0x02010010, 0x20002d24, 0x020000e6, 0x00000000 },
+ { 0x00110220, 0x34001c00, 0x02001400, 0x000000d0 },
+ { 0x00000005, 0x23022d31, 0x00000044, 0x00030003 },
+ { 0x00000401, 0x233803bd, 0x000000f4, 0x00000000 },
+ { 0x00000c01, 0x233003bd, 0x00000060, 0x00000000 },
+ { 0x00000c01, 0x233403bd, 0x00000080, 0x00000000 },
+ { 0x00000c01, 0x232803bd, 0x000000c0, 0x00000000 },
+ { 0x00000801, 0x232c03bd, 0x000000a0, 0x00000000 },
+ { 0x00000001, 0x240803bc, 0x000000c0, 0x00000000 },
+ { 0x00000048, 0x24087fbc, 0x00000060, 0x41000000 },
+ { 0x00000448, 0x22e87fbd, 0x000000f4, 0x41e00000 },
+ { 0x00000c41, 0x22e47fbd, 0x00000080, 0x40800000 },
+ { 0x00000001, 0x241003bc, 0x00000060, 0x00000000 },
+ { 0x00000c48, 0x22f07fbd, 0x000000f4, 0x41000000 },
+ { 0x00000801, 0x22f40061, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00400001, 0x2400036c, 0x00000000, 0x00006ea2 },
+ { 0x00400040, 0x24002d8c, 0x00690400, 0x00460046 },
+ { 0x00400009, 0x22c02d8d, 0x00690400, 0x00050005 },
+ { 0x00400001, 0x2400036c, 0x00000000, 0x00006204 },
+ { 0x00400040, 0x24002d8c, 0x00690400, 0x00400040 },
+ { 0x00400409, 0x22402d8d, 0x00690400, 0x00050005 },
+ { 0x00000801, 0x22500061, 0x00000000, 0x01000100 },
+ { 0x00000001, 0x22d00061, 0x00000000, 0x00400040 },
+ { 0x00110220, 0x34001c00, 0x02001400, 0x000002c0 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+ { 0x00000401, 0x233c0021, 0x000000fc, 0x00000000 },
+ { 0x00000801, 0x23240121, 0x000000f8, 0x00000000 },
+ { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+ { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+ { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+ { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+ { 0x00110220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+ { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+ { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+ { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+ { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+ { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+ { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+ { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+ { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+ { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+ { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+ { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+ { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+ { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+ { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+ { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+ { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+ { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+ { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+ { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+ { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+ { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+ { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+ { 0x02000031, 0x28000229, 0x00000200, 0x00000200 },
+ { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb801 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000e000 },
+ { 0x02000031, 0x28800229, 0x00000200, 0x00000200 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044ebc02 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000e000 },
+ { 0x02000031, 0x29000229, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00110220, 0x34001c00, 0x02001400, 0x000002b0 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+ { 0x00000040, 0x23240d21, 0x000000f8, 0x00000001 },
+ { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+ { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+ { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+ { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+ { 0x00110220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+ { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+ { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+ { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+ { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+ { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+ { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+ { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+ { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+ { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+ { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+ { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+ { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+ { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+ { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+ { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+ { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+ { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+ { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+ { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+ { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+ { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+ { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+ { 0x02000031, 0x2a000229, 0x00000200, 0x00000200 },
+ { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb801 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000e000 },
+ { 0x02000031, 0x2a800229, 0x00000200, 0x00000200 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044ebc02 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000e000 },
+ { 0x02000031, 0x2b000229, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00110220, 0x34001c00, 0x02001400, 0x000002b0 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+ { 0x00000040, 0x23240d21, 0x000000f8, 0x00000002 },
+ { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+ { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+ { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+ { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+ { 0x00110220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+ { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+ { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+ { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+ { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+ { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+ { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+ { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+ { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+ { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+ { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+ { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+ { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+ { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+ { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+ { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+ { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+ { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+ { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+ { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+ { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+ { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+ { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+ { 0x02000031, 0x2c000229, 0x00000200, 0x00000200 },
+ { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb801 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000e000 },
+ { 0x02000031, 0x2c800229, 0x00000200, 0x00000200 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044ebc02 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000e000 },
+ { 0x02000031, 0x2d000229, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00110220, 0x34001c00, 0x02001400, 0x000002b0 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+ { 0x00000040, 0x23240d21, 0x000000f8, 0x00000003 },
+ { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+ { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+ { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+ { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+ { 0x00110220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+ { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+ { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+ { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+ { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+ { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+ { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+ { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+ { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+ { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+ { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+ { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+ { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+ { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+ { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+ { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+ { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+ { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+ { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+ { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+ { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+ { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+ { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+ { 0x02000031, 0x2e000229, 0x00000200, 0x00000200 },
+ { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb801 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000e000 },
+ { 0x02000031, 0x2e800229, 0x00000200, 0x00000200 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044ebc02 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000e000 },
+ { 0x02000031, 0x2f000229, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x00000000 },
+ { 0x00400040, 0x22082e2c, 0x0069005c, 0x03a003a0 },
+ { 0x00000409, 0x23603da5, 0x000000e0, 0x00010001 },
+ { 0x00000c01, 0x236401a5, 0x000000e2, 0x00000000 },
+ { 0x00000801, 0x23680061, 0x00000000, 0x0001001f },
+ { 0x80400040, 0xc0002d29, 0x008a8000, 0x00800080 },
+ { 0x80400040, 0xc0202d29, 0x008a8020, 0x00800080 },
+ { 0x80400040, 0xc0402d29, 0x008a8040, 0x00800080 },
+ { 0x80400040, 0xc0602d29, 0x008a8060, 0x00800080 },
+ { 0x80600040, 0xa4002d29, 0x008d8400, 0x00800080 },
+ { 0x80600040, 0xa4202d29, 0x008d8420, 0x00800080 },
+ { 0x80600040, 0xa4402d29, 0x008d8440, 0x00800080 },
+ { 0x80600040, 0xa4602d29, 0x008d8460, 0x00800080 },
+ { 0x80400040, 0xc8002d29, 0x008a8800, 0x00800080 },
+ { 0x80400040, 0xc8202d29, 0x008a8820, 0x00800080 },
+ { 0x80400040, 0xc8402d29, 0x008a8840, 0x00800080 },
+ { 0x80400040, 0xc8602d29, 0x008a8860, 0x00800080 },
+ { 0x80400040, 0xc0102d29, 0x008a8010, 0x00800080 },
+ { 0x80400040, 0xc0302d29, 0x008a8030, 0x00800080 },
+ { 0x80400040, 0xc0502d29, 0x008a8050, 0x00800080 },
+ { 0x80400040, 0xc0702d29, 0x008a8070, 0x00800080 },
+ { 0x80600040, 0xa4102d29, 0x008d8410, 0x00800080 },
+ { 0x80600040, 0xa4302d29, 0x008d8430, 0x00800080 },
+ { 0x80600040, 0xa4502d29, 0x008d8450, 0x00800080 },
+ { 0x80600040, 0xa4702d29, 0x008d8470, 0x00800080 },
+ { 0x80400040, 0xc8102d29, 0x008a8810, 0x00800080 },
+ { 0x80400040, 0xc8302d29, 0x008a8830, 0x00800080 },
+ { 0x80400040, 0xc8502d29, 0x008a8850, 0x00800080 },
+ { 0x80400040, 0xc8702d29, 0x008a8870, 0x00800080 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x02000200 },
+ { 0x80400040, 0xc0002d29, 0x008a8000, 0x00800080 },
+ { 0x80400040, 0xc0202d29, 0x008a8020, 0x00800080 },
+ { 0x80400040, 0xc0402d29, 0x008a8040, 0x00800080 },
+ { 0x80400040, 0xc0602d29, 0x008a8060, 0x00800080 },
+ { 0x80600040, 0xa4002d29, 0x008d8400, 0x00800080 },
+ { 0x80600040, 0xa4202d29, 0x008d8420, 0x00800080 },
+ { 0x80600040, 0xa4402d29, 0x008d8440, 0x00800080 },
+ { 0x80600040, 0xa4602d29, 0x008d8460, 0x00800080 },
+ { 0x80400040, 0xc8002d29, 0x008a8800, 0x00800080 },
+ { 0x80400040, 0xc8202d29, 0x008a8820, 0x00800080 },
+ { 0x80400040, 0xc8402d29, 0x008a8840, 0x00800080 },
+ { 0x80400040, 0xc8602d29, 0x008a8860, 0x00800080 },
+ { 0x80400040, 0xc0102d29, 0x008a8010, 0x00800080 },
+ { 0x80400040, 0xc0302d29, 0x008a8030, 0x00800080 },
+ { 0x80400040, 0xc0502d29, 0x008a8050, 0x00800080 },
+ { 0x80400040, 0xc0702d29, 0x008a8070, 0x00800080 },
+ { 0x80600040, 0xa4102d29, 0x008d8410, 0x00800080 },
+ { 0x80600040, 0xa4302d29, 0x008d8430, 0x00800080 },
+ { 0x80600040, 0xa4502d29, 0x008d8450, 0x00800080 },
+ { 0x80600040, 0xa4702d29, 0x008d8470, 0x00800080 },
+ { 0x80400040, 0xc8102d29, 0x008a8810, 0x00800080 },
+ { 0x80400040, 0xc8302d29, 0x008a8830, 0x00800080 },
+ { 0x80400040, 0xc8502d29, 0x008a8850, 0x00800080 },
+ { 0x80400040, 0xc8702d29, 0x008a8870, 0x00800080 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x04000400 },
+ { 0x80400040, 0xc0002d29, 0x008a8000, 0x00800080 },
+ { 0x80400040, 0xc0202d29, 0x008a8020, 0x00800080 },
+ { 0x80400040, 0xc0402d29, 0x008a8040, 0x00800080 },
+ { 0x80400040, 0xc0602d29, 0x008a8060, 0x00800080 },
+ { 0x80600040, 0xa4002d29, 0x008d8400, 0x00800080 },
+ { 0x80600040, 0xa4202d29, 0x008d8420, 0x00800080 },
+ { 0x80600040, 0xa4402d29, 0x008d8440, 0x00800080 },
+ { 0x80600040, 0xa4602d29, 0x008d8460, 0x00800080 },
+ { 0x80400040, 0xc8002d29, 0x008a8800, 0x00800080 },
+ { 0x80400040, 0xc8202d29, 0x008a8820, 0x00800080 },
+ { 0x80400040, 0xc8402d29, 0x008a8840, 0x00800080 },
+ { 0x80400040, 0xc8602d29, 0x008a8860, 0x00800080 },
+ { 0x80400040, 0xc0102d29, 0x008a8010, 0x00800080 },
+ { 0x80400040, 0xc0302d29, 0x008a8030, 0x00800080 },
+ { 0x80400040, 0xc0502d29, 0x008a8050, 0x00800080 },
+ { 0x80400040, 0xc0702d29, 0x008a8070, 0x00800080 },
+ { 0x80600040, 0xa4102d29, 0x008d8410, 0x00800080 },
+ { 0x80600040, 0xa4302d29, 0x008d8430, 0x00800080 },
+ { 0x80600040, 0xa4502d29, 0x008d8450, 0x00800080 },
+ { 0x80600040, 0xa4702d29, 0x008d8470, 0x00800080 },
+ { 0x80400040, 0xc8102d29, 0x008a8810, 0x00800080 },
+ { 0x80400040, 0xc8302d29, 0x008a8830, 0x00800080 },
+ { 0x80400040, 0xc8502d29, 0x008a8850, 0x00800080 },
+ { 0x80400040, 0xc8702d29, 0x008a8870, 0x00800080 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x06000600 },
+ { 0x80400040, 0xc0002d29, 0x008a8000, 0x00800080 },
+ { 0x80400040, 0xc0202d29, 0x008a8020, 0x00800080 },
+ { 0x80400040, 0xc0402d29, 0x008a8040, 0x00800080 },
+ { 0x80400040, 0xc0602d29, 0x008a8060, 0x00800080 },
+ { 0x80600040, 0xa4002d29, 0x008d8400, 0x00800080 },
+ { 0x80600040, 0xa4202d29, 0x008d8420, 0x00800080 },
+ { 0x80600040, 0xa4402d29, 0x008d8440, 0x00800080 },
+ { 0x80600040, 0xa4602d29, 0x008d8460, 0x00800080 },
+ { 0x80400040, 0xc8002d29, 0x008a8800, 0x00800080 },
+ { 0x80400040, 0xc8202d29, 0x008a8820, 0x00800080 },
+ { 0x80400040, 0xc8402d29, 0x008a8840, 0x00800080 },
+ { 0x80400040, 0xc8602d29, 0x008a8860, 0x00800080 },
+ { 0x80400040, 0xc0102d29, 0x008a8010, 0x00800080 },
+ { 0x80400040, 0xc0302d29, 0x008a8030, 0x00800080 },
+ { 0x80400040, 0xc0502d29, 0x008a8050, 0x00800080 },
+ { 0x80400040, 0xc0702d29, 0x008a8070, 0x00800080 },
+ { 0x80600040, 0xa4102d29, 0x008d8410, 0x00800080 },
+ { 0x80600040, 0xa4302d29, 0x008d8430, 0x00800080 },
+ { 0x80600040, 0xa4502d29, 0x008d8450, 0x00800080 },
+ { 0x80600040, 0xa4702d29, 0x008d8470, 0x00800080 },
+ { 0x80400040, 0xc8102d29, 0x008a8810, 0x00800080 },
+ { 0x80400040, 0xc8302d29, 0x008a8830, 0x00800080 },
+ { 0x80400040, 0xc8502d29, 0x008a8850, 0x00800080 },
+ { 0x80400040, 0xc8702d29, 0x008a8870, 0x00800080 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x08000800 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x00000000 },
+ { 0x00600001, 0x23800021, 0x008d0360, 0x00000000 },
+ { 0x00600001, 0x24a00021, 0x008d0360, 0x00000000 },
+ { 0x00000040, 0x24a41ca5, 0x00000364, 0x00000002 },
+ { 0x00600401, 0xf8000231, 0x00cf8001, 0x00000000 },
+ { 0x00600401, 0xf8200231, 0x00cf8021, 0x00000000 },
+ { 0x00800c01, 0xd0000231, 0x00d28401, 0x00000000 },
+ { 0x00800c01, 0xd0200231, 0x00d28421, 0x00000000 },
+ { 0x00600801, 0xf4000231, 0x00cf8801, 0x00000000 },
+ { 0x00600801, 0xf4200231, 0x00cf8821, 0x00000000 },
+ { 0x00600401, 0xf9200231, 0x00cf8041, 0x00000000 },
+ { 0x00600401, 0xf9400231, 0x00cf8061, 0x00000000 },
+ { 0x00800c01, 0xd1200231, 0x00d28441, 0x00000000 },
+ { 0x00800c01, 0xd1400231, 0x00d28461, 0x00000000 },
+ { 0x00600801, 0xf5200231, 0x00cf8841, 0x00000000 },
+ { 0x00600801, 0xf5400231, 0x00cf8861, 0x00000000 },
+ { 0x05000031, 0x20000e24, 0x00000380, 0x060a8018 },
+ { 0x05000031, 0x20000e24, 0x000004a0, 0x060a8018 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x02000200 },
+ { 0x00000040, 0x23841ca5, 0x00000364, 0x00000004 },
+ { 0x00000040, 0x24a41ca5, 0x00000364, 0x00000006 },
+ { 0x00600401, 0xf8000231, 0x00cf8001, 0x00000000 },
+ { 0x00600401, 0xf8200231, 0x00cf8021, 0x00000000 },
+ { 0x00800c01, 0xd0000231, 0x00d28401, 0x00000000 },
+ { 0x00800c01, 0xd0200231, 0x00d28421, 0x00000000 },
+ { 0x00600801, 0xf4000231, 0x00cf8801, 0x00000000 },
+ { 0x00600801, 0xf4200231, 0x00cf8821, 0x00000000 },
+ { 0x00600401, 0xf9200231, 0x00cf8041, 0x00000000 },
+ { 0x00600401, 0xf9400231, 0x00cf8061, 0x00000000 },
+ { 0x00800c01, 0xd1200231, 0x00d28441, 0x00000000 },
+ { 0x00800c01, 0xd1400231, 0x00d28461, 0x00000000 },
+ { 0x00600801, 0xf5200231, 0x00cf8841, 0x00000000 },
+ { 0x00600801, 0xf5400231, 0x00cf8861, 0x00000000 },
+ { 0x05000031, 0x20000e24, 0x00000380, 0x060a8018 },
+ { 0x05000031, 0x20000e24, 0x000004a0, 0x060a8018 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x04000400 },
+ { 0x00000040, 0x23841ca5, 0x00000364, 0x00000008 },
+ { 0x00000040, 0x24a41ca5, 0x00000364, 0x0000000a },
+ { 0x00600401, 0xf8000231, 0x00cf8001, 0x00000000 },
+ { 0x00600401, 0xf8200231, 0x00cf8021, 0x00000000 },
+ { 0x00800c01, 0xd0000231, 0x00d28401, 0x00000000 },
+ { 0x00800c01, 0xd0200231, 0x00d28421, 0x00000000 },
+ { 0x00600801, 0xf4000231, 0x00cf8801, 0x00000000 },
+ { 0x00600801, 0xf4200231, 0x00cf8821, 0x00000000 },
+ { 0x00600401, 0xf9200231, 0x00cf8041, 0x00000000 },
+ { 0x00600401, 0xf9400231, 0x00cf8061, 0x00000000 },
+ { 0x00800c01, 0xd1200231, 0x00d28441, 0x00000000 },
+ { 0x00800c01, 0xd1400231, 0x00d28461, 0x00000000 },
+ { 0x00600801, 0xf5200231, 0x00cf8841, 0x00000000 },
+ { 0x00600801, 0xf5400231, 0x00cf8861, 0x00000000 },
+ { 0x05000031, 0x20000e24, 0x00000380, 0x060a8018 },
+ { 0x05000031, 0x20000e24, 0x000004a0, 0x060a8018 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x06000600 },
+ { 0x00000040, 0x23841ca5, 0x00000364, 0x0000000c },
+ { 0x00000040, 0x24a41ca5, 0x00000364, 0x0000000e },
+ { 0x00600401, 0xf8000231, 0x00cf8001, 0x00000000 },
+ { 0x00600401, 0xf8200231, 0x00cf8021, 0x00000000 },
+ { 0x00800c01, 0xd0000231, 0x00d28401, 0x00000000 },
+ { 0x00800c01, 0xd0200231, 0x00d28421, 0x00000000 },
+ { 0x00600801, 0xf4000231, 0x00cf8801, 0x00000000 },
+ { 0x00600801, 0xf4200231, 0x00cf8821, 0x00000000 },
+ { 0x00600401, 0xf9200231, 0x00cf8041, 0x00000000 },
+ { 0x00600401, 0xf9400231, 0x00cf8061, 0x00000000 },
+ { 0x00800c01, 0xd1200231, 0x00d28441, 0x00000000 },
+ { 0x00800c01, 0xd1400231, 0x00d28461, 0x00000000 },
+ { 0x00600801, 0xf5200231, 0x00cf8841, 0x00000000 },
+ { 0x00600801, 0xf5400231, 0x00cf8861, 0x00000000 },
+ { 0x05000031, 0x20000e24, 0x00000380, 0x060a8018 },
+ { 0x05000031, 0x20000e24, 0x000004a0, 0x060a8018 },
+ { 0x00600001, 0x2fe00021, 0x008d0000, 0x00000000 },
+ { 0x07000031, 0x20001e24, 0x00000fe0, 0x82000010 },
diff --git a/src/shaders/post_processing/gen7/pl3_to_pa.g7b b/src/shaders/post_processing/gen7/pl3_to_pa.g7b
new file mode 100644
index 0000000..e1c3c05
--- /dev/null
+++ b/src/shaders/post_processing/gen7/pl3_to_pa.g7b
@@ -0,0 +1,709 @@
+ { 0x00600001, 0x23600021, 0x008d0000, 0x00000000 },
+ { 0x00600001, 0x23200021, 0x008d0000, 0x00000000 },
+ { 0x00600001, 0x23400021, 0x008d0000, 0x00000000 },
+ { 0x00600041, 0x206077bd, 0x008d0060, 0x000000f0 },
+ { 0x00200001, 0x211401bd, 0x004500e0, 0x00000000 },
+ { 0x01600010, 0x20002e24, 0x0000005a, 0x00010001 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x00000000 },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+ { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c077bd, 0x00000060, 0x00000114 },
+ { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a077bd, 0x00000080, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c077bd, 0x00000060, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a077bd, 0x00000080, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c077bd, 0x00000060, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a077bd, 0x00000080, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c077bd, 0x00000060, 0x00000220 },
+ { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a077bd, 0x00000080, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x00020002 },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+ { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+ { 0x00110048, 0x20c477bd, 0x00000064, 0x00000114 },
+ { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+ { 0x00000048, 0x20a477bd, 0x00000084, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+ { 0x00110048, 0x20c477bd, 0x00000064, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+ { 0x00000048, 0x20a477bd, 0x00000084, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+ { 0x00110048, 0x20c477bd, 0x00000064, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+ { 0x00000048, 0x20a477bd, 0x00000084, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+ { 0x00110048, 0x20c477bd, 0x00000064, 0x00000220 },
+ { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+ { 0x00000048, 0x20a477bd, 0x00000084, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x00040004 },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+ { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+ { 0x00110048, 0x20c877bd, 0x00000068, 0x00000114 },
+ { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+ { 0x00000048, 0x20a877bd, 0x00000088, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+ { 0x00110048, 0x20c877bd, 0x00000068, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+ { 0x00000048, 0x20a877bd, 0x00000088, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+ { 0x00110048, 0x20c877bd, 0x00000068, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+ { 0x00000048, 0x20a877bd, 0x00000088, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+ { 0x00110048, 0x20c877bd, 0x00000068, 0x00000220 },
+ { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+ { 0x00000048, 0x20a877bd, 0x00000088, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x00060006 },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+ { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+ { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000114 },
+ { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+ { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+ { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+ { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+ { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+ { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+ { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000220 },
+ { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+ { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x00080008 },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+ { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+ { 0x00110048, 0x20d077bd, 0x00000070, 0x00000114 },
+ { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+ { 0x00000048, 0x20b077bd, 0x00000090, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+ { 0x00110048, 0x20d077bd, 0x00000070, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+ { 0x00000048, 0x20b077bd, 0x00000090, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+ { 0x00110048, 0x20d077bd, 0x00000070, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+ { 0x00000048, 0x20b077bd, 0x00000090, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+ { 0x00110048, 0x20d077bd, 0x00000070, 0x00000220 },
+ { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+ { 0x00000048, 0x20b077bd, 0x00000090, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x000a000a },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+ { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+ { 0x00110048, 0x20d477bd, 0x00000074, 0x00000114 },
+ { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+ { 0x00000048, 0x20b477bd, 0x00000094, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+ { 0x00110048, 0x20d477bd, 0x00000074, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+ { 0x00000048, 0x20b477bd, 0x00000094, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+ { 0x00110048, 0x20d477bd, 0x00000074, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+ { 0x00000048, 0x20b477bd, 0x00000094, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+ { 0x00110048, 0x20d477bd, 0x00000074, 0x00000220 },
+ { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+ { 0x00000048, 0x20b477bd, 0x00000094, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x000c000c },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+ { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+ { 0x00110048, 0x20d877bd, 0x00000078, 0x00000114 },
+ { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+ { 0x00000048, 0x20b877bd, 0x00000098, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+ { 0x00110048, 0x20d877bd, 0x00000078, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+ { 0x00000048, 0x20b877bd, 0x00000098, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+ { 0x00110048, 0x20d877bd, 0x00000078, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+ { 0x00000048, 0x20b877bd, 0x00000098, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+ { 0x00110048, 0x20d877bd, 0x00000078, 0x00000220 },
+ { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+ { 0x00000048, 0x20b877bd, 0x00000098, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x000e000e },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+ { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+ { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000114 },
+ { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+ { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+ { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+ { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+ { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+ { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+ { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000220 },
+ { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+ { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00800001, 0x22a00169, 0x00000000, 0xffffffff },
+ { 0x02000010, 0x20002d24, 0x020000e4, 0x00000000 },
+ { 0x02010010, 0x20002d24, 0x020000e6, 0x00000000 },
+ { 0x00110220, 0x34001c00, 0x02001400, 0x0000001a },
+ { 0x00000005, 0x23022d31, 0x00000044, 0x00030003 },
+ { 0x00000401, 0x233803bd, 0x000000f4, 0x00000000 },
+ { 0x00000c01, 0x233003bd, 0x00000060, 0x00000000 },
+ { 0x00000c01, 0x233403bd, 0x00000080, 0x00000000 },
+ { 0x00000c01, 0x232803bd, 0x000000c0, 0x00000000 },
+ { 0x00000801, 0x232c03bd, 0x000000a0, 0x00000000 },
+ { 0x00000001, 0x240803bc, 0x000000c0, 0x00000000 },
+ { 0x00000048, 0x24087fbc, 0x00000060, 0x41000000 },
+ { 0x00000448, 0x22e87fbd, 0x000000f4, 0x41e00000 },
+ { 0x00000c41, 0x22e47fbd, 0x00000080, 0x40800000 },
+ { 0x00000001, 0x241003bc, 0x00000060, 0x00000000 },
+ { 0x00000c48, 0x22f07fbd, 0x000000f4, 0x41000000 },
+ { 0x00000801, 0x22f40061, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00400001, 0x2400036c, 0x00000000, 0x00006ea2 },
+ { 0x00400040, 0x24002d8c, 0x00690400, 0x00460046 },
+ { 0x00400009, 0x22c02d8d, 0x00690400, 0x00050005 },
+ { 0x00400001, 0x2400036c, 0x00000000, 0x00006204 },
+ { 0x00400040, 0x24002d8c, 0x00690400, 0x00400040 },
+ { 0x00400409, 0x22402d8d, 0x00690400, 0x00050005 },
+ { 0x00000801, 0x22500061, 0x00000000, 0x01000100 },
+ { 0x00000001, 0x22d00061, 0x00000000, 0x00400040 },
+ { 0x00110220, 0x34001c00, 0x02001400, 0x00000058 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+ { 0x00000401, 0x233c0021, 0x000000fc, 0x00000000 },
+ { 0x00000801, 0x23240121, 0x000000f8, 0x00000000 },
+ { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+ { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+ { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+ { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+ { 0x00110220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+ { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+ { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+ { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+ { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+ { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+ { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+ { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+ { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+ { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+ { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+ { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+ { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+ { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+ { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+ { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+ { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+ { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+ { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+ { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+ { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+ { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+ { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+ { 0x02000031, 0x28000229, 0x00000200, 0x00000200 },
+ { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb801 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000e000 },
+ { 0x02000031, 0x28800229, 0x00000200, 0x00000200 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044ebc02 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000e000 },
+ { 0x02000031, 0x29000229, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00110220, 0x34001c00, 0x02001400, 0x00000056 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+ { 0x00000040, 0x23240d21, 0x000000f8, 0x00000001 },
+ { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+ { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+ { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+ { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+ { 0x00110220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+ { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+ { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+ { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+ { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+ { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+ { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+ { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+ { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+ { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+ { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+ { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+ { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+ { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+ { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+ { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+ { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+ { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+ { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+ { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+ { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+ { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+ { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+ { 0x02000031, 0x2a000229, 0x00000200, 0x00000200 },
+ { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb801 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000e000 },
+ { 0x02000031, 0x2a800229, 0x00000200, 0x00000200 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044ebc02 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000e000 },
+ { 0x02000031, 0x2b000229, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00110220, 0x34001c00, 0x02001400, 0x00000056 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+ { 0x00000040, 0x23240d21, 0x000000f8, 0x00000002 },
+ { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+ { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+ { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+ { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+ { 0x00110220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+ { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+ { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+ { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+ { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+ { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+ { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+ { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+ { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+ { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+ { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+ { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+ { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+ { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+ { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+ { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+ { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+ { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+ { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+ { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+ { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+ { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+ { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+ { 0x02000031, 0x2c000229, 0x00000200, 0x00000200 },
+ { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb801 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000e000 },
+ { 0x02000031, 0x2c800229, 0x00000200, 0x00000200 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044ebc02 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000e000 },
+ { 0x02000031, 0x2d000229, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00110220, 0x34001c00, 0x02001400, 0x00000056 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+ { 0x00000040, 0x23240d21, 0x000000f8, 0x00000003 },
+ { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+ { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+ { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+ { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+ { 0x00110220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+ { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+ { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+ { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+ { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+ { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+ { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+ { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+ { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+ { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+ { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+ { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+ { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+ { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+ { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+ { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+ { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+ { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+ { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+ { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+ { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+ { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+ { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+ { 0x02000031, 0x2e000229, 0x00000200, 0x00000200 },
+ { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb801 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000e000 },
+ { 0x02000031, 0x2e800229, 0x00000200, 0x00000200 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044ebc02 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000e000 },
+ { 0x02000031, 0x2f000229, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x00000000 },
+ { 0x00400040, 0x22082e2c, 0x0069005c, 0x03a003a0 },
+ { 0x00000409, 0x23603da5, 0x000000e0, 0x00010001 },
+ { 0x00000c01, 0x236401a5, 0x000000e2, 0x00000000 },
+ { 0x00000801, 0x23680061, 0x00000000, 0x0001001f },
+ { 0x80400040, 0xc0002d29, 0x008a8000, 0x00800080 },
+ { 0x80400040, 0xc0202d29, 0x008a8020, 0x00800080 },
+ { 0x80400040, 0xc0402d29, 0x008a8040, 0x00800080 },
+ { 0x80400040, 0xc0602d29, 0x008a8060, 0x00800080 },
+ { 0x80600040, 0xa4002d29, 0x008d8400, 0x00800080 },
+ { 0x80600040, 0xa4202d29, 0x008d8420, 0x00800080 },
+ { 0x80600040, 0xa4402d29, 0x008d8440, 0x00800080 },
+ { 0x80600040, 0xa4602d29, 0x008d8460, 0x00800080 },
+ { 0x80400040, 0xc8002d29, 0x008a8800, 0x00800080 },
+ { 0x80400040, 0xc8202d29, 0x008a8820, 0x00800080 },
+ { 0x80400040, 0xc8402d29, 0x008a8840, 0x00800080 },
+ { 0x80400040, 0xc8602d29, 0x008a8860, 0x00800080 },
+ { 0x80400040, 0xc0102d29, 0x008a8010, 0x00800080 },
+ { 0x80400040, 0xc0302d29, 0x008a8030, 0x00800080 },
+ { 0x80400040, 0xc0502d29, 0x008a8050, 0x00800080 },
+ { 0x80400040, 0xc0702d29, 0x008a8070, 0x00800080 },
+ { 0x80600040, 0xa4102d29, 0x008d8410, 0x00800080 },
+ { 0x80600040, 0xa4302d29, 0x008d8430, 0x00800080 },
+ { 0x80600040, 0xa4502d29, 0x008d8450, 0x00800080 },
+ { 0x80600040, 0xa4702d29, 0x008d8470, 0x00800080 },
+ { 0x80400040, 0xc8102d29, 0x008a8810, 0x00800080 },
+ { 0x80400040, 0xc8302d29, 0x008a8830, 0x00800080 },
+ { 0x80400040, 0xc8502d29, 0x008a8850, 0x00800080 },
+ { 0x80400040, 0xc8702d29, 0x008a8870, 0x00800080 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x02000200 },
+ { 0x80400040, 0xc0002d29, 0x008a8000, 0x00800080 },
+ { 0x80400040, 0xc0202d29, 0x008a8020, 0x00800080 },
+ { 0x80400040, 0xc0402d29, 0x008a8040, 0x00800080 },
+ { 0x80400040, 0xc0602d29, 0x008a8060, 0x00800080 },
+ { 0x80600040, 0xa4002d29, 0x008d8400, 0x00800080 },
+ { 0x80600040, 0xa4202d29, 0x008d8420, 0x00800080 },
+ { 0x80600040, 0xa4402d29, 0x008d8440, 0x00800080 },
+ { 0x80600040, 0xa4602d29, 0x008d8460, 0x00800080 },
+ { 0x80400040, 0xc8002d29, 0x008a8800, 0x00800080 },
+ { 0x80400040, 0xc8202d29, 0x008a8820, 0x00800080 },
+ { 0x80400040, 0xc8402d29, 0x008a8840, 0x00800080 },
+ { 0x80400040, 0xc8602d29, 0x008a8860, 0x00800080 },
+ { 0x80400040, 0xc0102d29, 0x008a8010, 0x00800080 },
+ { 0x80400040, 0xc0302d29, 0x008a8030, 0x00800080 },
+ { 0x80400040, 0xc0502d29, 0x008a8050, 0x00800080 },
+ { 0x80400040, 0xc0702d29, 0x008a8070, 0x00800080 },
+ { 0x80600040, 0xa4102d29, 0x008d8410, 0x00800080 },
+ { 0x80600040, 0xa4302d29, 0x008d8430, 0x00800080 },
+ { 0x80600040, 0xa4502d29, 0x008d8450, 0x00800080 },
+ { 0x80600040, 0xa4702d29, 0x008d8470, 0x00800080 },
+ { 0x80400040, 0xc8102d29, 0x008a8810, 0x00800080 },
+ { 0x80400040, 0xc8302d29, 0x008a8830, 0x00800080 },
+ { 0x80400040, 0xc8502d29, 0x008a8850, 0x00800080 },
+ { 0x80400040, 0xc8702d29, 0x008a8870, 0x00800080 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x04000400 },
+ { 0x80400040, 0xc0002d29, 0x008a8000, 0x00800080 },
+ { 0x80400040, 0xc0202d29, 0x008a8020, 0x00800080 },
+ { 0x80400040, 0xc0402d29, 0x008a8040, 0x00800080 },
+ { 0x80400040, 0xc0602d29, 0x008a8060, 0x00800080 },
+ { 0x80600040, 0xa4002d29, 0x008d8400, 0x00800080 },
+ { 0x80600040, 0xa4202d29, 0x008d8420, 0x00800080 },
+ { 0x80600040, 0xa4402d29, 0x008d8440, 0x00800080 },
+ { 0x80600040, 0xa4602d29, 0x008d8460, 0x00800080 },
+ { 0x80400040, 0xc8002d29, 0x008a8800, 0x00800080 },
+ { 0x80400040, 0xc8202d29, 0x008a8820, 0x00800080 },
+ { 0x80400040, 0xc8402d29, 0x008a8840, 0x00800080 },
+ { 0x80400040, 0xc8602d29, 0x008a8860, 0x00800080 },
+ { 0x80400040, 0xc0102d29, 0x008a8010, 0x00800080 },
+ { 0x80400040, 0xc0302d29, 0x008a8030, 0x00800080 },
+ { 0x80400040, 0xc0502d29, 0x008a8050, 0x00800080 },
+ { 0x80400040, 0xc0702d29, 0x008a8070, 0x00800080 },
+ { 0x80600040, 0xa4102d29, 0x008d8410, 0x00800080 },
+ { 0x80600040, 0xa4302d29, 0x008d8430, 0x00800080 },
+ { 0x80600040, 0xa4502d29, 0x008d8450, 0x00800080 },
+ { 0x80600040, 0xa4702d29, 0x008d8470, 0x00800080 },
+ { 0x80400040, 0xc8102d29, 0x008a8810, 0x00800080 },
+ { 0x80400040, 0xc8302d29, 0x008a8830, 0x00800080 },
+ { 0x80400040, 0xc8502d29, 0x008a8850, 0x00800080 },
+ { 0x80400040, 0xc8702d29, 0x008a8870, 0x00800080 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x06000600 },
+ { 0x80400040, 0xc0002d29, 0x008a8000, 0x00800080 },
+ { 0x80400040, 0xc0202d29, 0x008a8020, 0x00800080 },
+ { 0x80400040, 0xc0402d29, 0x008a8040, 0x00800080 },
+ { 0x80400040, 0xc0602d29, 0x008a8060, 0x00800080 },
+ { 0x80600040, 0xa4002d29, 0x008d8400, 0x00800080 },
+ { 0x80600040, 0xa4202d29, 0x008d8420, 0x00800080 },
+ { 0x80600040, 0xa4402d29, 0x008d8440, 0x00800080 },
+ { 0x80600040, 0xa4602d29, 0x008d8460, 0x00800080 },
+ { 0x80400040, 0xc8002d29, 0x008a8800, 0x00800080 },
+ { 0x80400040, 0xc8202d29, 0x008a8820, 0x00800080 },
+ { 0x80400040, 0xc8402d29, 0x008a8840, 0x00800080 },
+ { 0x80400040, 0xc8602d29, 0x008a8860, 0x00800080 },
+ { 0x80400040, 0xc0102d29, 0x008a8010, 0x00800080 },
+ { 0x80400040, 0xc0302d29, 0x008a8030, 0x00800080 },
+ { 0x80400040, 0xc0502d29, 0x008a8050, 0x00800080 },
+ { 0x80400040, 0xc0702d29, 0x008a8070, 0x00800080 },
+ { 0x80600040, 0xa4102d29, 0x008d8410, 0x00800080 },
+ { 0x80600040, 0xa4302d29, 0x008d8430, 0x00800080 },
+ { 0x80600040, 0xa4502d29, 0x008d8450, 0x00800080 },
+ { 0x80600040, 0xa4702d29, 0x008d8470, 0x00800080 },
+ { 0x80400040, 0xc8102d29, 0x008a8810, 0x00800080 },
+ { 0x80400040, 0xc8302d29, 0x008a8830, 0x00800080 },
+ { 0x80400040, 0xc8502d29, 0x008a8850, 0x00800080 },
+ { 0x80400040, 0xc8702d29, 0x008a8870, 0x00800080 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x08000800 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x00000000 },
+ { 0x00600001, 0x23800021, 0x008d0360, 0x00000000 },
+ { 0x00600001, 0x24a00021, 0x008d0360, 0x00000000 },
+ { 0x00000040, 0x24a41ca5, 0x00000364, 0x00000002 },
+ { 0x00600401, 0xf8000231, 0x00cf8001, 0x00000000 },
+ { 0x00600401, 0xf8200231, 0x00cf8021, 0x00000000 },
+ { 0x00800c01, 0xd0000231, 0x00d28401, 0x00000000 },
+ { 0x00800c01, 0xd0200231, 0x00d28421, 0x00000000 },
+ { 0x00600801, 0xf4000231, 0x00cf8801, 0x00000000 },
+ { 0x00600801, 0xf4200231, 0x00cf8821, 0x00000000 },
+ { 0x00600401, 0xf9200231, 0x00cf8041, 0x00000000 },
+ { 0x00600401, 0xf9400231, 0x00cf8061, 0x00000000 },
+ { 0x00800c01, 0xd1200231, 0x00d28441, 0x00000000 },
+ { 0x00800c01, 0xd1400231, 0x00d28461, 0x00000000 },
+ { 0x00600801, 0xf5200231, 0x00cf8841, 0x00000000 },
+ { 0x00600801, 0xf5400231, 0x00cf8861, 0x00000000 },
+ { 0x05000031, 0x20000e24, 0x00000380, 0x060a8018 },
+ { 0x05000031, 0x20000e24, 0x000004a0, 0x060a8018 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x02000200 },
+ { 0x00000040, 0x23841ca5, 0x00000364, 0x00000004 },
+ { 0x00000040, 0x24a41ca5, 0x00000364, 0x00000006 },
+ { 0x00600401, 0xf8000231, 0x00cf8001, 0x00000000 },
+ { 0x00600401, 0xf8200231, 0x00cf8021, 0x00000000 },
+ { 0x00800c01, 0xd0000231, 0x00d28401, 0x00000000 },
+ { 0x00800c01, 0xd0200231, 0x00d28421, 0x00000000 },
+ { 0x00600801, 0xf4000231, 0x00cf8801, 0x00000000 },
+ { 0x00600801, 0xf4200231, 0x00cf8821, 0x00000000 },
+ { 0x00600401, 0xf9200231, 0x00cf8041, 0x00000000 },
+ { 0x00600401, 0xf9400231, 0x00cf8061, 0x00000000 },
+ { 0x00800c01, 0xd1200231, 0x00d28441, 0x00000000 },
+ { 0x00800c01, 0xd1400231, 0x00d28461, 0x00000000 },
+ { 0x00600801, 0xf5200231, 0x00cf8841, 0x00000000 },
+ { 0x00600801, 0xf5400231, 0x00cf8861, 0x00000000 },
+ { 0x05000031, 0x20000e24, 0x00000380, 0x060a8018 },
+ { 0x05000031, 0x20000e24, 0x000004a0, 0x060a8018 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x04000400 },
+ { 0x00000040, 0x23841ca5, 0x00000364, 0x00000008 },
+ { 0x00000040, 0x24a41ca5, 0x00000364, 0x0000000a },
+ { 0x00600401, 0xf8000231, 0x00cf8001, 0x00000000 },
+ { 0x00600401, 0xf8200231, 0x00cf8021, 0x00000000 },
+ { 0x00800c01, 0xd0000231, 0x00d28401, 0x00000000 },
+ { 0x00800c01, 0xd0200231, 0x00d28421, 0x00000000 },
+ { 0x00600801, 0xf4000231, 0x00cf8801, 0x00000000 },
+ { 0x00600801, 0xf4200231, 0x00cf8821, 0x00000000 },
+ { 0x00600401, 0xf9200231, 0x00cf8041, 0x00000000 },
+ { 0x00600401, 0xf9400231, 0x00cf8061, 0x00000000 },
+ { 0x00800c01, 0xd1200231, 0x00d28441, 0x00000000 },
+ { 0x00800c01, 0xd1400231, 0x00d28461, 0x00000000 },
+ { 0x00600801, 0xf5200231, 0x00cf8841, 0x00000000 },
+ { 0x00600801, 0xf5400231, 0x00cf8861, 0x00000000 },
+ { 0x05000031, 0x20000e24, 0x00000380, 0x060a8018 },
+ { 0x05000031, 0x20000e24, 0x000004a0, 0x060a8018 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x06000600 },
+ { 0x00000040, 0x23841ca5, 0x00000364, 0x0000000c },
+ { 0x00000040, 0x24a41ca5, 0x00000364, 0x0000000e },
+ { 0x00600401, 0xf8000231, 0x00cf8001, 0x00000000 },
+ { 0x00600401, 0xf8200231, 0x00cf8021, 0x00000000 },
+ { 0x00800c01, 0xd0000231, 0x00d28401, 0x00000000 },
+ { 0x00800c01, 0xd0200231, 0x00d28421, 0x00000000 },
+ { 0x00600801, 0xf4000231, 0x00cf8801, 0x00000000 },
+ { 0x00600801, 0xf4200231, 0x00cf8821, 0x00000000 },
+ { 0x00600401, 0xf9200231, 0x00cf8041, 0x00000000 },
+ { 0x00600401, 0xf9400231, 0x00cf8061, 0x00000000 },
+ { 0x00800c01, 0xd1200231, 0x00d28441, 0x00000000 },
+ { 0x00800c01, 0xd1400231, 0x00d28461, 0x00000000 },
+ { 0x00600801, 0xf5200231, 0x00cf8841, 0x00000000 },
+ { 0x00600801, 0xf5400231, 0x00cf8861, 0x00000000 },
+ { 0x05000031, 0x20000e24, 0x00000380, 0x060a8018 },
+ { 0x05000031, 0x20000e24, 0x000004a0, 0x060a8018 },
+ { 0x00600001, 0x2fe00021, 0x008d0000, 0x00000000 },
+ { 0x07000031, 0x20001e24, 0x00000fe0, 0x82000010 },
diff --git a/src/shaders/post_processing/gen7/pl3_to_pl2.asm b/src/shaders/post_processing/gen7/pl3_to_pl2.asm
new file mode 100644
index 0000000..6123fc9
--- /dev/null
+++ b/src/shaders/post_processing/gen7/pl3_to_pl2.asm
@@ -0,0 +1,17 @@
+// Module name: AVS
+.kernel PL3_TO_PL2
+.code
+
+#include "VP_Setup.g4a"
+#include "Set_Layer_0.g4a"
+#include "Set_AVS_Buf_0123_PL3.g4a"
+#include "PL3_AVS_Buf_0.g4a"
+#include "PL3_AVS_Buf_1.g4a"
+#include "PL3_AVS_Buf_2.g4a"
+#include "PL3_AVS_Buf_3.g4a"
+#include "Save_AVS_NV12.g4a"
+#include "EOT.g4a"
+
+.end_code
+
+.end_kernel
diff --git a/src/shaders/post_processing/gen7/pl3_to_pl2.g75b b/src/shaders/post_processing/gen7/pl3_to_pl2.g75b
new file mode 100644
index 0000000..0cf2d5d
--- /dev/null
+++ b/src/shaders/post_processing/gen7/pl3_to_pl2.g75b
@@ -0,0 +1,666 @@
+ { 0x00600001, 0x23600021, 0x008d0000, 0x00000000 },
+ { 0x00600001, 0x23200021, 0x008d0000, 0x00000000 },
+ { 0x00600001, 0x23400021, 0x008d0000, 0x00000000 },
+ { 0x00600041, 0x206077bd, 0x008d0060, 0x000000f0 },
+ { 0x00200001, 0x211401bd, 0x004500e0, 0x00000000 },
+ { 0x01600010, 0x20002e24, 0x0000005a, 0x00010001 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x00000000 },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+ { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c077bd, 0x00000060, 0x00000114 },
+ { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a077bd, 0x00000080, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c077bd, 0x00000060, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a077bd, 0x00000080, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c077bd, 0x00000060, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a077bd, 0x00000080, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c077bd, 0x00000060, 0x00000220 },
+ { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a077bd, 0x00000080, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x00020002 },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+ { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+ { 0x00110048, 0x20c477bd, 0x00000064, 0x00000114 },
+ { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+ { 0x00000048, 0x20a477bd, 0x00000084, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+ { 0x00110048, 0x20c477bd, 0x00000064, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+ { 0x00000048, 0x20a477bd, 0x00000084, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+ { 0x00110048, 0x20c477bd, 0x00000064, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+ { 0x00000048, 0x20a477bd, 0x00000084, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+ { 0x00110048, 0x20c477bd, 0x00000064, 0x00000220 },
+ { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+ { 0x00000048, 0x20a477bd, 0x00000084, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x00040004 },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+ { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+ { 0x00110048, 0x20c877bd, 0x00000068, 0x00000114 },
+ { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+ { 0x00000048, 0x20a877bd, 0x00000088, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+ { 0x00110048, 0x20c877bd, 0x00000068, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+ { 0x00000048, 0x20a877bd, 0x00000088, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+ { 0x00110048, 0x20c877bd, 0x00000068, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+ { 0x00000048, 0x20a877bd, 0x00000088, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+ { 0x00110048, 0x20c877bd, 0x00000068, 0x00000220 },
+ { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+ { 0x00000048, 0x20a877bd, 0x00000088, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x00060006 },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+ { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+ { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000114 },
+ { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+ { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+ { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+ { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+ { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+ { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+ { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000220 },
+ { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+ { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x00080008 },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+ { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+ { 0x00110048, 0x20d077bd, 0x00000070, 0x00000114 },
+ { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+ { 0x00000048, 0x20b077bd, 0x00000090, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+ { 0x00110048, 0x20d077bd, 0x00000070, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+ { 0x00000048, 0x20b077bd, 0x00000090, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+ { 0x00110048, 0x20d077bd, 0x00000070, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+ { 0x00000048, 0x20b077bd, 0x00000090, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+ { 0x00110048, 0x20d077bd, 0x00000070, 0x00000220 },
+ { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+ { 0x00000048, 0x20b077bd, 0x00000090, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x000a000a },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+ { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+ { 0x00110048, 0x20d477bd, 0x00000074, 0x00000114 },
+ { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+ { 0x00000048, 0x20b477bd, 0x00000094, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+ { 0x00110048, 0x20d477bd, 0x00000074, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+ { 0x00000048, 0x20b477bd, 0x00000094, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+ { 0x00110048, 0x20d477bd, 0x00000074, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+ { 0x00000048, 0x20b477bd, 0x00000094, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+ { 0x00110048, 0x20d477bd, 0x00000074, 0x00000220 },
+ { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+ { 0x00000048, 0x20b477bd, 0x00000094, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x000c000c },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+ { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+ { 0x00110048, 0x20d877bd, 0x00000078, 0x00000114 },
+ { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+ { 0x00000048, 0x20b877bd, 0x00000098, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+ { 0x00110048, 0x20d877bd, 0x00000078, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+ { 0x00000048, 0x20b877bd, 0x00000098, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+ { 0x00110048, 0x20d877bd, 0x00000078, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+ { 0x00000048, 0x20b877bd, 0x00000098, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+ { 0x00110048, 0x20d877bd, 0x00000078, 0x00000220 },
+ { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+ { 0x00000048, 0x20b877bd, 0x00000098, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x000e000e },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+ { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+ { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000114 },
+ { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+ { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+ { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+ { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+ { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+ { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+ { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000220 },
+ { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+ { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00800001, 0x22a00169, 0x00000000, 0xffffffff },
+ { 0x02000010, 0x20002d24, 0x020000e4, 0x00000000 },
+ { 0x02010010, 0x20002d24, 0x020000e6, 0x00000000 },
+ { 0x00110220, 0x34001c00, 0x02001400, 0x000000d0 },
+ { 0x00000005, 0x23022d31, 0x00000044, 0x00030003 },
+ { 0x00000401, 0x233803bd, 0x000000f4, 0x00000000 },
+ { 0x00000c01, 0x233003bd, 0x00000060, 0x00000000 },
+ { 0x00000c01, 0x233403bd, 0x00000080, 0x00000000 },
+ { 0x00000c01, 0x232803bd, 0x000000c0, 0x00000000 },
+ { 0x00000801, 0x232c03bd, 0x000000a0, 0x00000000 },
+ { 0x00000001, 0x240803bc, 0x000000c0, 0x00000000 },
+ { 0x00000048, 0x24087fbc, 0x00000060, 0x41000000 },
+ { 0x00000448, 0x22e87fbd, 0x000000f4, 0x41e00000 },
+ { 0x00000c41, 0x22e47fbd, 0x00000080, 0x40800000 },
+ { 0x00000001, 0x241003bc, 0x00000060, 0x00000000 },
+ { 0x00000c48, 0x22f07fbd, 0x000000f4, 0x41000000 },
+ { 0x00000801, 0x22f40061, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00400001, 0x2400036c, 0x00000000, 0x00006ea2 },
+ { 0x00400040, 0x24002d8c, 0x00690400, 0x00460046 },
+ { 0x00400009, 0x22c02d8d, 0x00690400, 0x00050005 },
+ { 0x00400001, 0x2400036c, 0x00000000, 0x00006204 },
+ { 0x00400040, 0x24002d8c, 0x00690400, 0x00400040 },
+ { 0x00400409, 0x22402d8d, 0x00690400, 0x00050005 },
+ { 0x00000801, 0x22500061, 0x00000000, 0x01000100 },
+ { 0x00000001, 0x22d00061, 0x00000000, 0x00400040 },
+ { 0x00110220, 0x34001c00, 0x02001400, 0x000002c0 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+ { 0x00000401, 0x233c0021, 0x000000fc, 0x00000000 },
+ { 0x00000801, 0x23240121, 0x000000f8, 0x00000000 },
+ { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+ { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+ { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+ { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+ { 0x00110220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+ { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+ { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+ { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+ { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+ { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+ { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+ { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+ { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+ { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+ { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+ { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+ { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+ { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+ { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+ { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+ { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+ { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+ { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+ { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+ { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+ { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+ { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+ { 0x02000031, 0x28000229, 0x00000200, 0x00000200 },
+ { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb801 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000e000 },
+ { 0x02000031, 0x28800229, 0x00000200, 0x00000200 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044ebc02 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000e000 },
+ { 0x02000031, 0x29000229, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00110220, 0x34001c00, 0x02001400, 0x000002b0 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+ { 0x00000040, 0x23240d21, 0x000000f8, 0x00000001 },
+ { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+ { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+ { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+ { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+ { 0x00110220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+ { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+ { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+ { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+ { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+ { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+ { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+ { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+ { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+ { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+ { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+ { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+ { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+ { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+ { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+ { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+ { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+ { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+ { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+ { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+ { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+ { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+ { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+ { 0x02000031, 0x2a000229, 0x00000200, 0x00000200 },
+ { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb801 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000e000 },
+ { 0x02000031, 0x2a800229, 0x00000200, 0x00000200 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044ebc02 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000e000 },
+ { 0x02000031, 0x2b000229, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00110220, 0x34001c00, 0x02001400, 0x000002b0 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+ { 0x00000040, 0x23240d21, 0x000000f8, 0x00000002 },
+ { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+ { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+ { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+ { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+ { 0x00110220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+ { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+ { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+ { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+ { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+ { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+ { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+ { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+ { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+ { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+ { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+ { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+ { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+ { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+ { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+ { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+ { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+ { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+ { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+ { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+ { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+ { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+ { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+ { 0x02000031, 0x2c000229, 0x00000200, 0x00000200 },
+ { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb801 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000e000 },
+ { 0x02000031, 0x2c800229, 0x00000200, 0x00000200 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044ebc02 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000e000 },
+ { 0x02000031, 0x2d000229, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00110220, 0x34001c00, 0x02001400, 0x000002b0 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+ { 0x00000040, 0x23240d21, 0x000000f8, 0x00000003 },
+ { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+ { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+ { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+ { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+ { 0x00110220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+ { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+ { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+ { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+ { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+ { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+ { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+ { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+ { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+ { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+ { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+ { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+ { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+ { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+ { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+ { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+ { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+ { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+ { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+ { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+ { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+ { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+ { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+ { 0x02000031, 0x2e000229, 0x00000200, 0x00000200 },
+ { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb801 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000e000 },
+ { 0x02000031, 0x2e800229, 0x00000200, 0x00000200 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044ebc02 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000e000 },
+ { 0x02000031, 0x2f000229, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x00000000 },
+ { 0x00400040, 0x22082da8, 0x006902c0, 0x02000200 },
+ { 0x00600001, 0x23800021, 0x008d0360, 0x00000000 },
+ { 0x00600001, 0x24a00021, 0x008d0360, 0x00000000 },
+ { 0x00200401, 0x238001a5, 0x004500e0, 0x00000000 },
+ { 0x00000401, 0x24a001a5, 0x000000e0, 0x00000000 },
+ { 0x00000c08, 0x24a43da5, 0x000000e2, 0x00010001 },
+ { 0x00000801, 0x23880061, 0x00000000, 0x000f000f },
+ { 0x00000801, 0x24a80061, 0x00000000, 0x0007000f },
+ { 0x80800040, 0xa4002d29, 0x00b18400, 0x00800080 },
+ { 0x80800040, 0xa4202d29, 0x00b18420, 0x00800080 },
+ { 0x80800040, 0xa4402d29, 0x00b18440, 0x00800080 },
+ { 0x80800040, 0xa4602d29, 0x00b18460, 0x00800080 },
+ { 0x00600008, 0x45c03d29, 0x00ae8800, 0x00010001 },
+ { 0x00600008, 0x45e03d29, 0x00ae8820, 0x00010001 },
+ { 0x00600008, 0x46003d29, 0x00ae8840, 0x00010001 },
+ { 0x00600008, 0x46203d29, 0x00ae8860, 0x00010001 },
+ { 0x00600040, 0x45c02529, 0x00ae05c0, 0x00ae05e0 },
+ { 0x80600040, 0xc8002d29, 0x00ae05c0, 0x00800080 },
+ { 0x00600040, 0x46002529, 0x00ae0600, 0x00ae0620 },
+ { 0x80600040, 0xc8402d29, 0x00ae0600, 0x00800080 },
+ { 0x00600008, 0x46403d29, 0x00ae8000, 0x00010001 },
+ { 0x00600008, 0x46603d29, 0x00ae8020, 0x00010001 },
+ { 0x00600008, 0x46803d29, 0x00ae8040, 0x00010001 },
+ { 0x00600008, 0x46a03d29, 0x00ae8060, 0x00010001 },
+ { 0x00600040, 0x46402529, 0x00ae0640, 0x00ae0660 },
+ { 0x80600040, 0xc0002d29, 0x00ae0640, 0x00800080 },
+ { 0x00600040, 0x46802529, 0x00ae0680, 0x00ae06a0 },
+ { 0x80600040, 0xc0402d29, 0x00ae0680, 0x00800080 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x04000400 },
+ { 0x80800040, 0xb4002d29, 0x00b19400, 0x00800080 },
+ { 0x80800040, 0xb4202d29, 0x00b19420, 0x00800080 },
+ { 0x80800040, 0xb4402d29, 0x00b19440, 0x00800080 },
+ { 0x80800040, 0xb4602d29, 0x00b19460, 0x00800080 },
+ { 0x00600008, 0x45c03d29, 0x00ae9800, 0x00010001 },
+ { 0x00600008, 0x45e03d29, 0x00ae9820, 0x00010001 },
+ { 0x00600008, 0x46003d29, 0x00ae9840, 0x00010001 },
+ { 0x00600008, 0x46203d29, 0x00ae9860, 0x00010001 },
+ { 0x00600040, 0x45c02529, 0x00ae05c0, 0x00ae05e0 },
+ { 0x80600040, 0xd8002d29, 0x00ae05c0, 0x00800080 },
+ { 0x00600040, 0x46002529, 0x00ae0600, 0x00ae0620 },
+ { 0x80600040, 0xd8402d29, 0x00ae0600, 0x00800080 },
+ { 0x00600008, 0x46403d29, 0x00ae9000, 0x00010001 },
+ { 0x00600008, 0x46603d29, 0x00ae9020, 0x00010001 },
+ { 0x00600008, 0x46803d29, 0x00ae9040, 0x00010001 },
+ { 0x00600008, 0x46a03d29, 0x00ae9060, 0x00010001 },
+ { 0x00600040, 0x46402529, 0x00ae0640, 0x00ae0660 },
+ { 0x80600040, 0xd0002d29, 0x00ae0640, 0x00800080 },
+ { 0x00600040, 0x46802529, 0x00ae0680, 0x00ae06a0 },
+ { 0x80600040, 0xd0402d29, 0x00ae0680, 0x00800080 },
+ { 0x00400040, 0x22082da8, 0x006902c0, 0x06000600 },
+ { 0x80800040, 0xa4002d29, 0x00b18400, 0x00800080 },
+ { 0x80800040, 0xa4202d29, 0x00b18420, 0x00800080 },
+ { 0x80800040, 0xa4402d29, 0x00b18440, 0x00800080 },
+ { 0x80800040, 0xa4602d29, 0x00b18460, 0x00800080 },
+ { 0x00600008, 0x45c03d29, 0x00ae8800, 0x00010001 },
+ { 0x00600008, 0x45e03d29, 0x00ae8820, 0x00010001 },
+ { 0x00600008, 0x46003d29, 0x00ae8840, 0x00010001 },
+ { 0x00600008, 0x46203d29, 0x00ae8860, 0x00010001 },
+ { 0x00600040, 0x45c02529, 0x00ae05c0, 0x00ae05e0 },
+ { 0x80600040, 0xc8002d29, 0x00ae05c0, 0x00800080 },
+ { 0x00600040, 0x46002529, 0x00ae0600, 0x00ae0620 },
+ { 0x80600040, 0xc8402d29, 0x00ae0600, 0x00800080 },
+ { 0x00600008, 0x46403d29, 0x00ae8000, 0x00010001 },
+ { 0x00600008, 0x46603d29, 0x00ae8020, 0x00010001 },
+ { 0x00600008, 0x46803d29, 0x00ae8040, 0x00010001 },
+ { 0x00600008, 0x46a03d29, 0x00ae8060, 0x00010001 },
+ { 0x00600040, 0x46402529, 0x00ae0640, 0x00ae0660 },
+ { 0x80600040, 0xc0002d29, 0x00ae0640, 0x00800080 },
+ { 0x00600040, 0x46802529, 0x00ae0680, 0x00ae06a0 },
+ { 0x80600040, 0xc0402d29, 0x00ae0680, 0x00800080 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x04000400 },
+ { 0x80800040, 0xb4002d29, 0x00b19400, 0x00800080 },
+ { 0x80800040, 0xb4202d29, 0x00b19420, 0x00800080 },
+ { 0x80800040, 0xb4402d29, 0x00b19440, 0x00800080 },
+ { 0x80800040, 0xb4602d29, 0x00b19460, 0x00800080 },
+ { 0x00600008, 0x45c03d29, 0x00ae9800, 0x00010001 },
+ { 0x00600008, 0x45e03d29, 0x00ae9820, 0x00010001 },
+ { 0x00600008, 0x46003d29, 0x00ae9840, 0x00010001 },
+ { 0x00600008, 0x46203d29, 0x00ae9860, 0x00010001 },
+ { 0x00600040, 0x45c02529, 0x00ae05c0, 0x00ae05e0 },
+ { 0x80600040, 0xd8002d29, 0x00ae05c0, 0x00800080 },
+ { 0x00600040, 0x46002529, 0x00ae0600, 0x00ae0620 },
+ { 0x80600040, 0xd8402d29, 0x00ae0600, 0x00800080 },
+ { 0x00600008, 0x46403d29, 0x00ae9000, 0x00010001 },
+ { 0x00600008, 0x46603d29, 0x00ae9020, 0x00010001 },
+ { 0x00600008, 0x46803d29, 0x00ae9040, 0x00010001 },
+ { 0x00600008, 0x46a03d29, 0x00ae9060, 0x00010001 },
+ { 0x00600040, 0x46402529, 0x00ae0640, 0x00ae0660 },
+ { 0x80600040, 0xd0002d29, 0x00ae0640, 0x00800080 },
+ { 0x00600040, 0x46802529, 0x00ae0680, 0x00ae06a0 },
+ { 0x80600040, 0xd0402d29, 0x00ae0680, 0x00800080 },
+ { 0x00400040, 0x22082da8, 0x006902c0, 0x06000600 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x00000000 },
+ { 0x00400040, 0x22082da8, 0x006902c0, 0x02000200 },
+ { 0x00800401, 0x23a00231, 0x00d28401, 0x00000000 },
+ { 0x00800801, 0x23b00231, 0x00d28421, 0x00000000 },
+ { 0x00800401, 0x23c00231, 0x00d28441, 0x00000000 },
+ { 0x00800801, 0x23d00231, 0x00d28461, 0x00000000 },
+ { 0x00600401, 0x44c00231, 0x00cf8801, 0x00000000 },
+ { 0x00600c01, 0x44d00231, 0x00cf8841, 0x00000000 },
+ { 0x00600c01, 0x44c10231, 0x00cf8001, 0x00000000 },
+ { 0x00600801, 0x44d10231, 0x00cf8041, 0x00000000 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x04000400 },
+ { 0x00800401, 0x23e00231, 0x00d29401, 0x00000000 },
+ { 0x00800801, 0x23f00231, 0x00d29421, 0x00000000 },
+ { 0x00800401, 0x24000231, 0x00d29441, 0x00000000 },
+ { 0x00800801, 0x24100231, 0x00d29461, 0x00000000 },
+ { 0x00600401, 0x44e00231, 0x00cf9801, 0x00000000 },
+ { 0x00600c01, 0x44f00231, 0x00cf9841, 0x00000000 },
+ { 0x00600c01, 0x44e10231, 0x00cf9001, 0x00000000 },
+ { 0x00600801, 0x44f10231, 0x00cf9041, 0x00000000 },
+ { 0x00400040, 0x22082da8, 0x006902c0, 0x06000600 },
+ { 0x00800401, 0x24200231, 0x00d28401, 0x00000000 },
+ { 0x00800801, 0x24300231, 0x00d28421, 0x00000000 },
+ { 0x00800401, 0x24400231, 0x00d28441, 0x00000000 },
+ { 0x00800801, 0x24500231, 0x00d28461, 0x00000000 },
+ { 0x00600401, 0x45000231, 0x00cf8801, 0x00000000 },
+ { 0x00600c01, 0x45100231, 0x00cf8841, 0x00000000 },
+ { 0x00600c01, 0x45010231, 0x00cf8001, 0x00000000 },
+ { 0x00600801, 0x45110231, 0x00cf8041, 0x00000000 },
+ { 0x00800401, 0x24600231, 0x00d29401, 0x00000000 },
+ { 0x00800801, 0x24700231, 0x00d29421, 0x00000000 },
+ { 0x00800401, 0x24800231, 0x00d29441, 0x00000000 },
+ { 0x00800801, 0x24900231, 0x00d29461, 0x00000000 },
+ { 0x00600401, 0x45200231, 0x00cf9801, 0x00000000 },
+ { 0x00600c01, 0x45300231, 0x00cf9841, 0x00000000 },
+ { 0x00600c01, 0x45210231, 0x00cf9001, 0x00000000 },
+ { 0x00600801, 0x45310231, 0x00cf9041, 0x00000000 },
+ { 0x05000031, 0x20000e24, 0x00000380, 0x120a8018 },
+ { 0x05000031, 0x20000e24, 0x000004a0, 0x0a0a8019 },
+ { 0x00600001, 0x2fe00021, 0x008d0000, 0x00000000 },
+ { 0x07000031, 0x20001e24, 0x00000fe0, 0x82000010 },
diff --git a/src/shaders/post_processing/gen7/pl3_to_pl2.g7b b/src/shaders/post_processing/gen7/pl3_to_pl2.g7b
new file mode 100644
index 0000000..9b3a59c
--- /dev/null
+++ b/src/shaders/post_processing/gen7/pl3_to_pl2.g7b
@@ -0,0 +1,666 @@
+ { 0x00600001, 0x23600021, 0x008d0000, 0x00000000 },
+ { 0x00600001, 0x23200021, 0x008d0000, 0x00000000 },
+ { 0x00600001, 0x23400021, 0x008d0000, 0x00000000 },
+ { 0x00600041, 0x206077bd, 0x008d0060, 0x000000f0 },
+ { 0x00200001, 0x211401bd, 0x004500e0, 0x00000000 },
+ { 0x01600010, 0x20002e24, 0x0000005a, 0x00010001 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x00000000 },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+ { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c077bd, 0x00000060, 0x00000114 },
+ { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a077bd, 0x00000080, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c077bd, 0x00000060, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a077bd, 0x00000080, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c077bd, 0x00000060, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a077bd, 0x00000080, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c077bd, 0x00000060, 0x00000220 },
+ { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a077bd, 0x00000080, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x00020002 },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+ { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+ { 0x00110048, 0x20c477bd, 0x00000064, 0x00000114 },
+ { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+ { 0x00000048, 0x20a477bd, 0x00000084, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+ { 0x00110048, 0x20c477bd, 0x00000064, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+ { 0x00000048, 0x20a477bd, 0x00000084, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+ { 0x00110048, 0x20c477bd, 0x00000064, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+ { 0x00000048, 0x20a477bd, 0x00000084, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+ { 0x00110048, 0x20c477bd, 0x00000064, 0x00000220 },
+ { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+ { 0x00000048, 0x20a477bd, 0x00000084, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x00040004 },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+ { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+ { 0x00110048, 0x20c877bd, 0x00000068, 0x00000114 },
+ { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+ { 0x00000048, 0x20a877bd, 0x00000088, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+ { 0x00110048, 0x20c877bd, 0x00000068, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+ { 0x00000048, 0x20a877bd, 0x00000088, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+ { 0x00110048, 0x20c877bd, 0x00000068, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+ { 0x00000048, 0x20a877bd, 0x00000088, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+ { 0x00110048, 0x20c877bd, 0x00000068, 0x00000220 },
+ { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+ { 0x00000048, 0x20a877bd, 0x00000088, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x00060006 },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+ { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+ { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000114 },
+ { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+ { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+ { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+ { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+ { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+ { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+ { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000220 },
+ { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+ { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x00080008 },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+ { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+ { 0x00110048, 0x20d077bd, 0x00000070, 0x00000114 },
+ { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+ { 0x00000048, 0x20b077bd, 0x00000090, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+ { 0x00110048, 0x20d077bd, 0x00000070, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+ { 0x00000048, 0x20b077bd, 0x00000090, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+ { 0x00110048, 0x20d077bd, 0x00000070, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+ { 0x00000048, 0x20b077bd, 0x00000090, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+ { 0x00110048, 0x20d077bd, 0x00000070, 0x00000220 },
+ { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+ { 0x00000048, 0x20b077bd, 0x00000090, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x000a000a },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+ { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+ { 0x00110048, 0x20d477bd, 0x00000074, 0x00000114 },
+ { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+ { 0x00000048, 0x20b477bd, 0x00000094, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+ { 0x00110048, 0x20d477bd, 0x00000074, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+ { 0x00000048, 0x20b477bd, 0x00000094, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+ { 0x00110048, 0x20d477bd, 0x00000074, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+ { 0x00000048, 0x20b477bd, 0x00000094, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+ { 0x00110048, 0x20d477bd, 0x00000074, 0x00000220 },
+ { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+ { 0x00000048, 0x20b477bd, 0x00000094, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x000c000c },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+ { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+ { 0x00110048, 0x20d877bd, 0x00000078, 0x00000114 },
+ { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+ { 0x00000048, 0x20b877bd, 0x00000098, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+ { 0x00110048, 0x20d877bd, 0x00000078, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+ { 0x00000048, 0x20b877bd, 0x00000098, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+ { 0x00110048, 0x20d877bd, 0x00000078, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+ { 0x00000048, 0x20b877bd, 0x00000098, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+ { 0x00110048, 0x20d877bd, 0x00000078, 0x00000220 },
+ { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+ { 0x00000048, 0x20b877bd, 0x00000098, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x000e000e },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+ { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+ { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000114 },
+ { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+ { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+ { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+ { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+ { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+ { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+ { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000220 },
+ { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+ { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00800001, 0x22a00169, 0x00000000, 0xffffffff },
+ { 0x02000010, 0x20002d24, 0x020000e4, 0x00000000 },
+ { 0x02010010, 0x20002d24, 0x020000e6, 0x00000000 },
+ { 0x00110220, 0x34001c00, 0x02001400, 0x0000001a },
+ { 0x00000005, 0x23022d31, 0x00000044, 0x00030003 },
+ { 0x00000401, 0x233803bd, 0x000000f4, 0x00000000 },
+ { 0x00000c01, 0x233003bd, 0x00000060, 0x00000000 },
+ { 0x00000c01, 0x233403bd, 0x00000080, 0x00000000 },
+ { 0x00000c01, 0x232803bd, 0x000000c0, 0x00000000 },
+ { 0x00000801, 0x232c03bd, 0x000000a0, 0x00000000 },
+ { 0x00000001, 0x240803bc, 0x000000c0, 0x00000000 },
+ { 0x00000048, 0x24087fbc, 0x00000060, 0x41000000 },
+ { 0x00000448, 0x22e87fbd, 0x000000f4, 0x41e00000 },
+ { 0x00000c41, 0x22e47fbd, 0x00000080, 0x40800000 },
+ { 0x00000001, 0x241003bc, 0x00000060, 0x00000000 },
+ { 0x00000c48, 0x22f07fbd, 0x000000f4, 0x41000000 },
+ { 0x00000801, 0x22f40061, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00400001, 0x2400036c, 0x00000000, 0x00006ea2 },
+ { 0x00400040, 0x24002d8c, 0x00690400, 0x00460046 },
+ { 0x00400009, 0x22c02d8d, 0x00690400, 0x00050005 },
+ { 0x00400001, 0x2400036c, 0x00000000, 0x00006204 },
+ { 0x00400040, 0x24002d8c, 0x00690400, 0x00400040 },
+ { 0x00400409, 0x22402d8d, 0x00690400, 0x00050005 },
+ { 0x00000801, 0x22500061, 0x00000000, 0x01000100 },
+ { 0x00000001, 0x22d00061, 0x00000000, 0x00400040 },
+ { 0x00110220, 0x34001c00, 0x02001400, 0x00000058 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+ { 0x00000401, 0x233c0021, 0x000000fc, 0x00000000 },
+ { 0x00000801, 0x23240121, 0x000000f8, 0x00000000 },
+ { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+ { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+ { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+ { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+ { 0x00110220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+ { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+ { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+ { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+ { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+ { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+ { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+ { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+ { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+ { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+ { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+ { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+ { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+ { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+ { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+ { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+ { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+ { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+ { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+ { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+ { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+ { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+ { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+ { 0x02000031, 0x28000229, 0x00000200, 0x00000200 },
+ { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb801 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000e000 },
+ { 0x02000031, 0x28800229, 0x00000200, 0x00000200 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044ebc02 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000e000 },
+ { 0x02000031, 0x29000229, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00110220, 0x34001c00, 0x02001400, 0x00000056 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+ { 0x00000040, 0x23240d21, 0x000000f8, 0x00000001 },
+ { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+ { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+ { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+ { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+ { 0x00110220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+ { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+ { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+ { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+ { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+ { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+ { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+ { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+ { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+ { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+ { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+ { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+ { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+ { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+ { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+ { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+ { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+ { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+ { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+ { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+ { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+ { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+ { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+ { 0x02000031, 0x2a000229, 0x00000200, 0x00000200 },
+ { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb801 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000e000 },
+ { 0x02000031, 0x2a800229, 0x00000200, 0x00000200 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044ebc02 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000e000 },
+ { 0x02000031, 0x2b000229, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00110220, 0x34001c00, 0x02001400, 0x00000056 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+ { 0x00000040, 0x23240d21, 0x000000f8, 0x00000002 },
+ { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+ { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+ { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+ { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+ { 0x00110220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+ { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+ { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+ { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+ { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+ { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+ { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+ { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+ { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+ { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+ { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+ { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+ { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+ { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+ { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+ { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+ { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+ { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+ { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+ { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+ { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+ { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+ { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+ { 0x02000031, 0x2c000229, 0x00000200, 0x00000200 },
+ { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb801 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000e000 },
+ { 0x02000031, 0x2c800229, 0x00000200, 0x00000200 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044ebc02 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000e000 },
+ { 0x02000031, 0x2d000229, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00110220, 0x34001c00, 0x02001400, 0x00000056 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+ { 0x00000040, 0x23240d21, 0x000000f8, 0x00000003 },
+ { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+ { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+ { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+ { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+ { 0x00110220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+ { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+ { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+ { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+ { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+ { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+ { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+ { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+ { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+ { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+ { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+ { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+ { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+ { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+ { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+ { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+ { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+ { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+ { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+ { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+ { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+ { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+ { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+ { 0x02000031, 0x2e000229, 0x00000200, 0x00000200 },
+ { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb801 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000e000 },
+ { 0x02000031, 0x2e800229, 0x00000200, 0x00000200 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044ebc02 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000e000 },
+ { 0x02000031, 0x2f000229, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x00000000 },
+ { 0x00400040, 0x22082da8, 0x006902c0, 0x02000200 },
+ { 0x00600001, 0x23800021, 0x008d0360, 0x00000000 },
+ { 0x00600001, 0x24a00021, 0x008d0360, 0x00000000 },
+ { 0x00200401, 0x238001a5, 0x004500e0, 0x00000000 },
+ { 0x00000401, 0x24a001a5, 0x000000e0, 0x00000000 },
+ { 0x00000c08, 0x24a43da5, 0x000000e2, 0x00010001 },
+ { 0x00000801, 0x23880061, 0x00000000, 0x000f000f },
+ { 0x00000801, 0x24a80061, 0x00000000, 0x0007000f },
+ { 0x80800040, 0xa4002d29, 0x00b18400, 0x00800080 },
+ { 0x80800040, 0xa4202d29, 0x00b18420, 0x00800080 },
+ { 0x80800040, 0xa4402d29, 0x00b18440, 0x00800080 },
+ { 0x80800040, 0xa4602d29, 0x00b18460, 0x00800080 },
+ { 0x00600008, 0x45c03d29, 0x00ae8800, 0x00010001 },
+ { 0x00600008, 0x45e03d29, 0x00ae8820, 0x00010001 },
+ { 0x00600008, 0x46003d29, 0x00ae8840, 0x00010001 },
+ { 0x00600008, 0x46203d29, 0x00ae8860, 0x00010001 },
+ { 0x00600040, 0x45c02529, 0x00ae05c0, 0x00ae05e0 },
+ { 0x80600040, 0xc8002d29, 0x00ae05c0, 0x00800080 },
+ { 0x00600040, 0x46002529, 0x00ae0600, 0x00ae0620 },
+ { 0x80600040, 0xc8402d29, 0x00ae0600, 0x00800080 },
+ { 0x00600008, 0x46403d29, 0x00ae8000, 0x00010001 },
+ { 0x00600008, 0x46603d29, 0x00ae8020, 0x00010001 },
+ { 0x00600008, 0x46803d29, 0x00ae8040, 0x00010001 },
+ { 0x00600008, 0x46a03d29, 0x00ae8060, 0x00010001 },
+ { 0x00600040, 0x46402529, 0x00ae0640, 0x00ae0660 },
+ { 0x80600040, 0xc0002d29, 0x00ae0640, 0x00800080 },
+ { 0x00600040, 0x46802529, 0x00ae0680, 0x00ae06a0 },
+ { 0x80600040, 0xc0402d29, 0x00ae0680, 0x00800080 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x04000400 },
+ { 0x80800040, 0xb4002d29, 0x00b19400, 0x00800080 },
+ { 0x80800040, 0xb4202d29, 0x00b19420, 0x00800080 },
+ { 0x80800040, 0xb4402d29, 0x00b19440, 0x00800080 },
+ { 0x80800040, 0xb4602d29, 0x00b19460, 0x00800080 },
+ { 0x00600008, 0x45c03d29, 0x00ae9800, 0x00010001 },
+ { 0x00600008, 0x45e03d29, 0x00ae9820, 0x00010001 },
+ { 0x00600008, 0x46003d29, 0x00ae9840, 0x00010001 },
+ { 0x00600008, 0x46203d29, 0x00ae9860, 0x00010001 },
+ { 0x00600040, 0x45c02529, 0x00ae05c0, 0x00ae05e0 },
+ { 0x80600040, 0xd8002d29, 0x00ae05c0, 0x00800080 },
+ { 0x00600040, 0x46002529, 0x00ae0600, 0x00ae0620 },
+ { 0x80600040, 0xd8402d29, 0x00ae0600, 0x00800080 },
+ { 0x00600008, 0x46403d29, 0x00ae9000, 0x00010001 },
+ { 0x00600008, 0x46603d29, 0x00ae9020, 0x00010001 },
+ { 0x00600008, 0x46803d29, 0x00ae9040, 0x00010001 },
+ { 0x00600008, 0x46a03d29, 0x00ae9060, 0x00010001 },
+ { 0x00600040, 0x46402529, 0x00ae0640, 0x00ae0660 },
+ { 0x80600040, 0xd0002d29, 0x00ae0640, 0x00800080 },
+ { 0x00600040, 0x46802529, 0x00ae0680, 0x00ae06a0 },
+ { 0x80600040, 0xd0402d29, 0x00ae0680, 0x00800080 },
+ { 0x00400040, 0x22082da8, 0x006902c0, 0x06000600 },
+ { 0x80800040, 0xa4002d29, 0x00b18400, 0x00800080 },
+ { 0x80800040, 0xa4202d29, 0x00b18420, 0x00800080 },
+ { 0x80800040, 0xa4402d29, 0x00b18440, 0x00800080 },
+ { 0x80800040, 0xa4602d29, 0x00b18460, 0x00800080 },
+ { 0x00600008, 0x45c03d29, 0x00ae8800, 0x00010001 },
+ { 0x00600008, 0x45e03d29, 0x00ae8820, 0x00010001 },
+ { 0x00600008, 0x46003d29, 0x00ae8840, 0x00010001 },
+ { 0x00600008, 0x46203d29, 0x00ae8860, 0x00010001 },
+ { 0x00600040, 0x45c02529, 0x00ae05c0, 0x00ae05e0 },
+ { 0x80600040, 0xc8002d29, 0x00ae05c0, 0x00800080 },
+ { 0x00600040, 0x46002529, 0x00ae0600, 0x00ae0620 },
+ { 0x80600040, 0xc8402d29, 0x00ae0600, 0x00800080 },
+ { 0x00600008, 0x46403d29, 0x00ae8000, 0x00010001 },
+ { 0x00600008, 0x46603d29, 0x00ae8020, 0x00010001 },
+ { 0x00600008, 0x46803d29, 0x00ae8040, 0x00010001 },
+ { 0x00600008, 0x46a03d29, 0x00ae8060, 0x00010001 },
+ { 0x00600040, 0x46402529, 0x00ae0640, 0x00ae0660 },
+ { 0x80600040, 0xc0002d29, 0x00ae0640, 0x00800080 },
+ { 0x00600040, 0x46802529, 0x00ae0680, 0x00ae06a0 },
+ { 0x80600040, 0xc0402d29, 0x00ae0680, 0x00800080 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x04000400 },
+ { 0x80800040, 0xb4002d29, 0x00b19400, 0x00800080 },
+ { 0x80800040, 0xb4202d29, 0x00b19420, 0x00800080 },
+ { 0x80800040, 0xb4402d29, 0x00b19440, 0x00800080 },
+ { 0x80800040, 0xb4602d29, 0x00b19460, 0x00800080 },
+ { 0x00600008, 0x45c03d29, 0x00ae9800, 0x00010001 },
+ { 0x00600008, 0x45e03d29, 0x00ae9820, 0x00010001 },
+ { 0x00600008, 0x46003d29, 0x00ae9840, 0x00010001 },
+ { 0x00600008, 0x46203d29, 0x00ae9860, 0x00010001 },
+ { 0x00600040, 0x45c02529, 0x00ae05c0, 0x00ae05e0 },
+ { 0x80600040, 0xd8002d29, 0x00ae05c0, 0x00800080 },
+ { 0x00600040, 0x46002529, 0x00ae0600, 0x00ae0620 },
+ { 0x80600040, 0xd8402d29, 0x00ae0600, 0x00800080 },
+ { 0x00600008, 0x46403d29, 0x00ae9000, 0x00010001 },
+ { 0x00600008, 0x46603d29, 0x00ae9020, 0x00010001 },
+ { 0x00600008, 0x46803d29, 0x00ae9040, 0x00010001 },
+ { 0x00600008, 0x46a03d29, 0x00ae9060, 0x00010001 },
+ { 0x00600040, 0x46402529, 0x00ae0640, 0x00ae0660 },
+ { 0x80600040, 0xd0002d29, 0x00ae0640, 0x00800080 },
+ { 0x00600040, 0x46802529, 0x00ae0680, 0x00ae06a0 },
+ { 0x80600040, 0xd0402d29, 0x00ae0680, 0x00800080 },
+ { 0x00400040, 0x22082da8, 0x006902c0, 0x06000600 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x00000000 },
+ { 0x00400040, 0x22082da8, 0x006902c0, 0x02000200 },
+ { 0x00800401, 0x23a00231, 0x00d28401, 0x00000000 },
+ { 0x00800801, 0x23b00231, 0x00d28421, 0x00000000 },
+ { 0x00800401, 0x23c00231, 0x00d28441, 0x00000000 },
+ { 0x00800801, 0x23d00231, 0x00d28461, 0x00000000 },
+ { 0x00600401, 0x44c00231, 0x00cf8801, 0x00000000 },
+ { 0x00600c01, 0x44d00231, 0x00cf8841, 0x00000000 },
+ { 0x00600c01, 0x44c10231, 0x00cf8001, 0x00000000 },
+ { 0x00600801, 0x44d10231, 0x00cf8041, 0x00000000 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x04000400 },
+ { 0x00800401, 0x23e00231, 0x00d29401, 0x00000000 },
+ { 0x00800801, 0x23f00231, 0x00d29421, 0x00000000 },
+ { 0x00800401, 0x24000231, 0x00d29441, 0x00000000 },
+ { 0x00800801, 0x24100231, 0x00d29461, 0x00000000 },
+ { 0x00600401, 0x44e00231, 0x00cf9801, 0x00000000 },
+ { 0x00600c01, 0x44f00231, 0x00cf9841, 0x00000000 },
+ { 0x00600c01, 0x44e10231, 0x00cf9001, 0x00000000 },
+ { 0x00600801, 0x44f10231, 0x00cf9041, 0x00000000 },
+ { 0x00400040, 0x22082da8, 0x006902c0, 0x06000600 },
+ { 0x00800401, 0x24200231, 0x00d28401, 0x00000000 },
+ { 0x00800801, 0x24300231, 0x00d28421, 0x00000000 },
+ { 0x00800401, 0x24400231, 0x00d28441, 0x00000000 },
+ { 0x00800801, 0x24500231, 0x00d28461, 0x00000000 },
+ { 0x00600401, 0x45000231, 0x00cf8801, 0x00000000 },
+ { 0x00600c01, 0x45100231, 0x00cf8841, 0x00000000 },
+ { 0x00600c01, 0x45010231, 0x00cf8001, 0x00000000 },
+ { 0x00600801, 0x45110231, 0x00cf8041, 0x00000000 },
+ { 0x00800401, 0x24600231, 0x00d29401, 0x00000000 },
+ { 0x00800801, 0x24700231, 0x00d29421, 0x00000000 },
+ { 0x00800401, 0x24800231, 0x00d29441, 0x00000000 },
+ { 0x00800801, 0x24900231, 0x00d29461, 0x00000000 },
+ { 0x00600401, 0x45200231, 0x00cf9801, 0x00000000 },
+ { 0x00600c01, 0x45300231, 0x00cf9841, 0x00000000 },
+ { 0x00600c01, 0x45210231, 0x00cf9001, 0x00000000 },
+ { 0x00600801, 0x45310231, 0x00cf9041, 0x00000000 },
+ { 0x05000031, 0x20000e24, 0x00000380, 0x120a8018 },
+ { 0x05000031, 0x20000e24, 0x000004a0, 0x0a0a8019 },
+ { 0x00600001, 0x2fe00021, 0x008d0000, 0x00000000 },
+ { 0x07000031, 0x20001e24, 0x00000fe0, 0x82000010 },
diff --git a/src/shaders/post_processing/gen7/pl3_to_pl3.asm b/src/shaders/post_processing/gen7/pl3_to_pl3.asm
new file mode 100644
index 0000000..0861513
--- /dev/null
+++ b/src/shaders/post_processing/gen7/pl3_to_pl3.asm
@@ -0,0 +1,17 @@
+// Module name: AVS
+.kernel PL3_TO_PL3
+.code
+
+#include "VP_Setup.g4a"
+#include "Set_Layer_0.g4a"
+#include "Set_AVS_Buf_0123_PL3.g4a"
+#include "PL3_AVS_Buf_0.g4a"
+#include "PL3_AVS_Buf_1.g4a"
+#include "PL3_AVS_Buf_2.g4a"
+#include "PL3_AVS_Buf_3.g4a"
+#include "Save_AVS_PL3.g4a"
+#include "EOT.g4a"
+
+.end_code
+
+.end_kernel
diff --git a/src/shaders/post_processing/gen7/pl3_to_pl3.g75b b/src/shaders/post_processing/gen7/pl3_to_pl3.g75b
new file mode 100644
index 0000000..7d204f7
--- /dev/null
+++ b/src/shaders/post_processing/gen7/pl3_to_pl3.g75b
@@ -0,0 +1,619 @@
+ { 0x00600001, 0x23600021, 0x008d0000, 0x00000000 },
+ { 0x00600001, 0x23200021, 0x008d0000, 0x00000000 },
+ { 0x00600001, 0x23400021, 0x008d0000, 0x00000000 },
+ { 0x00600041, 0x206077bd, 0x008d0060, 0x000000f0 },
+ { 0x00200001, 0x211401bd, 0x004500e0, 0x00000000 },
+ { 0x01600010, 0x20002e24, 0x0000005a, 0x00010001 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x00000000 },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+ { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c077bd, 0x00000060, 0x00000114 },
+ { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a077bd, 0x00000080, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c077bd, 0x00000060, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a077bd, 0x00000080, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c077bd, 0x00000060, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a077bd, 0x00000080, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c077bd, 0x00000060, 0x00000220 },
+ { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a077bd, 0x00000080, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x00020002 },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+ { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+ { 0x00110048, 0x20c477bd, 0x00000064, 0x00000114 },
+ { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+ { 0x00000048, 0x20a477bd, 0x00000084, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+ { 0x00110048, 0x20c477bd, 0x00000064, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+ { 0x00000048, 0x20a477bd, 0x00000084, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+ { 0x00110048, 0x20c477bd, 0x00000064, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+ { 0x00000048, 0x20a477bd, 0x00000084, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+ { 0x00110048, 0x20c477bd, 0x00000064, 0x00000220 },
+ { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+ { 0x00000048, 0x20a477bd, 0x00000084, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x00040004 },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+ { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+ { 0x00110048, 0x20c877bd, 0x00000068, 0x00000114 },
+ { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+ { 0x00000048, 0x20a877bd, 0x00000088, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+ { 0x00110048, 0x20c877bd, 0x00000068, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+ { 0x00000048, 0x20a877bd, 0x00000088, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+ { 0x00110048, 0x20c877bd, 0x00000068, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+ { 0x00000048, 0x20a877bd, 0x00000088, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+ { 0x00110048, 0x20c877bd, 0x00000068, 0x00000220 },
+ { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+ { 0x00000048, 0x20a877bd, 0x00000088, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x00060006 },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+ { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+ { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000114 },
+ { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+ { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+ { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+ { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+ { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+ { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+ { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000220 },
+ { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+ { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x00080008 },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+ { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+ { 0x00110048, 0x20d077bd, 0x00000070, 0x00000114 },
+ { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+ { 0x00000048, 0x20b077bd, 0x00000090, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+ { 0x00110048, 0x20d077bd, 0x00000070, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+ { 0x00000048, 0x20b077bd, 0x00000090, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+ { 0x00110048, 0x20d077bd, 0x00000070, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+ { 0x00000048, 0x20b077bd, 0x00000090, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+ { 0x00110048, 0x20d077bd, 0x00000070, 0x00000220 },
+ { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+ { 0x00000048, 0x20b077bd, 0x00000090, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x000a000a },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+ { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+ { 0x00110048, 0x20d477bd, 0x00000074, 0x00000114 },
+ { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+ { 0x00000048, 0x20b477bd, 0x00000094, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+ { 0x00110048, 0x20d477bd, 0x00000074, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+ { 0x00000048, 0x20b477bd, 0x00000094, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+ { 0x00110048, 0x20d477bd, 0x00000074, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+ { 0x00000048, 0x20b477bd, 0x00000094, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+ { 0x00110048, 0x20d477bd, 0x00000074, 0x00000220 },
+ { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+ { 0x00000048, 0x20b477bd, 0x00000094, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x000c000c },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+ { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+ { 0x00110048, 0x20d877bd, 0x00000078, 0x00000114 },
+ { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+ { 0x00000048, 0x20b877bd, 0x00000098, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+ { 0x00110048, 0x20d877bd, 0x00000078, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+ { 0x00000048, 0x20b877bd, 0x00000098, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+ { 0x00110048, 0x20d877bd, 0x00000078, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+ { 0x00000048, 0x20b877bd, 0x00000098, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+ { 0x00110048, 0x20d877bd, 0x00000078, 0x00000220 },
+ { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+ { 0x00000048, 0x20b877bd, 0x00000098, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x000e000e },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000090 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x000000f0 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000180 },
+ { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+ { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000114 },
+ { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+ { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+ { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+ { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000120 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+ { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+ { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000070 },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+ { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000220 },
+ { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+ { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00800001, 0x22a00169, 0x00000000, 0xffffffff },
+ { 0x02000010, 0x20002d24, 0x020000e4, 0x00000000 },
+ { 0x02010010, 0x20002d24, 0x020000e6, 0x00000000 },
+ { 0x00110220, 0x34001c00, 0x02001400, 0x000000d0 },
+ { 0x00000005, 0x23022d31, 0x00000044, 0x00030003 },
+ { 0x00000401, 0x233803bd, 0x000000f4, 0x00000000 },
+ { 0x00000c01, 0x233003bd, 0x00000060, 0x00000000 },
+ { 0x00000c01, 0x233403bd, 0x00000080, 0x00000000 },
+ { 0x00000c01, 0x232803bd, 0x000000c0, 0x00000000 },
+ { 0x00000801, 0x232c03bd, 0x000000a0, 0x00000000 },
+ { 0x00000001, 0x240803bc, 0x000000c0, 0x00000000 },
+ { 0x00000048, 0x24087fbc, 0x00000060, 0x41000000 },
+ { 0x00000448, 0x22e87fbd, 0x000000f4, 0x41e00000 },
+ { 0x00000c41, 0x22e47fbd, 0x00000080, 0x40800000 },
+ { 0x00000001, 0x241003bc, 0x00000060, 0x00000000 },
+ { 0x00000c48, 0x22f07fbd, 0x000000f4, 0x41000000 },
+ { 0x00000801, 0x22f40061, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00400001, 0x2400036c, 0x00000000, 0x00006ea2 },
+ { 0x00400040, 0x24002d8c, 0x00690400, 0x00460046 },
+ { 0x00400009, 0x22c02d8d, 0x00690400, 0x00050005 },
+ { 0x00400001, 0x2400036c, 0x00000000, 0x00006204 },
+ { 0x00400040, 0x24002d8c, 0x00690400, 0x00400040 },
+ { 0x00400409, 0x22402d8d, 0x00690400, 0x00050005 },
+ { 0x00000801, 0x22500061, 0x00000000, 0x01000100 },
+ { 0x00000001, 0x22d00061, 0x00000000, 0x00400040 },
+ { 0x00110220, 0x34001c00, 0x02001400, 0x000002c0 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+ { 0x00000401, 0x233c0021, 0x000000fc, 0x00000000 },
+ { 0x00000801, 0x23240121, 0x000000f8, 0x00000000 },
+ { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+ { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+ { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+ { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+ { 0x00110220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+ { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+ { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+ { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+ { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+ { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+ { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+ { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+ { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+ { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+ { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+ { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+ { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+ { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+ { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+ { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+ { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+ { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+ { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+ { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+ { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+ { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+ { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+ { 0x02000031, 0x28000229, 0x00000200, 0x00000200 },
+ { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb801 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000e000 },
+ { 0x02000031, 0x28800229, 0x00000200, 0x00000200 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044ebc02 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000e000 },
+ { 0x02000031, 0x29000229, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00110220, 0x34001c00, 0x02001400, 0x000002b0 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+ { 0x00000040, 0x23240d21, 0x000000f8, 0x00000001 },
+ { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+ { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+ { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+ { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+ { 0x00110220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+ { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+ { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+ { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+ { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+ { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+ { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+ { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+ { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+ { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+ { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+ { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+ { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+ { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+ { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+ { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+ { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+ { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+ { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+ { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+ { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+ { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+ { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+ { 0x02000031, 0x2a000229, 0x00000200, 0x00000200 },
+ { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb801 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000e000 },
+ { 0x02000031, 0x2a800229, 0x00000200, 0x00000200 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044ebc02 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000e000 },
+ { 0x02000031, 0x2b000229, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00110220, 0x34001c00, 0x02001400, 0x000002b0 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+ { 0x00000040, 0x23240d21, 0x000000f8, 0x00000002 },
+ { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+ { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+ { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+ { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+ { 0x00110220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+ { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+ { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+ { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+ { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+ { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+ { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+ { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+ { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+ { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+ { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+ { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+ { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+ { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+ { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+ { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+ { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+ { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+ { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+ { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+ { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+ { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+ { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+ { 0x02000031, 0x2c000229, 0x00000200, 0x00000200 },
+ { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb801 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000e000 },
+ { 0x02000031, 0x2c800229, 0x00000200, 0x00000200 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044ebc02 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000e000 },
+ { 0x02000031, 0x2d000229, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00110220, 0x34001c00, 0x02001400, 0x000002b0 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+ { 0x00000040, 0x23240d21, 0x000000f8, 0x00000003 },
+ { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+ { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+ { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+ { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+ { 0x00110220, 0x34001c00, 0x00001400, 0x000001a0 },
+ { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+ { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+ { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+ { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+ { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+ { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+ { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+ { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+ { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+ { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+ { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+ { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+ { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+ { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+ { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+ { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+ { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+ { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+ { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+ { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+ { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+ { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+ { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+ { 0x02000031, 0x2e000229, 0x00000200, 0x00000200 },
+ { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb801 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000e000 },
+ { 0x02000031, 0x2e800229, 0x00000200, 0x00000200 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044ebc02 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000e000 },
+ { 0x02000031, 0x2f000229, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x00000000 },
+ { 0x00400040, 0x22082da8, 0x006902c0, 0x02000200 },
+ { 0x00600001, 0x23800021, 0x008d0360, 0x00000000 },
+ { 0x00600001, 0x24a00021, 0x008d0360, 0x00000000 },
+ { 0x00600001, 0x25c00021, 0x008d0360, 0x00000000 },
+ { 0x00200401, 0x238001a5, 0x004500e0, 0x00000000 },
+ { 0x00200408, 0x24a03da5, 0x004500e0, 0x00010001 },
+ { 0x00200408, 0x25c03da5, 0x004500e0, 0x00010001 },
+ { 0x00000801, 0x23880061, 0x00000000, 0x000f000f },
+ { 0x00000801, 0x24a80061, 0x00000000, 0x00070007 },
+ { 0x00000801, 0x25c80061, 0x00000000, 0x00070007 },
+ { 0x80800040, 0xa4002d29, 0x00b18400, 0x00800080 },
+ { 0x80800040, 0xa4202d29, 0x00b18420, 0x00800080 },
+ { 0x80800040, 0xa4402d29, 0x00b18440, 0x00800080 },
+ { 0x80800040, 0xa4602d29, 0x00b18460, 0x00800080 },
+ { 0x80600040, 0xc8002d29, 0x00ae8800, 0x00800080 },
+ { 0x80600040, 0xc8402d29, 0x00ae8840, 0x00800080 },
+ { 0x80600040, 0xc0002d29, 0x00ae8000, 0x00800080 },
+ { 0x80600040, 0xc0402d29, 0x00ae8040, 0x00800080 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x04000400 },
+ { 0x80800040, 0xb4002d29, 0x00b19400, 0x00800080 },
+ { 0x80800040, 0xb4202d29, 0x00b19420, 0x00800080 },
+ { 0x80800040, 0xb4402d29, 0x00b19440, 0x00800080 },
+ { 0x80800040, 0xb4602d29, 0x00b19460, 0x00800080 },
+ { 0x80600040, 0xd8002d29, 0x00ae9800, 0x00800080 },
+ { 0x80600040, 0xd8402d29, 0x00ae9840, 0x00800080 },
+ { 0x80600040, 0xd0002d29, 0x00ae9000, 0x00800080 },
+ { 0x80600040, 0xd0402d29, 0x00ae9040, 0x00800080 },
+ { 0x00400040, 0x22082da8, 0x006902c0, 0x06000600 },
+ { 0x80800040, 0xa4002d29, 0x00b18400, 0x00800080 },
+ { 0x80800040, 0xa4202d29, 0x00b18420, 0x00800080 },
+ { 0x80800040, 0xa4402d29, 0x00b18440, 0x00800080 },
+ { 0x80800040, 0xa4602d29, 0x00b18460, 0x00800080 },
+ { 0x80600040, 0xc8002d29, 0x00ae8800, 0x00800080 },
+ { 0x80600040, 0xc8402d29, 0x00ae8840, 0x00800080 },
+ { 0x80600040, 0xc0002d29, 0x00ae8000, 0x00800080 },
+ { 0x80600040, 0xc0402d29, 0x00ae8040, 0x00800080 },
+ { 0x80800040, 0xb4002d29, 0x00b19400, 0x00800080 },
+ { 0x80800040, 0xb4202d29, 0x00b19420, 0x00800080 },
+ { 0x80800040, 0xb4402d29, 0x00b19440, 0x00800080 },
+ { 0x80800040, 0xb4602d29, 0x00b19460, 0x00800080 },
+ { 0x80600040, 0xd8002d29, 0x00ae9800, 0x00800080 },
+ { 0x80600040, 0xd8402d29, 0x00ae9840, 0x00800080 },
+ { 0x80600040, 0xd0002d29, 0x00ae9000, 0x00800080 },
+ { 0x80600040, 0xd0402d29, 0x00ae9040, 0x00800080 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x00000000 },
+ { 0x00400040, 0x22082da8, 0x006902c0, 0x02000200 },
+ { 0x00800401, 0x23a00231, 0x00d28401, 0x00000000 },
+ { 0x00800801, 0x23b00231, 0x00d28421, 0x00000000 },
+ { 0x00800401, 0x23c00231, 0x00d28441, 0x00000000 },
+ { 0x00800801, 0x23d00231, 0x00d28461, 0x00000000 },
+ { 0x00600401, 0x24c00231, 0x00cf8801, 0x00000000 },
+ { 0x00600c01, 0x24c80231, 0x00cf8841, 0x00000000 },
+ { 0x00600401, 0x25e00231, 0x00cf8001, 0x00000000 },
+ { 0x00600c01, 0x25e80231, 0x00cf8041, 0x00000000 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x04000400 },
+ { 0x00800401, 0x23e00231, 0x00d29401, 0x00000000 },
+ { 0x00800801, 0x23f00231, 0x00d29421, 0x00000000 },
+ { 0x00800401, 0x24000231, 0x00d29441, 0x00000000 },
+ { 0x00800801, 0x24100231, 0x00d29461, 0x00000000 },
+ { 0x00600c01, 0x24d00231, 0x00cf9801, 0x00000000 },
+ { 0x00600801, 0x24d80231, 0x00cf9841, 0x00000000 },
+ { 0x00600c01, 0x25f00231, 0x00cf9001, 0x00000000 },
+ { 0x00600801, 0x25f80231, 0x00cf9041, 0x00000000 },
+ { 0x00400040, 0x22082da8, 0x006902c0, 0x06000600 },
+ { 0x00800401, 0x24200231, 0x00d28401, 0x00000000 },
+ { 0x00800801, 0x24300231, 0x00d28421, 0x00000000 },
+ { 0x00800401, 0x24400231, 0x00d28441, 0x00000000 },
+ { 0x00800801, 0x24500231, 0x00d28461, 0x00000000 },
+ { 0x00600401, 0x24e00231, 0x00cf8801, 0x00000000 },
+ { 0x00600c01, 0x24e80231, 0x00cf8841, 0x00000000 },
+ { 0x00600401, 0x26000231, 0x00cf8001, 0x00000000 },
+ { 0x00600c01, 0x26080231, 0x00cf8041, 0x00000000 },
+ { 0x00800401, 0x24600231, 0x00d29401, 0x00000000 },
+ { 0x00800801, 0x24700231, 0x00d29421, 0x00000000 },
+ { 0x00800401, 0x24800231, 0x00d29441, 0x00000000 },
+ { 0x00800801, 0x24900231, 0x00d29461, 0x00000000 },
+ { 0x00600c01, 0x24f00231, 0x00cf9801, 0x00000000 },
+ { 0x00600801, 0x24f80231, 0x00cf9841, 0x00000000 },
+ { 0x00600c01, 0x26100231, 0x00cf9001, 0x00000000 },
+ { 0x00600801, 0x26180231, 0x00cf9041, 0x00000000 },
+ { 0x05000031, 0x20000e24, 0x00000380, 0x120a8018 },
+ { 0x05000031, 0x20000e24, 0x000004a0, 0x060a8019 },
+ { 0x05000031, 0x20000e24, 0x000005c0, 0x060a801a },
+ { 0x00600001, 0x2fe00021, 0x008d0000, 0x00000000 },
+ { 0x07000031, 0x20001e24, 0x00000fe0, 0x82000010 },
diff --git a/src/shaders/post_processing/gen7/pl3_to_pl3.g7b b/src/shaders/post_processing/gen7/pl3_to_pl3.g7b
new file mode 100644
index 0000000..6929daa
--- /dev/null
+++ b/src/shaders/post_processing/gen7/pl3_to_pl3.g7b
@@ -0,0 +1,619 @@
+ { 0x00600001, 0x23600021, 0x008d0000, 0x00000000 },
+ { 0x00600001, 0x23200021, 0x008d0000, 0x00000000 },
+ { 0x00600001, 0x23400021, 0x008d0000, 0x00000000 },
+ { 0x00600041, 0x206077bd, 0x008d0060, 0x000000f0 },
+ { 0x00200001, 0x211401bd, 0x004500e0, 0x00000000 },
+ { 0x01600010, 0x20002e24, 0x0000005a, 0x00010001 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x00000000 },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+ { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c077bd, 0x00000060, 0x00000114 },
+ { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a077bd, 0x00000080, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c077bd, 0x00000060, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a077bd, 0x00000080, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c077bd, 0x00000060, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a077bd, 0x00000080, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240003bc, 0x000000c0, 0x00000000 },
+ { 0x00110048, 0x20c077bd, 0x00000060, 0x00000220 },
+ { 0x00000001, 0x240003bc, 0x000000a0, 0x00000000 },
+ { 0x00000048, 0x20a077bd, 0x00000080, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x00020002 },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+ { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+ { 0x00110048, 0x20c477bd, 0x00000064, 0x00000114 },
+ { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+ { 0x00000048, 0x20a477bd, 0x00000084, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+ { 0x00110048, 0x20c477bd, 0x00000064, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+ { 0x00000048, 0x20a477bd, 0x00000084, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+ { 0x00110048, 0x20c477bd, 0x00000064, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+ { 0x00000048, 0x20a477bd, 0x00000084, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240403bc, 0x000000c4, 0x00000000 },
+ { 0x00110048, 0x20c477bd, 0x00000064, 0x00000220 },
+ { 0x00000001, 0x240403bc, 0x000000a4, 0x00000000 },
+ { 0x00000048, 0x20a477bd, 0x00000084, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x00040004 },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+ { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+ { 0x00110048, 0x20c877bd, 0x00000068, 0x00000114 },
+ { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+ { 0x00000048, 0x20a877bd, 0x00000088, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+ { 0x00110048, 0x20c877bd, 0x00000068, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+ { 0x00000048, 0x20a877bd, 0x00000088, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+ { 0x00110048, 0x20c877bd, 0x00000068, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+ { 0x00000048, 0x20a877bd, 0x00000088, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240803bc, 0x000000c8, 0x00000000 },
+ { 0x00110048, 0x20c877bd, 0x00000068, 0x00000220 },
+ { 0x00000001, 0x240803bc, 0x000000a8, 0x00000000 },
+ { 0x00000048, 0x20a877bd, 0x00000088, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x00060006 },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+ { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+ { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000114 },
+ { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+ { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+ { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+ { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+ { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+ { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x240c03bc, 0x000000cc, 0x00000000 },
+ { 0x00110048, 0x20cc77bd, 0x0000006c, 0x00000220 },
+ { 0x00000001, 0x240c03bc, 0x000000ac, 0x00000000 },
+ { 0x00000048, 0x20ac77bd, 0x0000008c, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x00080008 },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+ { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+ { 0x00110048, 0x20d077bd, 0x00000070, 0x00000114 },
+ { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+ { 0x00000048, 0x20b077bd, 0x00000090, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+ { 0x00110048, 0x20d077bd, 0x00000070, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+ { 0x00000048, 0x20b077bd, 0x00000090, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+ { 0x00110048, 0x20d077bd, 0x00000070, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+ { 0x00000048, 0x20b077bd, 0x00000090, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241003bc, 0x000000d0, 0x00000000 },
+ { 0x00110048, 0x20d077bd, 0x00000070, 0x00000220 },
+ { 0x00000001, 0x241003bc, 0x000000b0, 0x00000000 },
+ { 0x00000048, 0x20b077bd, 0x00000090, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x000a000a },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+ { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+ { 0x00110048, 0x20d477bd, 0x00000074, 0x00000114 },
+ { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+ { 0x00000048, 0x20b477bd, 0x00000094, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+ { 0x00110048, 0x20d477bd, 0x00000074, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+ { 0x00000048, 0x20b477bd, 0x00000094, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+ { 0x00110048, 0x20d477bd, 0x00000074, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+ { 0x00000048, 0x20b477bd, 0x00000094, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241403bc, 0x000000d4, 0x00000000 },
+ { 0x00110048, 0x20d477bd, 0x00000074, 0x00000220 },
+ { 0x00000001, 0x241403bc, 0x000000b4, 0x00000000 },
+ { 0x00000048, 0x20b477bd, 0x00000094, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x000c000c },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+ { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+ { 0x00110048, 0x20d877bd, 0x00000078, 0x00000114 },
+ { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+ { 0x00000048, 0x20b877bd, 0x00000098, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+ { 0x00110048, 0x20d877bd, 0x00000078, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+ { 0x00000048, 0x20b877bd, 0x00000098, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+ { 0x00110048, 0x20d877bd, 0x00000078, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+ { 0x00000048, 0x20b877bd, 0x00000098, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241803bc, 0x000000d8, 0x00000000 },
+ { 0x00110048, 0x20d877bd, 0x00000078, 0x00000220 },
+ { 0x00000001, 0x241803bc, 0x000000b8, 0x00000000 },
+ { 0x00000048, 0x20b877bd, 0x00000098, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00000008, 0x22202d29, 0x00000044, 0x000e000e },
+ { 0x00000005, 0x22202d29, 0x00000220, 0x00030003 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00010001 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000012 },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00020002 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x0000001e },
+ { 0x01000010, 0x20002d2c, 0x02000220, 0x00030003 },
+ { 0x00010220, 0x34001c00, 0x02001400, 0x00000030 },
+ { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+ { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000114 },
+ { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+ { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000118 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+ { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000118 },
+ { 0x00000001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+ { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x00000024 },
+ { 0x00110001, 0x2200013d, 0x00000040, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004114, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+ { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000220 },
+ { 0x00000001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00000040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00000040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+ { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000220 },
+ { 0x00000220, 0x34001c00, 0x00001400, 0x0000000e },
+ { 0x00110001, 0x2200013d, 0x00000042, 0x00000000 },
+ { 0x00110040, 0x222077bd, 0x00004118, 0x00000200 },
+ { 0x00110040, 0x22207fbd, 0x00000220, 0xc1800000 },
+ { 0x00110001, 0x241c03bc, 0x000000dc, 0x00000000 },
+ { 0x00110048, 0x20dc77bd, 0x0000007c, 0x00000220 },
+ { 0x00000001, 0x241c03bc, 0x000000bc, 0x00000000 },
+ { 0x00000048, 0x20bc77bd, 0x0000009c, 0x00000114 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00800001, 0x22a00169, 0x00000000, 0xffffffff },
+ { 0x02000010, 0x20002d24, 0x020000e4, 0x00000000 },
+ { 0x02010010, 0x20002d24, 0x020000e6, 0x00000000 },
+ { 0x00110220, 0x34001c00, 0x02001400, 0x0000001a },
+ { 0x00000005, 0x23022d31, 0x00000044, 0x00030003 },
+ { 0x00000401, 0x233803bd, 0x000000f4, 0x00000000 },
+ { 0x00000c01, 0x233003bd, 0x00000060, 0x00000000 },
+ { 0x00000c01, 0x233403bd, 0x00000080, 0x00000000 },
+ { 0x00000c01, 0x232803bd, 0x000000c0, 0x00000000 },
+ { 0x00000801, 0x232c03bd, 0x000000a0, 0x00000000 },
+ { 0x00000001, 0x240803bc, 0x000000c0, 0x00000000 },
+ { 0x00000048, 0x24087fbc, 0x00000060, 0x41000000 },
+ { 0x00000448, 0x22e87fbd, 0x000000f4, 0x41e00000 },
+ { 0x00000c41, 0x22e47fbd, 0x00000080, 0x40800000 },
+ { 0x00000001, 0x241003bc, 0x00000060, 0x00000000 },
+ { 0x00000c48, 0x22f07fbd, 0x000000f4, 0x41000000 },
+ { 0x00000801, 0x22f40061, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00400001, 0x2400036c, 0x00000000, 0x00006ea2 },
+ { 0x00400040, 0x24002d8c, 0x00690400, 0x00460046 },
+ { 0x00400009, 0x22c02d8d, 0x00690400, 0x00050005 },
+ { 0x00400001, 0x2400036c, 0x00000000, 0x00006204 },
+ { 0x00400040, 0x24002d8c, 0x00690400, 0x00400040 },
+ { 0x00400409, 0x22402d8d, 0x00690400, 0x00050005 },
+ { 0x00000801, 0x22500061, 0x00000000, 0x01000100 },
+ { 0x00000001, 0x22d00061, 0x00000000, 0x00400040 },
+ { 0x00110220, 0x34001c00, 0x02001400, 0x00000058 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+ { 0x00000401, 0x233c0021, 0x000000fc, 0x00000000 },
+ { 0x00000801, 0x23240121, 0x000000f8, 0x00000000 },
+ { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+ { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+ { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+ { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+ { 0x00110220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+ { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+ { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+ { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+ { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+ { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+ { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+ { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+ { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+ { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+ { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+ { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+ { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+ { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+ { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+ { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+ { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+ { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+ { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+ { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+ { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+ { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+ { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+ { 0x02000031, 0x28000229, 0x00000200, 0x00000200 },
+ { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb801 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000e000 },
+ { 0x02000031, 0x28800229, 0x00000200, 0x00000200 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044ebc02 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000e000 },
+ { 0x02000031, 0x29000229, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00110220, 0x34001c00, 0x02001400, 0x00000056 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+ { 0x00000040, 0x23240d21, 0x000000f8, 0x00000001 },
+ { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+ { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+ { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+ { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+ { 0x00110220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+ { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+ { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+ { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+ { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+ { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+ { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+ { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+ { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+ { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+ { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+ { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+ { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+ { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+ { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+ { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+ { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+ { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+ { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+ { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+ { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+ { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+ { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+ { 0x02000031, 0x2a000229, 0x00000200, 0x00000200 },
+ { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb801 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000e000 },
+ { 0x02000031, 0x2a800229, 0x00000200, 0x00000200 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044ebc02 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000e000 },
+ { 0x02000031, 0x2b000229, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00110220, 0x34001c00, 0x02001400, 0x00000056 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+ { 0x00000040, 0x23240d21, 0x000000f8, 0x00000002 },
+ { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+ { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+ { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+ { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+ { 0x00110220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+ { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+ { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+ { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+ { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+ { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+ { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+ { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+ { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+ { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+ { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+ { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+ { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+ { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+ { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+ { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+ { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+ { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+ { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+ { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+ { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+ { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+ { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+ { 0x02000031, 0x2c000229, 0x00000200, 0x00000200 },
+ { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb801 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000e000 },
+ { 0x02000031, 0x2c800229, 0x00000200, 0x00000200 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044ebc02 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000e000 },
+ { 0x02000031, 0x2d000229, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00110220, 0x34001c00, 0x02001400, 0x00000056 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb400 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000d000 },
+ { 0x00000040, 0x23240d21, 0x000000f8, 0x00000003 },
+ { 0x00600001, 0x22200021, 0x008d0320, 0x00000000 },
+ { 0x00000001, 0x21d00109, 0x00000600, 0x00000000 },
+ { 0x00000001, 0x21d403bd, 0x00000228, 0x00000000 },
+ { 0x02000005, 0x20002d28, 0x00000046, 0x00020002 },
+ { 0x00110220, 0x34001c00, 0x00001400, 0x00000034 },
+ { 0x02600005, 0x20002d28, 0x00000046, 0x00040004 },
+ { 0x00610001, 0x240003bc, 0x00000228, 0x00000000 },
+ { 0x00610048, 0x24007fbc, 0x00000230, 0xc0000000 },
+ { 0x00610048, 0x24007fbc, 0x00000238, 0x40400000 },
+ { 0x00010001, 0x21c8039d, 0x00210400, 0x00000000 },
+ { 0x00110001, 0x21c803bd, 0x00000228, 0x00000000 },
+ { 0x00000005, 0x21c42d21, 0x00000046, 0xfff8fff8 },
+ { 0x0000000c, 0x21c41c21, 0x000001c4, 0x00000003 },
+ { 0x00000001, 0x21c4003d, 0x000001c4, 0x00000000 },
+ { 0x00000041, 0x21c077bd, 0x000001c8, 0x000001c4 },
+ { 0x00000040, 0x21c87fbd, 0x000001c0, 0x3ca00000 },
+ { 0x00000040, 0x21cc7fbd, 0x000001c0, 0x3f7f0000 },
+ { 0x05000010, 0x20007fbc, 0x000001c0, 0x00000000 },
+ { 0x00000001, 0x21c003a5, 0x000001c0, 0x00000000 },
+ { 0x00010040, 0x21c01ca5, 0x000001c0, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001c8, 0x00000000 },
+ { 0x00000001, 0x21c803a5, 0x000001c8, 0x00000000 },
+ { 0x00010040, 0x21c81ca5, 0x000001c8, 0xffffffff },
+ { 0x05000010, 0x20007fbc, 0x000001cc, 0x00000000 },
+ { 0x00000001, 0x21cc03a5, 0x000001cc, 0x00000000 },
+ { 0x00010040, 0x21cc1ca5, 0x000001cc, 0xffffffff },
+ { 0x00000001, 0x26000168, 0x00000000, 0x00000000 },
+ { 0x03000010, 0x200014a4, 0x040001c8, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x04000228, 0x0000404c },
+ { 0x01110010, 0x200014a4, 0x000001cc, 0x000001c0 },
+ { 0x00010040, 0x222877bd, 0x00000228, 0x00000048 },
+ { 0x00000001, 0x26000128, 0x000001d0, 0x00000000 },
+ { 0x02000031, 0x2e000229, 0x00000200, 0x00000200 },
+ { 0x00000001, 0x222803bd, 0x000001d4, 0x00000000 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044eb801 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000e000 },
+ { 0x02000031, 0x2e800229, 0x00000200, 0x00000200 },
+ { 0x00000040, 0x22000c20, 0x000002f4, 0x044ebc02 },
+ { 0x00000001, 0x22080061, 0x00000000, 0x0000e000 },
+ { 0x02000031, 0x2f000229, 0x00000200, 0x00000200 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x00000000 },
+ { 0x00400040, 0x22082da8, 0x006902c0, 0x02000200 },
+ { 0x00600001, 0x23800021, 0x008d0360, 0x00000000 },
+ { 0x00600001, 0x24a00021, 0x008d0360, 0x00000000 },
+ { 0x00600001, 0x25c00021, 0x008d0360, 0x00000000 },
+ { 0x00200401, 0x238001a5, 0x004500e0, 0x00000000 },
+ { 0x00200408, 0x24a03da5, 0x004500e0, 0x00010001 },
+ { 0x00200408, 0x25c03da5, 0x004500e0, 0x00010001 },
+ { 0x00000801, 0x23880061, 0x00000000, 0x000f000f },
+ { 0x00000801, 0x24a80061, 0x00000000, 0x00070007 },
+ { 0x00000801, 0x25c80061, 0x00000000, 0x00070007 },
+ { 0x80800040, 0xa4002d29, 0x00b18400, 0x00800080 },
+ { 0x80800040, 0xa4202d29, 0x00b18420, 0x00800080 },
+ { 0x80800040, 0xa4402d29, 0x00b18440, 0x00800080 },
+ { 0x80800040, 0xa4602d29, 0x00b18460, 0x00800080 },
+ { 0x80600040, 0xc8002d29, 0x00ae8800, 0x00800080 },
+ { 0x80600040, 0xc8402d29, 0x00ae8840, 0x00800080 },
+ { 0x80600040, 0xc0002d29, 0x00ae8000, 0x00800080 },
+ { 0x80600040, 0xc0402d29, 0x00ae8040, 0x00800080 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x04000400 },
+ { 0x80800040, 0xb4002d29, 0x00b19400, 0x00800080 },
+ { 0x80800040, 0xb4202d29, 0x00b19420, 0x00800080 },
+ { 0x80800040, 0xb4402d29, 0x00b19440, 0x00800080 },
+ { 0x80800040, 0xb4602d29, 0x00b19460, 0x00800080 },
+ { 0x80600040, 0xd8002d29, 0x00ae9800, 0x00800080 },
+ { 0x80600040, 0xd8402d29, 0x00ae9840, 0x00800080 },
+ { 0x80600040, 0xd0002d29, 0x00ae9000, 0x00800080 },
+ { 0x80600040, 0xd0402d29, 0x00ae9040, 0x00800080 },
+ { 0x00400040, 0x22082da8, 0x006902c0, 0x06000600 },
+ { 0x80800040, 0xa4002d29, 0x00b18400, 0x00800080 },
+ { 0x80800040, 0xa4202d29, 0x00b18420, 0x00800080 },
+ { 0x80800040, 0xa4402d29, 0x00b18440, 0x00800080 },
+ { 0x80800040, 0xa4602d29, 0x00b18460, 0x00800080 },
+ { 0x80600040, 0xc8002d29, 0x00ae8800, 0x00800080 },
+ { 0x80600040, 0xc8402d29, 0x00ae8840, 0x00800080 },
+ { 0x80600040, 0xc0002d29, 0x00ae8000, 0x00800080 },
+ { 0x80600040, 0xc0402d29, 0x00ae8040, 0x00800080 },
+ { 0x80800040, 0xb4002d29, 0x00b19400, 0x00800080 },
+ { 0x80800040, 0xb4202d29, 0x00b19420, 0x00800080 },
+ { 0x80800040, 0xb4402d29, 0x00b19440, 0x00800080 },
+ { 0x80800040, 0xb4602d29, 0x00b19460, 0x00800080 },
+ { 0x80600040, 0xd8002d29, 0x00ae9800, 0x00800080 },
+ { 0x80600040, 0xd8402d29, 0x00ae9840, 0x00800080 },
+ { 0x80600040, 0xd0002d29, 0x00ae9000, 0x00800080 },
+ { 0x80600040, 0xd0402d29, 0x00ae9040, 0x00800080 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x00000000 },
+ { 0x00400040, 0x22082da8, 0x006902c0, 0x02000200 },
+ { 0x00800401, 0x23a00231, 0x00d28401, 0x00000000 },
+ { 0x00800801, 0x23b00231, 0x00d28421, 0x00000000 },
+ { 0x00800401, 0x23c00231, 0x00d28441, 0x00000000 },
+ { 0x00800801, 0x23d00231, 0x00d28461, 0x00000000 },
+ { 0x00600401, 0x24c00231, 0x00cf8801, 0x00000000 },
+ { 0x00600c01, 0x24c80231, 0x00cf8841, 0x00000000 },
+ { 0x00600401, 0x25e00231, 0x00cf8001, 0x00000000 },
+ { 0x00600c01, 0x25e80231, 0x00cf8041, 0x00000000 },
+ { 0x00400040, 0x22002da8, 0x006902c0, 0x04000400 },
+ { 0x00800401, 0x23e00231, 0x00d29401, 0x00000000 },
+ { 0x00800801, 0x23f00231, 0x00d29421, 0x00000000 },
+ { 0x00800401, 0x24000231, 0x00d29441, 0x00000000 },
+ { 0x00800801, 0x24100231, 0x00d29461, 0x00000000 },
+ { 0x00600c01, 0x24d00231, 0x00cf9801, 0x00000000 },
+ { 0x00600801, 0x24d80231, 0x00cf9841, 0x00000000 },
+ { 0x00600c01, 0x25f00231, 0x00cf9001, 0x00000000 },
+ { 0x00600801, 0x25f80231, 0x00cf9041, 0x00000000 },
+ { 0x00400040, 0x22082da8, 0x006902c0, 0x06000600 },
+ { 0x00800401, 0x24200231, 0x00d28401, 0x00000000 },
+ { 0x00800801, 0x24300231, 0x00d28421, 0x00000000 },
+ { 0x00800401, 0x24400231, 0x00d28441, 0x00000000 },
+ { 0x00800801, 0x24500231, 0x00d28461, 0x00000000 },
+ { 0x00600401, 0x24e00231, 0x00cf8801, 0x00000000 },
+ { 0x00600c01, 0x24e80231, 0x00cf8841, 0x00000000 },
+ { 0x00600401, 0x26000231, 0x00cf8001, 0x00000000 },
+ { 0x00600c01, 0x26080231, 0x00cf8041, 0x00000000 },
+ { 0x00800401, 0x24600231, 0x00d29401, 0x00000000 },
+ { 0x00800801, 0x24700231, 0x00d29421, 0x00000000 },
+ { 0x00800401, 0x24800231, 0x00d29441, 0x00000000 },
+ { 0x00800801, 0x24900231, 0x00d29461, 0x00000000 },
+ { 0x00600c01, 0x24f00231, 0x00cf9801, 0x00000000 },
+ { 0x00600801, 0x24f80231, 0x00cf9841, 0x00000000 },
+ { 0x00600c01, 0x26100231, 0x00cf9001, 0x00000000 },
+ { 0x00600801, 0x26180231, 0x00cf9041, 0x00000000 },
+ { 0x05000031, 0x20000e24, 0x00000380, 0x120a8018 },
+ { 0x05000031, 0x20000e24, 0x000004a0, 0x060a8019 },
+ { 0x05000031, 0x20000e24, 0x000005c0, 0x060a801a },
+ { 0x00600001, 0x2fe00021, 0x008d0000, 0x00000000 },
+ { 0x07000031, 0x20001e24, 0x00000fe0, 0x82000010 },
diff --git a/src/shaders/post_processing/nv12_avs_nv12.asm b/src/shaders/post_processing/nv12_avs_nv12.asm
deleted file mode 100644
index 80665e0..0000000
--- a/src/shaders/post_processing/nv12_avs_nv12.asm
+++ /dev/null
@@ -1,19 +0,0 @@
-// Module name: NV12_AVS_NV12
-.kernel NV12_AVS_NV12
-.code
-
-#define INC_SCALING
-
-#include "SetupVPKernel.asm"
-#include "Multiple_Loop_Head.asm"
-#include "PL2_AVS_IEF_16x8.asm"
-#include "PL8x4_Save_NV12.asm"
-#include "Multiple_Loop.asm"
-
-END_THREAD // End of Thread
-
-.end_code
-
-.end_kernel
-
-// end of nv12_avs_nv12.asm
diff --git a/src/shaders/post_processing/nv12_avs_nv12.g4b.gen5 b/src/shaders/post_processing/nv12_avs_nv12.g4b.gen5
deleted file mode 100644
index 1fa4261..0000000
--- a/src/shaders/post_processing/nv12_avs_nv12.g4b.gen5
+++ /dev/null
@@ -1,162 +0,0 @@
- { 0x00600001, 0x21000021, 0x008d0000, 0x00000000 },
- { 0x00000441, 0x20842e2d, 0x000000b7, 0x00100010 },
- { 0x00000c01, 0x2086022d, 0x000000bb, 0x00000000 },
- { 0x00000801, 0x208a01ad, 0x000000a0, 0x00000000 },
- { 0x00200001, 0x209403bd, 0x006600a4, 0x00000000 },
- { 0x00000040, 0x208435ad, 0x00000084, 0x000000a0 },
- { 0x01000005, 0x20002d2c, 0x00000088, 0x80008000 },
- { 0x00010001, 0x20c003fd, 0x00000000, 0x00000000 },
- { 0x00000001, 0x212003bd, 0x000000c0, 0x00000000 },
- { 0x00000001, 0x212403bd, 0x000000bc, 0x00000000 },
- { 0x00000001, 0x213403bd, 0x00000038, 0x00000000 },
- { 0x00200001, 0x612803bd, 0x004500a4, 0x00000000 },
- { 0x00000001, 0x21080061, 0x00000000, 0x0000d000 },
- { 0x00802001, 0x20000022, 0x008d0100, 0x00000000 },
- { 0x00000031, 0x25401c09, 0x208d0000, 0x044bb401 },
- { 0x00000001, 0x21080061, 0x00000000, 0x0000a000 },
- { 0x00802001, 0x20400022, 0x008d0100, 0x00000000 },
- { 0x02000031, 0x25c01c09, 0x208d0000, 0x048bb802 },
- { 0x00000001, 0x240803bc, 0x000000a4, 0x00000000 },
- { 0x00000048, 0x24087fbc, 0x000000bc, 0x41000000 },
- { 0x00000048, 0x21287fbd, 0x000000c0, 0x41e00000 },
- { 0x00000001, 0x240403bc, 0x000000bc, 0x00000000 },
- { 0x00000048, 0x21247fbd, 0x000000c0, 0x41000000 },
- { 0x00000001, 0x21080061, 0x00000000, 0x0000d000 },
- { 0x00802001, 0x20000022, 0x008d0100, 0x00000000 },
- { 0x00000031, 0x27401c09, 0x208d0000, 0x044bb401 },
- { 0x00000001, 0x21080061, 0x00000000, 0x0000a000 },
- { 0x00802001, 0x20400022, 0x008d0100, 0x00000000 },
- { 0x02000031, 0x27c01c09, 0x208d0000, 0x048bb802 },
- { 0x00600001, 0x21400229, 0x00aa0541, 0x00000000 },
- { 0x00600001, 0x21600229, 0x00aa0549, 0x00000000 },
- { 0x00600001, 0x21800229, 0x00aa0561, 0x00000000 },
- { 0x00600001, 0x21a00229, 0x00aa0569, 0x00000000 },
- { 0x00600001, 0x21c00229, 0x00aa0581, 0x00000000 },
- { 0x00600001, 0x21e00229, 0x00aa0589, 0x00000000 },
- { 0x00600001, 0x22000229, 0x00aa05a1, 0x00000000 },
- { 0x00600001, 0x22200229, 0x00aa05a9, 0x00000000 },
- { 0x00600001, 0x23400229, 0x00aa05c1, 0x00000000 },
- { 0x00600001, 0x23600229, 0x00aa05c9, 0x00000000 },
- { 0x00600001, 0x23800229, 0x00aa05e1, 0x00000000 },
- { 0x00600001, 0x23a00229, 0x00aa05e9, 0x00000000 },
- { 0x00600001, 0x23c00229, 0x00aa0641, 0x00000000 },
- { 0x00600001, 0x23e00229, 0x00aa0649, 0x00000000 },
- { 0x00600001, 0x24000229, 0x00aa0661, 0x00000000 },
- { 0x00600001, 0x24200229, 0x00aa0669, 0x00000000 },
- { 0x00600001, 0x22400229, 0x00aa0601, 0x00000000 },
- { 0x00600001, 0x22600229, 0x00aa0609, 0x00000000 },
- { 0x00600001, 0x22800229, 0x00aa0621, 0x00000000 },
- { 0x00600001, 0x22a00229, 0x00aa0629, 0x00000000 },
- { 0x00600001, 0x22c00229, 0x00aa0681, 0x00000000 },
- { 0x00600001, 0x22e00229, 0x00aa0689, 0x00000000 },
- { 0x00600001, 0x23000229, 0x00aa06a1, 0x00000000 },
- { 0x00600001, 0x23200229, 0x00aa06a9, 0x00000000 },
- { 0x00600001, 0x21500229, 0x00aa0741, 0x00000000 },
- { 0x00600001, 0x21700229, 0x00aa0749, 0x00000000 },
- { 0x00600001, 0x21900229, 0x00aa0761, 0x00000000 },
- { 0x00600001, 0x21b00229, 0x00aa0769, 0x00000000 },
- { 0x00600001, 0x21d00229, 0x00aa0781, 0x00000000 },
- { 0x00600001, 0x21f00229, 0x00aa0789, 0x00000000 },
- { 0x00600001, 0x22100229, 0x00aa07a1, 0x00000000 },
- { 0x00600001, 0x22300229, 0x00aa07a9, 0x00000000 },
- { 0x00600001, 0x23500229, 0x00aa07c1, 0x00000000 },
- { 0x00600001, 0x23700229, 0x00aa07c9, 0x00000000 },
- { 0x00600001, 0x23900229, 0x00aa07e1, 0x00000000 },
- { 0x00600001, 0x23b00229, 0x00aa07e9, 0x00000000 },
- { 0x00600001, 0x23d00229, 0x00aa0841, 0x00000000 },
- { 0x00600001, 0x23f00229, 0x00aa0849, 0x00000000 },
- { 0x00600001, 0x24100229, 0x00aa0861, 0x00000000 },
- { 0x00600001, 0x24300229, 0x00aa0869, 0x00000000 },
- { 0x00600001, 0x22500229, 0x00aa0801, 0x00000000 },
- { 0x00600001, 0x22700229, 0x00aa0809, 0x00000000 },
- { 0x00600001, 0x22900229, 0x00aa0821, 0x00000000 },
- { 0x00600001, 0x22b00229, 0x00aa0829, 0x00000000 },
- { 0x00600001, 0x22d00229, 0x00aa0881, 0x00000000 },
- { 0x00600001, 0x22f00229, 0x00aa0889, 0x00000000 },
- { 0x00600001, 0x23100229, 0x00aa08a1, 0x00000000 },
- { 0x00600001, 0x23300229, 0x00aa08a9, 0x00000000 },
- { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
- { 0x00200001, 0x202001a6, 0x004500a0, 0x00000000 },
- { 0x00000001, 0x20280062, 0x00000000, 0x0007000f },
- { 0x00000005, 0x24000c20, 0x000000b8, 0x00ffffff },
- { 0x04000010, 0x20000c04, 0x00000400, 0x00ffffff },
- { 0x00010220, 0x34001c00, 0x00001400, 0x00000056 },
- { 0x01600031, 0x27000c01, 0x408d0000, 0x0248a007 },
- { 0x0000040c, 0x21043da1, 0x000000a2, 0x00010001 },
- { 0x00000801, 0x21080061, 0x00000000, 0x0003000f },
- { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
- { 0x01600031, 0x28000c01, 0x408d0000, 0x0228a008 },
- { 0x00200001, 0x210001a1, 0x004500a0, 0x00000000 },
- { 0x00000001, 0x21080061, 0x00000000, 0x0007000f },
- { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
- { 0x00000001, 0x26020228, 0x000000ba, 0x00000000 },
- { 0x00610001, 0x24400129, 0x020000b8, 0x00000000 },
- { 0x00710001, 0x24400169, 0x02000000, 0x00000000 },
- { 0x00000001, 0x24600061, 0x00000000, 0x00040001 },
- { 0x00000001, 0x24640061, 0x00000000, 0x00400010 },
- { 0x00000001, 0x24680061, 0x00000000, 0x04000100 },
- { 0x00000001, 0x246c0061, 0x00000000, 0x40001000 },
- { 0x00000001, 0x26020128, 0x00000440, 0x00000000 },
- { 0x00910001, 0x41400231, 0x02b10700, 0x00000000 },
- { 0x02600005, 0x2000252c, 0x02000440, 0x008d0460 },
- { 0x00710001, 0x42400231, 0x02ae0800, 0x00000000 },
- { 0x00710001, 0x43400231, 0x02ae0801, 0x00000000 },
- { 0x00000001, 0x26020128, 0x00000442, 0x00000000 },
- { 0x00910001, 0x41600231, 0x02b10710, 0x00000000 },
- { 0x00000001, 0x26020128, 0x00000444, 0x00000000 },
- { 0x00910001, 0x41800231, 0x02b10720, 0x00000000 },
- { 0x02600005, 0x2000252c, 0x02000444, 0x008d0460 },
- { 0x00710001, 0x42500231, 0x02ae0810, 0x00000000 },
- { 0x00710001, 0x43500231, 0x02ae0811, 0x00000000 },
- { 0x00000001, 0x26020128, 0x00000446, 0x00000000 },
- { 0x00910001, 0x41a00231, 0x02b10730, 0x00000000 },
- { 0x00000001, 0x26020128, 0x00000448, 0x00000000 },
- { 0x00910001, 0x41c00231, 0x02b10740, 0x00000000 },
- { 0x02600005, 0x2000252c, 0x02000448, 0x008d0460 },
- { 0x00710001, 0x42600231, 0x02ae0820, 0x00000000 },
- { 0x00710001, 0x43600231, 0x02ae0821, 0x00000000 },
- { 0x00000001, 0x26020128, 0x0000044a, 0x00000000 },
- { 0x00910001, 0x41e00231, 0x02b10750, 0x00000000 },
- { 0x00000001, 0x26020128, 0x0000044c, 0x00000000 },
- { 0x00910001, 0x42000231, 0x02b10760, 0x00000000 },
- { 0x02600005, 0x2000252c, 0x0200044c, 0x008d0460 },
- { 0x00710001, 0x42700231, 0x02ae0830, 0x00000000 },
- { 0x00710001, 0x43700231, 0x02ae0831, 0x00000000 },
- { 0x00000001, 0x26020128, 0x0000044e, 0x00000000 },
- { 0x00910001, 0x42200231, 0x02b10770, 0x00000000 },
- { 0x00800001, 0x20400232, 0x00d20140, 0x00000000 },
- { 0x00800001, 0x20500232, 0x00d20160, 0x00000000 },
- { 0x00800001, 0x20600232, 0x00d20180, 0x00000000 },
- { 0x00800001, 0x20700232, 0x00d201a0, 0x00000000 },
- { 0x00800001, 0x20800232, 0x00d201c0, 0x00000000 },
- { 0x00800001, 0x20900232, 0x00d201e0, 0x00000000 },
- { 0x00800001, 0x20a00232, 0x00d20200, 0x00000000 },
- { 0x00800001, 0x20b00232, 0x00d20220, 0x00000000 },
- { 0x01600031, 0x20000c04, 0x508d0000, 0x0a082007 },
- { 0x00200001, 0x210001a5, 0x004500a0, 0x00000000 },
- { 0x0000000c, 0x21043ca5, 0x00000104, 0x00010001 },
- { 0x00000001, 0x21080061, 0x00000000, 0x0003000f },
- { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
- { 0x00800001, 0x40400232, 0x00d20240, 0x00000000 },
- { 0x00800001, 0x40410232, 0x00d20340, 0x00000000 },
- { 0x00800001, 0x40600232, 0x00d20260, 0x00000000 },
- { 0x00800001, 0x40610232, 0x00d20360, 0x00000000 },
- { 0x01600031, 0x20000c04, 0x508d0000, 0x06082008 },
- { 0x01000040, 0x20863dad, 0x00000086, 0xffffffff },
- { 0x00000040, 0x20a03dad, 0x000000a0, 0x00100010 },
- { 0x05000010, 0x200035ac, 0x020000a0, 0x00000084 },
- { 0x00000041, 0x24407fbd, 0x000000bc, 0x41800000 },
- { 0x00000040, 0x20a477bd, 0x00000440, 0x000000a4 },
- { 0x00010220, 0x34001c00, 0x00001400, 0x0000000e },
- { 0x00010220, 0x34001c00, 0x02001400, 0xfffffede },
- { 0x00000001, 0x20a001ad, 0x0000008a, 0x00000000 },
- { 0x00000040, 0x20a23dad, 0x000000a2, 0x00080008 },
- { 0x00000001, 0x20a403bd, 0x00000094, 0x00000000 },
- { 0x00000041, 0x24407fbd, 0x00000038, 0x41000000 },
- { 0x00000040, 0x20a877bd, 0x00000440, 0x000000a8 },
- { 0x00000220, 0x34001c00, 0x00001400, 0xfffffed2 },
- { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
- { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
- { 0x0f000031, 0x20000c04, 0x708d0000, 0x82000000 },
- { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
- { 0x0f000031, 0x20000c04, 0x708d0000, 0x82000000 },
diff --git a/src/shaders/post_processing/nv12_avs_nv12.g6b b/src/shaders/post_processing/nv12_avs_nv12.g6b
deleted file mode 100644
index 7e1dfc3..0000000
--- a/src/shaders/post_processing/nv12_avs_nv12.g6b
+++ /dev/null
@@ -1,235 +0,0 @@
- { 0x00600001, 0x20e00061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x21000061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x21200061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x21400061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x21600061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x21800061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x21a00061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x21c00061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x21e00061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x22000061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x22200061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x22400061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x22600061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x22800061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x22a00061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x22c00061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x22e00061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x23000061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x23200061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x23400061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x23600061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x23800061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x23a00061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x23c00061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x23e00061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x24000061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x24200061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x24400061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x24600061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x24800061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x24a00061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x24c00061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x24e00061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x25000061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x25200061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x25400061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x25600061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x25800061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x25a00061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x25c00061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x25e00061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x26000061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x26200061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x26400061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x26600061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x26800061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x26a00061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x26c00061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x26e00061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x27000061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x27200061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x27400061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x27600061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x27800061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x27a00061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x27c00061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x27e00061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x28000061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x28200061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x28400061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x28600061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x28800061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x28a00061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x28c00061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x28e00061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x29000061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x29200061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x29400061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x29600061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x29800061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x29a00061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x29c00061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x29e00061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x21000021, 0x008d0000, 0x00000000 },
- { 0x00000441, 0x20842e2d, 0x000000b7, 0x00100010 },
- { 0x00000c01, 0x2086022d, 0x000000bb, 0x00000000 },
- { 0x00000801, 0x208a01ad, 0x000000a0, 0x00000000 },
- { 0x00200001, 0x209403bd, 0x006600a4, 0x00000000 },
- { 0x00000040, 0x208435ad, 0x00000084, 0x000000a0 },
- { 0x01000005, 0x20002d2c, 0x00000088, 0x80008000 },
- { 0x00010001, 0x20c003fd, 0x00000000, 0x00000000 },
- { 0x00000001, 0x212003bd, 0x000000c0, 0x00000000 },
- { 0x00000001, 0x212403bd, 0x000000bc, 0x00000000 },
- { 0x00000001, 0x213403bd, 0x00000038, 0x00000000 },
- { 0x00200001, 0x612803bd, 0x004500a4, 0x00000000 },
- { 0x00000001, 0x21080061, 0x00000000, 0x0000d000 },
- { 0x00800001, 0x20000022, 0x008d0100, 0x00000000 },
- { 0x02000031, 0x25401cc9, 0x00000000, 0x044bb401 },
- { 0x00000001, 0x21080061, 0x00000000, 0x0000a000 },
- { 0x00800001, 0x20400022, 0x008d0100, 0x00000000 },
- { 0x02000031, 0x25c01cc9, 0x00000040, 0x048bb802 },
- { 0x00000001, 0x240803bc, 0x000000a4, 0x00000000 },
- { 0x00000048, 0x24087fbc, 0x000000bc, 0x41000000 },
- { 0x00000048, 0x21287fbd, 0x000000c0, 0x41e00000 },
- { 0x00000001, 0x240403bc, 0x000000bc, 0x00000000 },
- { 0x00000048, 0x21247fbd, 0x000000c0, 0x41000000 },
- { 0x00000001, 0x21080061, 0x00000000, 0x0000d000 },
- { 0x00800001, 0x20000022, 0x008d0100, 0x00000000 },
- { 0x02000031, 0x27401cc9, 0x00000000, 0x044bb401 },
- { 0x00000001, 0x21080061, 0x00000000, 0x0000a000 },
- { 0x00800001, 0x20400022, 0x008d0100, 0x00000000 },
- { 0x02000031, 0x27c01cc9, 0x00000040, 0x048bb802 },
- { 0x00600001, 0x21400229, 0x00aa0541, 0x00000000 },
- { 0x00600001, 0x21600229, 0x00aa0549, 0x00000000 },
- { 0x00600001, 0x21800229, 0x00aa0561, 0x00000000 },
- { 0x00600001, 0x21a00229, 0x00aa0569, 0x00000000 },
- { 0x00600001, 0x21c00229, 0x00aa0581, 0x00000000 },
- { 0x00600001, 0x21e00229, 0x00aa0589, 0x00000000 },
- { 0x00600001, 0x22000229, 0x00aa05a1, 0x00000000 },
- { 0x00600001, 0x22200229, 0x00aa05a9, 0x00000000 },
- { 0x00600001, 0x23400229, 0x00aa05c1, 0x00000000 },
- { 0x00600001, 0x23600229, 0x00aa05c9, 0x00000000 },
- { 0x00600001, 0x23800229, 0x00aa05e1, 0x00000000 },
- { 0x00600001, 0x23a00229, 0x00aa05e9, 0x00000000 },
- { 0x00600001, 0x23c00229, 0x00aa0641, 0x00000000 },
- { 0x00600001, 0x23e00229, 0x00aa0649, 0x00000000 },
- { 0x00600001, 0x24000229, 0x00aa0661, 0x00000000 },
- { 0x00600001, 0x24200229, 0x00aa0669, 0x00000000 },
- { 0x00600001, 0x22400229, 0x00aa0601, 0x00000000 },
- { 0x00600001, 0x22600229, 0x00aa0609, 0x00000000 },
- { 0x00600001, 0x22800229, 0x00aa0621, 0x00000000 },
- { 0x00600001, 0x22a00229, 0x00aa0629, 0x00000000 },
- { 0x00600001, 0x22c00229, 0x00aa0681, 0x00000000 },
- { 0x00600001, 0x22e00229, 0x00aa0689, 0x00000000 },
- { 0x00600001, 0x23000229, 0x00aa06a1, 0x00000000 },
- { 0x00600001, 0x23200229, 0x00aa06a9, 0x00000000 },
- { 0x00600001, 0x21500229, 0x00aa0741, 0x00000000 },
- { 0x00600001, 0x21700229, 0x00aa0749, 0x00000000 },
- { 0x00600001, 0x21900229, 0x00aa0761, 0x00000000 },
- { 0x00600001, 0x21b00229, 0x00aa0769, 0x00000000 },
- { 0x00600001, 0x21d00229, 0x00aa0781, 0x00000000 },
- { 0x00600001, 0x21f00229, 0x00aa0789, 0x00000000 },
- { 0x00600001, 0x22100229, 0x00aa07a1, 0x00000000 },
- { 0x00600001, 0x22300229, 0x00aa07a9, 0x00000000 },
- { 0x00600001, 0x23500229, 0x00aa07c1, 0x00000000 },
- { 0x00600001, 0x23700229, 0x00aa07c9, 0x00000000 },
- { 0x00600001, 0x23900229, 0x00aa07e1, 0x00000000 },
- { 0x00600001, 0x23b00229, 0x00aa07e9, 0x00000000 },
- { 0x00600001, 0x23d00229, 0x00aa0841, 0x00000000 },
- { 0x00600001, 0x23f00229, 0x00aa0849, 0x00000000 },
- { 0x00600001, 0x24100229, 0x00aa0861, 0x00000000 },
- { 0x00600001, 0x24300229, 0x00aa0869, 0x00000000 },
- { 0x00600001, 0x22500229, 0x00aa0801, 0x00000000 },
- { 0x00600001, 0x22700229, 0x00aa0809, 0x00000000 },
- { 0x00600001, 0x22900229, 0x00aa0821, 0x00000000 },
- { 0x00600001, 0x22b00229, 0x00aa0829, 0x00000000 },
- { 0x00600001, 0x22d00229, 0x00aa0881, 0x00000000 },
- { 0x00600001, 0x22f00229, 0x00aa0889, 0x00000000 },
- { 0x00600001, 0x23100229, 0x00aa08a1, 0x00000000 },
- { 0x00600001, 0x23300229, 0x00aa08a9, 0x00000000 },
- { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
- { 0x00200001, 0x202001a6, 0x004500a0, 0x00000000 },
- { 0x00000001, 0x20280062, 0x00000000, 0x0007000f },
- { 0x00000005, 0x24000c20, 0x000000b8, 0x00ffffff },
- { 0x04000010, 0x20000c04, 0x00000400, 0x00ffffff },
- { 0x00010220, 0x34001c00, 0x00001400, 0x00000056 },
- { 0x04600031, 0x27000cc1, 0x00000020, 0x02498007 },
- { 0x0000040c, 0x21043da1, 0x000000a2, 0x00010001 },
- { 0x00000801, 0x21080061, 0x00000000, 0x0003000f },
- { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
- { 0x04600031, 0x28000cc1, 0x00000020, 0x02298008 },
- { 0x00200001, 0x210001a1, 0x004500a0, 0x00000000 },
- { 0x00000001, 0x21080061, 0x00000000, 0x0007000f },
- { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
- { 0x00000001, 0x26020228, 0x000000ba, 0x00000000 },
- { 0x00610001, 0x24400129, 0x020000b8, 0x00000000 },
- { 0x00710001, 0x24400169, 0x02000000, 0x00000000 },
- { 0x00000001, 0x24600061, 0x00000000, 0x00040001 },
- { 0x00000001, 0x24640061, 0x00000000, 0x00400010 },
- { 0x00000001, 0x24680061, 0x00000000, 0x04000100 },
- { 0x00000001, 0x246c0061, 0x00000000, 0x40001000 },
- { 0x00000001, 0x26020128, 0x00000440, 0x00000000 },
- { 0x00910001, 0x41400231, 0x02b10700, 0x00000000 },
- { 0x02600005, 0x2000252c, 0x02000440, 0x008d0460 },
- { 0x00710001, 0x42400231, 0x02ae0800, 0x00000000 },
- { 0x00710001, 0x43400231, 0x02ae0801, 0x00000000 },
- { 0x00000001, 0x26020128, 0x00000442, 0x00000000 },
- { 0x00910001, 0x41600231, 0x02b10710, 0x00000000 },
- { 0x00000001, 0x26020128, 0x00000444, 0x00000000 },
- { 0x00910001, 0x41800231, 0x02b10720, 0x00000000 },
- { 0x02600005, 0x2000252c, 0x02000444, 0x008d0460 },
- { 0x00710001, 0x42500231, 0x02ae0810, 0x00000000 },
- { 0x00710001, 0x43500231, 0x02ae0811, 0x00000000 },
- { 0x00000001, 0x26020128, 0x00000446, 0x00000000 },
- { 0x00910001, 0x41a00231, 0x02b10730, 0x00000000 },
- { 0x00000001, 0x26020128, 0x00000448, 0x00000000 },
- { 0x00910001, 0x41c00231, 0x02b10740, 0x00000000 },
- { 0x02600005, 0x2000252c, 0x02000448, 0x008d0460 },
- { 0x00710001, 0x42600231, 0x02ae0820, 0x00000000 },
- { 0x00710001, 0x43600231, 0x02ae0821, 0x00000000 },
- { 0x00000001, 0x26020128, 0x0000044a, 0x00000000 },
- { 0x00910001, 0x41e00231, 0x02b10750, 0x00000000 },
- { 0x00000001, 0x26020128, 0x0000044c, 0x00000000 },
- { 0x00910001, 0x42000231, 0x02b10760, 0x00000000 },
- { 0x02600005, 0x2000252c, 0x0200044c, 0x008d0460 },
- { 0x00710001, 0x42700231, 0x02ae0830, 0x00000000 },
- { 0x00710001, 0x43700231, 0x02ae0831, 0x00000000 },
- { 0x00000001, 0x26020128, 0x0000044e, 0x00000000 },
- { 0x00910001, 0x42200231, 0x02b10770, 0x00000000 },
- { 0x00800001, 0x20400232, 0x00d20140, 0x00000000 },
- { 0x00800001, 0x20500232, 0x00d20160, 0x00000000 },
- { 0x00800001, 0x20600232, 0x00d20180, 0x00000000 },
- { 0x00800001, 0x20700232, 0x00d201a0, 0x00000000 },
- { 0x00800001, 0x20800232, 0x00d201c0, 0x00000000 },
- { 0x00800001, 0x20900232, 0x00d201e0, 0x00000000 },
- { 0x00800001, 0x20a00232, 0x00d20200, 0x00000000 },
- { 0x00800001, 0x20b00232, 0x00d20220, 0x00000000 },
- { 0x05600031, 0x20000cc4, 0x00000020, 0x0a094007 },
- { 0x00200001, 0x210001a5, 0x004500a0, 0x00000000 },
- { 0x0000000c, 0x21043ca5, 0x00000104, 0x00010001 },
- { 0x00000001, 0x21080061, 0x00000000, 0x0003000f },
- { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
- { 0x00800001, 0x40400232, 0x00d20240, 0x00000000 },
- { 0x00800001, 0x40410232, 0x00d20340, 0x00000000 },
- { 0x00800001, 0x40600232, 0x00d20260, 0x00000000 },
- { 0x00800001, 0x40610232, 0x00d20360, 0x00000000 },
- { 0x05600031, 0x20000cc4, 0x00000020, 0x06094008 },
- { 0x01000040, 0x20863dad, 0x00000086, 0xffffffff },
- { 0x00000040, 0x20a03dad, 0x000000a0, 0x00100010 },
- { 0x05000010, 0x200035ac, 0x020000a0, 0x00000084 },
- { 0x00000041, 0x24407fbd, 0x000000bc, 0x41800000 },
- { 0x00000040, 0x20a477bd, 0x00000440, 0x000000a4 },
- { 0x00010220, 0x34001c00, 0x00001400, 0x0000000e },
- { 0x00010220, 0x34001c00, 0x02001400, 0xfffffede },
- { 0x00000001, 0x20a001ad, 0x0000008a, 0x00000000 },
- { 0x00000040, 0x20a23dad, 0x000000a2, 0x00080008 },
- { 0x00000001, 0x20a403bd, 0x00000094, 0x00000000 },
- { 0x00000041, 0x24407fbd, 0x00000038, 0x41000000 },
- { 0x00000040, 0x20a877bd, 0x00000440, 0x000000a8 },
- { 0x00000220, 0x34001c00, 0x00001400, 0xfffffed2 },
- { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
- { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
- { 0x07000031, 0x20001cc4, 0x000001e0, 0x82000010 },
- { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
- { 0x07000031, 0x20001cc4, 0x000001e0, 0x82000010 },
diff --git a/src/shaders/post_processing/nv12_dndi_nv12.g4b.gen5 b/src/shaders/post_processing/nv12_dndi_nv12.g4b.gen5
deleted file mode 100644
index 6c0474d..0000000
--- a/src/shaders/post_processing/nv12_dndi_nv12.g4b.gen5
+++ /dev/null
@@ -1,86 +0,0 @@
- { 0x00600001, 0x21000021, 0x008d0000, 0x00000000 },
- { 0x00000441, 0x20842e2d, 0x000000b7, 0x00100010 },
- { 0x00000c01, 0x2086022d, 0x000000bb, 0x00000000 },
- { 0x00000801, 0x208a01ad, 0x000000a0, 0x00000000 },
- { 0x00200001, 0x209403bd, 0x006600a4, 0x00000000 },
- { 0x00000040, 0x208435ad, 0x00000084, 0x000000a0 },
- { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
- { 0x00000001, 0x204801aa, 0x000000a0, 0x00000000 },
- { 0x00000001, 0x205801aa, 0x000000a2, 0x00000000 },
- { 0x01600031, 0x24400c01, 0x208d0000, 0x04cb8004 },
- { 0x00800001, 0x21400229, 0x00b10440, 0x00000000 },
- { 0x00800001, 0x21600229, 0x00b10450, 0x00000000 },
- { 0x00800001, 0x21800229, 0x00b10460, 0x00000000 },
- { 0x00800001, 0x21a00229, 0x00b10470, 0x00000000 },
- { 0x00600001, 0x22400229, 0x00ae0481, 0x00000000 },
- { 0x00600001, 0x23400229, 0x00ae0480, 0x00000000 },
- { 0x00600001, 0x22500229, 0x00ae0491, 0x00000000 },
- { 0x00600001, 0x23500229, 0x00ae0490, 0x00000000 },
- { 0x00600001, 0x22600229, 0x00ae04a1, 0x00000000 },
- { 0x00600001, 0x23600229, 0x00ae04a0, 0x00000000 },
- { 0x00600001, 0x22700229, 0x00ae04b1, 0x00000000 },
- { 0x00600001, 0x23700229, 0x00ae04b0, 0x00000000 },
- { 0x00800001, 0x21c00229, 0x00b104c0, 0x00000000 },
- { 0x00800001, 0x21e00229, 0x00b104d0, 0x00000000 },
- { 0x00800001, 0x22000229, 0x00b104e0, 0x00000000 },
- { 0x00800001, 0x22200229, 0x00b104f0, 0x00000000 },
- { 0x00600001, 0x22800229, 0x00ae0501, 0x00000000 },
- { 0x00600001, 0x23800229, 0x00ae0500, 0x00000000 },
- { 0x00600001, 0x22900229, 0x00ae0511, 0x00000000 },
- { 0x00600001, 0x23900229, 0x00ae0510, 0x00000000 },
- { 0x00600001, 0x22a00229, 0x00ae0521, 0x00000000 },
- { 0x00600001, 0x23a00229, 0x00ae0520, 0x00000000 },
- { 0x00600001, 0x22b00229, 0x00ae0531, 0x00000000 },
- { 0x00600001, 0x23b00229, 0x00ae0530, 0x00000000 },
- { 0x00000008, 0x21003da1, 0x000000a0, 0x00010001 },
- { 0x00000001, 0x210401a1, 0x000000a2, 0x00000000 },
- { 0x00000001, 0x21080061, 0x00000000, 0x00030007 },
- { 0x00600001, 0x21600022, 0x008d0100, 0x00000000 },
- { 0x00600001, 0x21800022, 0x008d0540, 0x00000000 },
- { 0x0b600031, 0x20000c04, 0x508d0000, 0x04082014 },
- { 0x00200008, 0x21003da1, 0x004500a0, 0x00020002 },
- { 0x00000040, 0x21002421, 0x00000100, 0x00000034 },
- { 0x00000001, 0x21080061, 0x00000000, 0x00000003 },
- { 0x00600001, 0x21a00022, 0x008d0100, 0x00000000 },
- { 0x00000001, 0x21c00022, 0x00000560, 0x00000000 },
- { 0x0d600031, 0x20000c04, 0x508d0000, 0x04082014 },
- { 0x00400040, 0x22083e28, 0x00690024, 0x07000700 },
- { 0x01000010, 0x20003e2c, 0x0000003b, 0x00010001 },
- { 0x00010220, 0x34001c00, 0x00001400, 0x0000000a },
- { 0x00400001, 0x20400022, 0x00690580, 0x00000000 },
- { 0x00400001, 0x20500022, 0x006904d0, 0x00000000 },
- { 0x00400001, 0x20600022, 0x00690590, 0x00000000 },
- { 0x00400001, 0x20700022, 0x006904f0, 0x00000000 },
- { 0x00000220, 0x34001c00, 0x00001400, 0x00000008 },
- { 0x00400001, 0x20400022, 0x006904c0, 0x00000000 },
- { 0x00400001, 0x20500022, 0x00690580, 0x00000000 },
- { 0x00400001, 0x20600022, 0x006904e0, 0x00000000 },
- { 0x00400001, 0x20700022, 0x00690590, 0x00000000 },
- { 0x00200001, 0x210001a1, 0x004500a0, 0x00000000 },
- { 0x00000001, 0x21080061, 0x00000000, 0x0003000f },
- { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
- { 0x01600031, 0x20000c04, 0x508d0000, 0x06082007 },
- { 0x00200040, 0x210035a5, 0x004500a0, 0x00450074 },
- { 0x0000000c, 0x21043ca5, 0x00000104, 0x00010001 },
- { 0x00000001, 0x21080061, 0x00000000, 0x0001000f },
- { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
- { 0x01600031, 0x28000c01, 0x408d0000, 0x0218a002 },
- { 0x00200001, 0x210001a1, 0x004500a0, 0x00000000 },
- { 0x0000000c, 0x21043ca5, 0x00000104, 0x00010001 },
- { 0x00000001, 0x21080061, 0x00000000, 0x0001000f },
- { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
- { 0x00600001, 0x20400022, 0x008d0800, 0x00000000 },
- { 0x01600031, 0x20000c04, 0x508d0000, 0x04082008 },
- { 0x01000040, 0x20863dad, 0x00000086, 0xffffffff },
- { 0x00000040, 0x20a03dad, 0x000000a0, 0x00100010 },
- { 0x05000010, 0x200035ac, 0x020000a0, 0x00000084 },
- { 0x00010220, 0x34001c00, 0x00001400, 0x00000008 },
- { 0x00010220, 0x34001c00, 0x02001400, 0xffffff70 },
- { 0x00000001, 0x20a001ad, 0x0000008a, 0x00000000 },
- { 0x00000040, 0x20a23dad, 0x000000a2, 0x00080008 },
- { 0x00000220, 0x34001c00, 0x00001400, 0xffffff6a },
- { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
- { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
- { 0x0f000031, 0x20000c04, 0x708d0000, 0x82000000 },
- { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
- { 0x0f000031, 0x20000c04, 0x708d0000, 0x82000000 },
diff --git a/src/shaders/post_processing/nv12_dndi_nv12.g6b b/src/shaders/post_processing/nv12_dndi_nv12.g6b
deleted file mode 100644
index cb99eff..0000000
--- a/src/shaders/post_processing/nv12_dndi_nv12.g6b
+++ /dev/null
@@ -1,159 +0,0 @@
- { 0x00600001, 0x20e00061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x21000061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x21200061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x21400061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x21600061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x21800061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x21a00061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x21c00061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x21e00061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x22000061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x22200061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x22400061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x22600061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x22800061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x22a00061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x22c00061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x22e00061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x23000061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x23200061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x23400061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x23600061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x23800061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x23a00061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x23c00061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x23e00061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x24000061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x24200061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x24400061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x24600061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x24800061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x24a00061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x24c00061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x24e00061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x25000061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x25200061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x25400061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x25600061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x25800061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x25a00061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x25c00061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x25e00061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x26000061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x26200061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x26400061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x26600061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x26800061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x26a00061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x26c00061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x26e00061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x27000061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x27200061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x27400061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x27600061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x27800061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x27a00061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x27c00061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x27e00061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x28000061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x28200061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x28400061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x28600061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x28800061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x28a00061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x28c00061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x28e00061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x29000061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x29200061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x29400061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x29600061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x29800061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x29a00061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x29c00061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x29e00061, 0x00000000, 0x00000000 },
- { 0x00600001, 0x21000021, 0x008d0000, 0x00000000 },
- { 0x00000441, 0x20842e2d, 0x000000b7, 0x00100010 },
- { 0x00000c01, 0x2086022d, 0x000000bb, 0x00000000 },
- { 0x00000801, 0x208a01ad, 0x000000a0, 0x00000000 },
- { 0x00200001, 0x209403bd, 0x006600a4, 0x00000000 },
- { 0x00000040, 0x208435ad, 0x00000084, 0x000000a0 },
- { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
- { 0x00000001, 0x204801aa, 0x000000a0, 0x00000000 },
- { 0x00000001, 0x205801aa, 0x000000a2, 0x00000000 },
- { 0x02600031, 0x24400cc1, 0x00000020, 0x04cb8004 },
- { 0x00800001, 0x21400229, 0x00b10440, 0x00000000 },
- { 0x00800001, 0x21600229, 0x00b10450, 0x00000000 },
- { 0x00800001, 0x21800229, 0x00b10460, 0x00000000 },
- { 0x00800001, 0x21a00229, 0x00b10470, 0x00000000 },
- { 0x00600001, 0x22400229, 0x00ae0481, 0x00000000 },
- { 0x00600001, 0x23400229, 0x00ae0480, 0x00000000 },
- { 0x00600001, 0x22500229, 0x00ae0491, 0x00000000 },
- { 0x00600001, 0x23500229, 0x00ae0490, 0x00000000 },
- { 0x00600001, 0x22600229, 0x00ae04a1, 0x00000000 },
- { 0x00600001, 0x23600229, 0x00ae04a0, 0x00000000 },
- { 0x00600001, 0x22700229, 0x00ae04b1, 0x00000000 },
- { 0x00600001, 0x23700229, 0x00ae04b0, 0x00000000 },
- { 0x00800001, 0x21c00229, 0x00b104c0, 0x00000000 },
- { 0x00800001, 0x21e00229, 0x00b104d0, 0x00000000 },
- { 0x00800001, 0x22000229, 0x00b104e0, 0x00000000 },
- { 0x00800001, 0x22200229, 0x00b104f0, 0x00000000 },
- { 0x00600001, 0x22800229, 0x00ae0501, 0x00000000 },
- { 0x00600001, 0x23800229, 0x00ae0500, 0x00000000 },
- { 0x00600001, 0x22900229, 0x00ae0511, 0x00000000 },
- { 0x00600001, 0x23900229, 0x00ae0510, 0x00000000 },
- { 0x00600001, 0x22a00229, 0x00ae0521, 0x00000000 },
- { 0x00600001, 0x23a00229, 0x00ae0520, 0x00000000 },
- { 0x00600001, 0x22b00229, 0x00ae0531, 0x00000000 },
- { 0x00600001, 0x23b00229, 0x00ae0530, 0x00000000 },
- { 0x00000008, 0x21003da1, 0x000000a0, 0x00010001 },
- { 0x00000001, 0x210401a1, 0x000000a2, 0x00000000 },
- { 0x00000001, 0x21080061, 0x00000000, 0x00030007 },
- { 0x00600001, 0x21600022, 0x008d0100, 0x00000000 },
- { 0x00600001, 0x21800022, 0x008d0540, 0x00000000 },
- { 0x05600031, 0x20000cc4, 0x00000160, 0x04094014 },
- { 0x00200008, 0x21003da1, 0x004500a0, 0x00020002 },
- { 0x00000040, 0x21002421, 0x00000100, 0x00000034 },
- { 0x00000001, 0x21080061, 0x00000000, 0x00000003 },
- { 0x00600001, 0x21a00022, 0x008d0100, 0x00000000 },
- { 0x00000001, 0x21c00022, 0x00000560, 0x00000000 },
- { 0x05600031, 0x20000cc4, 0x000001a0, 0x04094014 },
- { 0x00400040, 0x22083e28, 0x00690024, 0x07000700 },
- { 0x01000010, 0x20003e2c, 0x0000003b, 0x00010001 },
- { 0x00010220, 0x34001c00, 0x00001400, 0x0000000a },
- { 0x00400001, 0x20400022, 0x00690580, 0x00000000 },
- { 0x00400001, 0x20500022, 0x006904d0, 0x00000000 },
- { 0x00400001, 0x20600022, 0x00690590, 0x00000000 },
- { 0x00400001, 0x20700022, 0x006904f0, 0x00000000 },
- { 0x00000220, 0x34001c00, 0x00001400, 0x00000008 },
- { 0x00400001, 0x20400022, 0x006904c0, 0x00000000 },
- { 0x00400001, 0x20500022, 0x00690580, 0x00000000 },
- { 0x00400001, 0x20600022, 0x006904e0, 0x00000000 },
- { 0x00400001, 0x20700022, 0x00690590, 0x00000000 },
- { 0x00200001, 0x210001a1, 0x004500a0, 0x00000000 },
- { 0x00000001, 0x21080061, 0x00000000, 0x0003000f },
- { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
- { 0x05600031, 0x20000cc4, 0x00000020, 0x06094007 },
- { 0x00200040, 0x210035a5, 0x004500a0, 0x00450074 },
- { 0x0000000c, 0x21043ca5, 0x00000104, 0x00010001 },
- { 0x00000001, 0x21080061, 0x00000000, 0x0001000f },
- { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
- { 0x04600031, 0x28000cc1, 0x00000020, 0x02198002 },
- { 0x00200001, 0x210001a1, 0x004500a0, 0x00000000 },
- { 0x0000000c, 0x21043ca5, 0x00000104, 0x00010001 },
- { 0x00000001, 0x21080061, 0x00000000, 0x0001000f },
- { 0x00600001, 0x20200022, 0x008d0100, 0x00000000 },
- { 0x00600001, 0x20400022, 0x008d0800, 0x00000000 },
- { 0x05600031, 0x20000cc4, 0x00000020, 0x04094008 },
- { 0x01000040, 0x20863dad, 0x00000086, 0xffffffff },
- { 0x00000040, 0x20a03dad, 0x000000a0, 0x00100010 },
- { 0x05000010, 0x200035ac, 0x020000a0, 0x00000084 },
- { 0x00010220, 0x34001c00, 0x00001400, 0x00000008 },
- { 0x00010220, 0x34001c00, 0x02001400, 0xffffff70 },
- { 0x00000001, 0x20a001ad, 0x0000008a, 0x00000000 },
- { 0x00000040, 0x20a23dad, 0x000000a2, 0x00080008 },
- { 0x00000220, 0x34001c00, 0x00001400, 0xffffff6a },
- { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
- { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
- { 0x07000031, 0x20001cc4, 0x000001e0, 0x82000010 },
- { 0x00600001, 0x21e00022, 0x008d0000, 0x00000000 },
- { 0x07000031, 0x20001cc4, 0x000001e0, 0x82000010 },
diff --git a/src/shaders/render/Makefile.am b/src/shaders/render/Makefile.am
index f9540b0..dac58c7 100644
--- a/src/shaders/render/Makefile.am
+++ b/src/shaders/render/Makefile.am
@@ -64,17 +64,23 @@ INTEL_G7B = \
exa_wm_write.g7b \
exa_wm_yuv_rgb.g7b
+# XXX: only regenerate binary for EU code containing JMPI instructions
+INTEL_G7B_HASWELL = \
+ exa_wm_src_sample_planar.g7b.haswell \
+ $(NULL)
+
TARGETS =
if HAVE_GEN4ASM
TARGETS += $(INTEL_G4B)
TARGETS += $(INTEL_G4B_GEN5)
TARGETS += $(INTEL_G6B)
TARGETS += $(INTEL_G7B)
+TARGETS += $(INTEL_G7B_HASWELL)
endif
all-local: $(TARGETS)
-SUFFIXES = .g4a .g4s .g4b .g6a .g6s .g6b .g7a .g7s .g7b
+SUFFIXES = .g4a .g4s .g4b .g6a .g6s .g6b .g7a .g7s .g7b .g7b.haswell
if HAVE_GEN4ASM
$(INTEL_G4S): $(INTEL_G4A) $(INTEL_G4I)
@@ -96,6 +102,8 @@ $(INTEL_G7S): $(INTEL_G7A) $(INTEL_G7I)
$(AM_V_GEN)m4 $< > $@
.g7s.g7b:
$(AM_V_GEN)$(GEN4ASM) -g 7 -o $@ $<
+.g7s.g7b.haswell:
+ $(AM_V_GEN)$(GEN4ASM) -g 7.5 -o $@ $<
endif
CLEANFILES = \
@@ -113,6 +121,7 @@ EXTRA_DIST = \
$(INTEL_G6B) \
$(INTEL_G7A) \
$(INTEL_G7B) \
+ $(INTEL_G7B_HASWELL) \
$(NULL)
# Extra clean files so that maintainer-clean removes *everything*
diff --git a/src/shaders/render/exa_wm_src_sample_planar.g7b.haswell b/src/shaders/render/exa_wm_src_sample_planar.g7b.haswell
new file mode 100644
index 0000000..dc388c2
--- /dev/null
+++ b/src/shaders/render/exa_wm_src_sample_planar.g7b.haswell
@@ -0,0 +1,20 @@
+ { 0x01000010, 0x20002d3c, 0x000000c0, 0x00010001 },
+ { 0x00010020, 0x34001c00, 0x00001400, 0x000000c0 },
+ { 0x01000010, 0x20002d3c, 0x000000c0, 0x00020002 },
+ { 0x00010020, 0x34001c00, 0x00001400, 0x00000070 },
+ { 0x00000201, 0x20080061, 0x00000000, 0x0000e000 },
+ { 0x00600201, 0x28200021, 0x008d0000, 0x00000000 },
+ { 0x02800031, 0x22001ca9, 0x00000820, 0x0a2c0203 },
+ { 0x00000201, 0x20080061, 0x00000000, 0x0000e000 },
+ { 0x00600201, 0x28200021, 0x008d0000, 0x00000000 },
+ { 0x02800031, 0x22401ca9, 0x00000820, 0x0a2c0405 },
+ { 0x00000020, 0x34001c00, 0x00001400, 0x00000060 },
+ { 0x00800201, 0x220003fd, 0x00000000, 0x3f000000 },
+ { 0x00800201, 0x224003fd, 0x00000000, 0x3f000000 },
+ { 0x00000020, 0x34001c00, 0x00001400, 0x00000030 },
+ { 0x00000201, 0x20080061, 0x00000000, 0x0000c000 },
+ { 0x00600201, 0x28200021, 0x008d0000, 0x00000000 },
+ { 0x02800031, 0x22001ca9, 0x00000820, 0x0a4c0203 },
+ { 0x00000201, 0x20080061, 0x00000000, 0x0000e000 },
+ { 0x00600201, 0x28200021, 0x008d0000, 0x00000000 },
+ { 0x02800031, 0x21c01ca9, 0x00000820, 0x0a2c0001 },
diff --git a/src/shaders/vme/Makefile.am b/src/shaders/vme/Makefile.am
index d58a0be..12f0e28 100644
--- a/src/shaders/vme/Makefile.am
+++ b/src/shaders/vme/Makefile.am
@@ -1,4 +1,5 @@
VME_CORE = intra_frame.asm inter_frame.asm
+VME75_CORE = intra_frame_haswell.asm inter_frame_haswell.asm
INTEL_G6B = intra_frame.g6b inter_frame.g6b
INTEL_G6A = intra_frame.g6a inter_frame.g6a
@@ -10,15 +11,21 @@ INTEL_G7A = intra_frame.g7a inter_frame.g7a
INTEL_GEN7_INC = gen7_vme_header.inc
INTEL_GEN7_ASM = $(INTEL_G7A:%.g7a=%.gen7.asm)
+INTEL_G75B = intra_frame_haswell.g75b inter_frame_haswell.g75b
+INTEL_G75A = intra_frame_haswell.g75a inter_frame_haswell.g75a
+INTEL_GEN75_INC = vme75.inc
+INTEL_GEN75_ASM = $(INTEL_G75A:%.g75a=%.gen75.asm)
+
TARGETS =
if HAVE_GEN4ASM
TARGETS += $(INTEL_G6B)
TARGETS += $(INTEL_G7B)
+TARGETS += $(INTEL_G75B)
endif
all-local: $(TARGETS)
-SUFFIXES = .g6a .g6b .g7a .g7b .gen6.asm .gen7.asm
+SUFFIXES = .g6a .g6b .g7a .g7b .gen6.asm .gen7.asm .g75a .g75b .gen75.asm
if HAVE_GEN4ASM
$(INTEL_GEN6_ASM): $(VME_CORE) $(INTEL_GEN6_INC)
@@ -32,18 +39,31 @@ $(INTEL_GEN7_ASM): $(VME_CORE) $(INTEL_GEN7_INC)
$(AM_V_GEN)m4 $< > $@
.gen7.asm.g7b:
$(AM_V_GEN)$(GEN4ASM) -g 7 -o $@ $<
+
+
+$(INTEL_GEN75_ASM): $(VME75_CORE) $(INTEL_GEN75_INC)
+.g75a.gen75.asm:
+ $(AM_V_GEN)cpp -P $< > _vme0.$@ && \
+ m4 _vme0.$@ > $@ && \
+ rm _vme0.$@
+.gen75.asm.g75b:
+ $(AM_V_GEN)$(GEN4ASM) -g 7.5 -o $@ $<
endif
-CLEANFILES = $(INTEL_GEN6_ASM) $(INTEL_GEN7_ASM)
+CLEANFILES = $(INTEL_GEN6_ASM) $(INTEL_GEN7_ASM) $(INTEL_GEN75_ASM)
EXTRA_DIST = \
$(INTEL_G6A) \
$(INTEL_G6B) \
$(INTEL_G7A) \
$(INTEL_G7B) \
+ $(INTEL_G75A) \
+ $(INTEL_G75B) \
$(INTEL_GEN6_INC) \
$(INTEL_GEN7_INC) \
+ $(INTEL_GEN75_INC) \
$(VME_CORE) \
+ $(VME75_CORE) \
$(NULL)
# Extra clean files so that maintainer-clean removes *everything*
diff --git a/src/shaders/vme/inter_frame_haswell.asm b/src/shaders/vme/inter_frame_haswell.asm
new file mode 100644
index 0000000..b6f8eb5
--- /dev/null
+++ b/src/shaders/vme/inter_frame_haswell.asm
@@ -0,0 +1,405 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * This program is licensed under the terms and conditions of the
+ * Eclipse Public License (EPL), version 1.0. The full text of the EPL is at
+ * http://www.opensource.org/licenses/eclipse-1.0.php.
+ *
+ */
+// Module name: inter_frame_haswell.asm
+//
+// Make intra prediction estimation, then IME search and FBR refinement, for Inter frame
+//
+
+//
+// Now, begin source code....
+//
+
+/*
+ * __START
+ */
+__INTRA_START:
+mov (16) tmp_reg0.0<1>:UD 0x0:UD {align1};
+mov (16) tmp_reg2.0<1>:UD 0x0:UD {align1};
+mov (16) tmp_reg4.0<1>:UD 0x0:UD {align1} ;
+mov (16) tmp_reg6.0<1>:UD 0x0:UD {align1} ;
+
+shl (2) read0_header.0<1>:D orig_xy_ub<2,2,1>:UB 4:UW {align1}; /* (x, y) * 16 */
+add (1) read0_header.0<1>:D read0_header.0<0,1,0>:D -8:W {align1}; /* X offset */
+add (1) read0_header.4<1>:D read0_header.4<0,1,0>:D -1:W {align1}; /* Y offset */
+mov (1) read0_header.8<1>:UD BLOCK_32X1 {align1};
+mov (1) read0_header.20<1>:UB thread_id_ub {align1}; /* dispatch id */
+
+shl (2) read1_header.0<1>:D orig_xy_ub<2,2,1>:UB 4:UW {align1}; /* (x, y) * 16 */
+add (1) read1_header.0<1>:D read1_header.0<0,1,0>:D -4:W {align1}; /* X offset */
+mov (1) read1_header.8<1>:UD BLOCK_4X16 {align1};
+mov (1) read1_header.20<1>:UB thread_id_ub {align1}; /* dispatch id */
+
+shl (2) vme_m0.8<1>:UW orig_xy_ub<2,2,1>:UB 4:UW {align1}; /* (x, y) * 16 */
+mov (1) vme_m0.20<1>:UB thread_id_ub {align1}; /* dispatch id */
+
+mul (1) obw_m0.8<1>:UD w_in_mb_uw<0,1,0>:UW orig_y_ub<0,1,0>:UB {align1};
+add (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD orig_x_ub<0,1,0>:UB {align1};
+mul (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD 24:UD {align1};
+mov (1) obw_m0.20<1>:UB thread_id_ub {align1}; /* dispatch id */
+
+/*
+ * Media Read Message -- fetch Luma neighbor edge pixels
+ */
+/* ROW */
+mov (8) msg_reg0.0<1>:UD read0_header.0<8,8,1>:UD {align1};
+send (8) msg_ind INEP_ROW<1>:UB null read(BIND_IDX_INEP, 0, 0, 4) mlen 1 rlen 1 {align1};
+
+/* COL */
+mov (8) msg_reg0.0<1>:UD read1_header.0<8,8,1>:UD {align1};
+send (8) msg_ind INEP_COL0<1>:UB null read(BIND_IDX_INEP, 0, 0, 4) mlen 1 rlen 2 {align1};
+
+/* m2, get the MV/Mb cost passed from constant buffer when
+spawning thread by MEDIA_OBJECT */
+mov (8) vme_m2<1>:UD r1.0<8,8,1>:UD {align1};
+
+mov (8) vme_msg_2<1>:UD vme_m2.0<8,8,1>:UD {align1};
+
+/* m3 */
+mov (8) vme_msg_3<1>:UD 0x0:UD {align1};
+
+/* m4 */
+mov (1) INEP_ROW.0<1>:UD 0x0:UD {align1};
+and (1) INEP_ROW.4<1>:UD INEP_ROW.4<0,1,0>:UD 0xFF000000:UD {align1};
+mov (8) vme_msg_4<1>:UD INEP_ROW.0<8,8,1>:UD {align1};
+
+/* m5 */
+mov (8) vme_msg_5<1>:UD 0x0:UD {align1};
+mov (16) vme_msg_5.0<1>:UB INEP_COL0.3<32,8,4>:UB {align1};
+mov (1) vme_msg_5.16<1>:UD INTRA_PREDICTORE_MODE {align1};
+
+/* the penalty for Intra mode */
+mov (1) vme_msg_5.28<1>:UD 0x010101:UD {align1};
+
+
+/* m6 */
+
+mov (8) vme_msg_6<1>:UD 0x0:UD {align1};
+
+/*
+ * SIC VME message
+ */
+/* m0 */
+mov (8) vme_msg_0.0<1>:UD vme_m0.0<8,8,1>:UD {align1};
+mov (1) tmp_reg0.0<1>:UW LUMA_INTRA_MODE:UW {align1};
+/* Use the Luma mode */
+mov (1) vme_msg_4.5<1>:UB tmp_reg0.0<0,1,0>:UB {align1};
+
+/* m1 */
+mov (1) intra_flag<1>:UW 0x0:UW {align1} ;
+and.z.f0.0 (1) null<1>:UW transform_8x8_ub<0,1,0>:UB 1:UW {align1};
+(f0.0) mov (1) intra_part_mask_ub<1>:UB LUMA_INTRA_8x8_DISABLE {align1};
+
+/* assign MB intra struct from the thread payload*/
+mov (1) mb_intra_struct_ub<1>:UB input_mb_intra_ub<0,1,0>:UB {align1};
+
+/* Disable DC HAAR component when calculating HAAR SATD block */
+mov (1) tmp_reg0.0<1>:UW DC_HARR_DISABLE:UW {align1};
+mov (1) vme_m1.30<1>:UB tmp_reg0.0<0,1,0>:UB {align1};
+
+mov (1) vme_m0.12<1>:UD INTRA_SAD_HAAR:UD {align1}; /* 16x16 Source, Intra_harr */
+/* m0 */
+mov (8) vme_msg_0.0<1>:UD vme_m0.0<8,8,1>:UD {align1};
+mov (8) vme_msg_1<1>:UD vme_m1.0<8,8,1>:UD {align1};
+
+/* after verification it will be passed by using payload */
+send (8)
+ vme_msg_ind
+ vme_wb<1>:UD
+ null
+ cre(
+ BIND_IDX_VME,
+ VME_SIC_MESSAGE_TYPE
+ )
+ mlen sic_vme_msg_length
+ rlen vme_wb_length
+ {align1};
+/*
+ * Oword Block Write message
+ */
+mov (8) msg_reg0.0<1>:UD obw_m0<8,8,1>:UD {align1};
+
+mov (1) msg_reg1.0<1>:UD vme_wb.0<0,1,0>:UD {align1};
+mov (1) msg_reg1.4<1>:UD vme_wb.16<0,1,0>:UD {align1};
+mov (1) msg_reg1.8<1>:UD vme_wb.20<0,1,0>:UD {align1};
+mov (1) msg_reg1.12<1>:UD vme_wb.24<0,1,0>:UD {align1};
+
+/* Distortion, Intra (17-16), */
+mov (1) msg_reg1.16<1>:UW vme_wb.12<0,1,0>:UW {align1};
+
+mov (1) msg_reg1.20<1>:UD vme_wb.8<0,1,0>:UD {align1};
+/* VME clock counts */
+mov (1) msg_reg1.24<1>:UD vme_wb.28<0,1,0>:UD {align1};
+
+mov (1) msg_reg1.28<1>:UD obw_m0.8<0,1,0>:UD {align1};
+
+/* bind index 3, write 2 oword (32bytes), msg type: 8(OWord Block Write) */
+send (16)
+ msg_ind
+ obw_wb
+ null
+ data_port(
+ OBW_CACHE_TYPE,
+ OBW_MESSAGE_TYPE,
+ OBW_CONTROL_2,
+ OBW_BIND_IDX,
+ OBW_WRITE_COMMIT_CATEGORY,
+ OBW_HEADER_PRESENT
+ )
+ mlen 2
+ rlen obw_wb_length
+ {align1};
+
+/* IME search */
+mov (1) vme_m0.12<1>:UD SEARCH_CTRL_SINGLE + INTER_PART_MASK + INTER_SAD_HAAR:UD {align1}; /* 16x16 Source, harr */
+mov (1) vme_m0.22<1>:UW REF_REGION_SIZE {align1}; /* Reference Width&Height, 48x40 */
+
+mov (1) vme_m0.0<1>:UD vme_m0.8<0,1,0>:UD {align1};
+
+add (1) vme_m0.0<1>:W vme_m0.0<0,1,0>:W -16:W {align1}; /* Reference = (x-16,y-12)-(x+32,y+28) */
+add (1) vme_m0.2<1>:W vme_m0.2<0,1,0>:W -12:W {align1};
+
+mov (1) vme_m0.0<1>:W -16:W {align1};
+mov (1) vme_m0.2<1>:W -12:W {align1};
+
+mov (1) vme_m0.4<1>:UD vme_m0.0<0,1,0>:UD {align1};
+
+mov (8) vme_msg_0.0<1>:UD vme_m0.0<8,8,1>:UD {align1};
+
+mov (1) vme_m1.0<1>:UD ADAPTIVE_SEARCH_ENABLE:ud {align1} ;
+mov (1) vme_m1.4<1>:UD MAX_NUM_MV:UD {align1}; /* Default value MAX 32 MVs */
+mov (1) vme_m1.8<1>:UD START_CENTER + SEARCH_PATH_LEN:UD {align1};
+mov (8) vme_msg_1.0<1>:UD vme_m1.0<8,8,1>:UD {align1};
+
+mov (8) vme_msg_2<1>:UD vme_m2.0<8,8,1>:UD {align1};
+/* M3/M4 search path */
+
+mov (1) vme_msg_3.0<1>:UD 0x01010101:UD {align1};
+mov (1) vme_msg_3.4<1>:UD 0x10010101:UD {align1};
+mov (1) vme_msg_3.8<1>:UD 0x0F0F0F0F:UD {align1};
+mov (1) vme_msg_3.12<1>:UD 0x100F0F0F:UD {align1};
+mov (1) vme_msg_3.16<1>:UD 0x01010101:UD {align1};
+mov (1) vme_msg_3.20<1>:UD 0x10010101:UD {align1};
+mov (1) vme_msg_3.24<1>:UD 0x0F0F0F0F:UD {align1};
+mov (1) vme_msg_3.28<1>:UD 0x100F0F0F:UD {align1};
+
+mov (1) vme_msg_4.0<1>:UD 0x01010101:UD {align1};
+mov (1) vme_msg_4.4<1>:UD 0x10010101:UD {align1};
+mov (1) vme_msg_4.8<1>:UD 0x0F0F0F0F:UD {align1};
+mov (1) vme_msg_4.12<1>:UD 0x000F0F0F:UD {align1};
+
+mov (4) vme_msg_4.16<1>:UD 0x0:UD {align1};
+
+send (8)
+ vme_msg_ind
+ vme_wb<1>:UD
+ null
+ vme(
+ BIND_IDX_VME,
+ 0,
+ 0,
+ VME_IME_MESSAGE_TYPE
+ )
+ mlen ime_vme_msg_length
+ rlen vme_wb_length {align1};
+
+/* Set Macroblock-shape/mode for FBR */
+
+mov (1) vme_m2.20<1>:UD 0x0:UD {align1};
+mov (1) vme_m2.21<1>:UB vme_wb.25<0,1,0>:UB {align1};
+mov (1) vme_m2.22<1>:UB vme_wb.26<0,1,0>:UB {align1};
+
+and (1) tmp_reg0.0<1>:UW vme_wb.0<0,1,0>:UW 0x03:UW {align1};
+mov (1) vme_m2.20<1>:UB tmp_reg0.0<0,1,0>:UB {align1};
+
+/* Write IME inter info */
+add (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD 0x02:UD {align1};
+mov (8) msg_reg0.0<1>:UD obw_m0<8,8,1>:UD {align1};
+
+mov (1) msg_reg1.0<1>:UD vme_wb.0<0,1,0>:UD {align1};
+
+mov (1) msg_reg1.4<1>:UD vme_wb.24<0,1,0>:UD {align1};
+/* Inter distortion of IME */
+mov (1) msg_reg1.8<1>:UD vme_wb.8<0,1,0>:UD {align1};
+
+mov (1) msg_reg1.12<1>:UD obw_m0.8<0,1,0>:UD {align1};
+
+/* bind index 3, write oword (16bytes), msg type: 8(OWord Block Write) */
+send (16)
+ msg_ind
+ obw_wb
+ null
+ data_port(
+ OBW_CACHE_TYPE,
+ OBW_MESSAGE_TYPE,
+ OBW_CONTROL_0,
+ OBW_BIND_IDX,
+ OBW_WRITE_COMMIT_CATEGORY,
+ OBW_HEADER_PRESENT
+ )
+ mlen 2
+ rlen obw_wb_length
+ {align1};
+
+/* Write IME MV */
+add (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD 0x01:UD {align1};
+mov (8) msg_reg0.0<1>:UD obw_m0<8,8,1>:UD {align1};
+
+mov (8) msg_reg1.0<1>:UD vme_wb1.0<8,8,1>:UD {align1};
+mov (8) msg_reg2.0<1>:ud vme_wb2.0<8,8,1>:ud {align1};
+mov (8) msg_reg3.0<1>:ud vme_wb3.0<8,8,1>:ud {align1};
+mov (8) msg_reg4.0<1>:ud vme_wb4.0<8,8,1>:ud {align1};
+/* bind index 3, write 8 oword (128 bytes), msg type: 8(OWord Block Write) */
+send (16)
+ msg_ind
+ obw_wb
+ null
+ data_port(
+ OBW_CACHE_TYPE,
+ OBW_MESSAGE_TYPE,
+ OBW_CONTROL_8,
+ OBW_BIND_IDX,
+ OBW_WRITE_COMMIT_CATEGORY,
+ OBW_HEADER_PRESENT
+ )
+ mlen 5
+ rlen obw_wb_length
+ {align1};
+
+/* Write IME RefID */
+add (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD 0x08:UD {align1};
+mov (8) msg_reg0.0<1>:UD obw_m0<8,8,1>:UD {align1};
+
+mov (8) msg_reg1.0<1>:UD vme_wb6.0<8,8,1>:UD {align1};
+
+/* bind index 3, write 2 oword (32bytes), msg type: 8(OWord Block Write) */
+send (16)
+ msg_ind
+ obw_wb
+ null
+ data_port(
+ OBW_CACHE_TYPE,
+ OBW_MESSAGE_TYPE,
+ OBW_CONTROL_2,
+ OBW_BIND_IDX,
+ OBW_WRITE_COMMIT_CATEGORY,
+ OBW_HEADER_PRESENT
+ )
+ mlen 2
+ rlen obw_wb_length
+ {align1};
+
+/* Send FBR message into CRE */
+
+mov (8) vme_msg_3.0<1>:UD vme_wb1.0<8,8,1>:UD {align1};
+mov (8) vme_msg_4.0<1>:ud vme_wb2.0<8,8,1>:ud {align1};
+mov (8) vme_msg_5.0<1>:ud vme_wb3.0<8,8,1>:ud {align1};
+mov (8) vme_msg_6.0<1>:ud vme_wb4.0<8,8,1>:ud {align1};
+
+mov (1) vme_m0.12<1>:UD INTER_SAD_HAAR + SUB_PEL_MODE_QUARTER + FBR_BME_DISABLE:UD {align1}; /* 16x16 Source, 1/4 pixel, harr, BME disable */
+mov (8) vme_msg_0.0<1>:UD vme_m0.0<8,8,1>:UD {align1};
+mov (8) vme_msg_1.0<1>:UD vme_m1.0<8,8,1>:UD {align1};
+
+mov (8) vme_msg_2.0<1>:UD vme_m2.0<8,8,1>:UD {align1};
+
+/* after verification it will be passed by using payload */
+send (8)
+ vme_msg_ind
+ vme_wb<1>:UD
+ null
+ cre(
+ BIND_IDX_VME,
+ VME_FBR_MESSAGE_TYPE
+ )
+ mlen fbr_vme_msg_length
+ rlen vme_wb_length
+ {align1};
+
+add (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD 0x02:UD {align1};
+mov (8) msg_reg0.0<1>:UD obw_m0<8,8,1>:UD {align1};
+/* write FME info */
+mov (1) msg_reg1.0<1>:UD vme_wb.0<0,1,0>:UD {align1};
+
+mov (1) msg_reg1.4<1>:UD vme_wb.24<0,1,0>:UD {align1};
+/* Inter distortion of FME */
+mov (1) msg_reg1.8<1>:UD vme_wb.8<0,1,0>:UD {align1};
+
+mov (1) msg_reg1.12<1>:UD vme_m2.20<0,1,0>:UD {align1};
+
+/* bind index 3, write oword (16bytes), msg type: 8(OWord Block Write) */
+send (16)
+ msg_ind
+ obw_wb
+ null
+ data_port(
+ OBW_CACHE_TYPE,
+ OBW_MESSAGE_TYPE,
+ OBW_CONTROL_0,
+ OBW_BIND_IDX,
+ OBW_WRITE_COMMIT_CATEGORY,
+ OBW_HEADER_PRESENT
+ )
+ mlen 2
+ rlen obw_wb_length
+ {align1};
+
+/* Write FME/BME MV */
+add (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD 0x01:UD {align1};
+mov (8) msg_reg0.0<1>:UD obw_m0.0<8,8,1>:UD {align1};
+
+
+mov (8) msg_reg1.0<1>:UD vme_wb1.0<8,8,1>:UD {align1};
+mov (8) msg_reg2.0<1>:ud vme_wb2.0<8,8,1>:ud {align1};
+mov (8) msg_reg3.0<1>:ud vme_wb3.0<8,8,1>:ud {align1};
+mov (8) msg_reg4.0<1>:ud vme_wb4.0<8,8,1>:ud {align1};
+/* bind index 3, write 8 oword (128 bytes), msg type: 8(OWord Block Write) */
+send (16)
+ msg_ind
+ obw_wb
+ null
+ data_port(
+ OBW_CACHE_TYPE,
+ OBW_MESSAGE_TYPE,
+ OBW_CONTROL_8,
+ OBW_BIND_IDX,
+ OBW_WRITE_COMMIT_CATEGORY,
+ OBW_HEADER_PRESENT
+ )
+ mlen 5
+ rlen obw_wb_length
+ {align1};
+
+/* Write FME/BME RefID */
+add (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD 0x08:UD {align1};
+mov (8) msg_reg0.0<1>:UD obw_m0<8,8,1>:UD {align1};
+
+mov (8) msg_reg1.0<1>:UD vme_wb6.0<8,8,1>:UD {align1};
+
+/* bind index 3, write 2 oword (32bytes), msg type: 8(OWord Block Write) */
+send (16)
+ msg_ind
+ obw_wb
+ null
+ data_port(
+ OBW_CACHE_TYPE,
+ OBW_MESSAGE_TYPE,
+ OBW_CONTROL_2,
+ OBW_BIND_IDX,
+ OBW_WRITE_COMMIT_CATEGORY,
+ OBW_HEADER_PRESENT
+ )
+ mlen 2
+ rlen obw_wb_length
+ {align1};
+
+__EXIT:
+/*
+ * kill thread
+ */
+mov (8) ts_msg_reg0<1>:UD r0<8,8,1>:UD {align1};
+send (16) ts_msg_ind acc0<1>UW null thread_spawner(0, 0, 1) mlen 1 rlen 0 {align1 EOT};
diff --git a/src/shaders/vme/inter_frame_haswell.g75a b/src/shaders/vme/inter_frame_haswell.g75a
new file mode 100644
index 0000000..e95ed93
--- /dev/null
+++ b/src/shaders/vme/inter_frame_haswell.g75a
@@ -0,0 +1,2 @@
+#include "vme75.inc"
+#include "inter_frame_haswell.asm"
diff --git a/src/shaders/vme/inter_frame_haswell.g75b b/src/shaders/vme/inter_frame_haswell.g75b
new file mode 100644
index 0000000..86971d4
--- /dev/null
+++ b/src/shaders/vme/inter_frame_haswell.g75b
@@ -0,0 +1,137 @@
+ { 0x00800001, 0x24000061, 0x00000000, 0x00000000 },
+ { 0x00800001, 0x24400061, 0x00000000, 0x00000000 },
+ { 0x00800001, 0x24800061, 0x00000000, 0x00000000 },
+ { 0x00800001, 0x24c00061, 0x00000000, 0x00000000 },
+ { 0x00200009, 0x24002e25, 0x004500a0, 0x00040004 },
+ { 0x00000040, 0x24003ca5, 0x00000400, 0xfff8fff8 },
+ { 0x00000040, 0x24043ca5, 0x00000404, 0xffffffff },
+ { 0x00000001, 0x240800e1, 0x00000000, 0x0000001f },
+ { 0x00000001, 0x24140231, 0x00000014, 0x00000000 },
+ { 0x00200009, 0x24202e25, 0x004500a0, 0x00040004 },
+ { 0x00000040, 0x24203ca5, 0x00000420, 0xfffcfffc },
+ { 0x00000001, 0x242800e1, 0x00000000, 0x000f0003 },
+ { 0x00000001, 0x24340231, 0x00000014, 0x00000000 },
+ { 0x00200009, 0x24482e29, 0x004500a0, 0x00040004 },
+ { 0x00000001, 0x24540231, 0x00000014, 0x00000000 },
+ { 0x00000041, 0x24884521, 0x000000a2, 0x000000a1 },
+ { 0x00000040, 0x24884421, 0x00000488, 0x000000a0 },
+ { 0x00000041, 0x24880c21, 0x00000488, 0x00000018 },
+ { 0x00000001, 0x24940231, 0x00000014, 0x00000000 },
+ { 0x00600001, 0x28000021, 0x008d0400, 0x00000000 },
+ { 0x04600031, 0x23801cb1, 0x00000800, 0x02190004 },
+ { 0x00600001, 0x28000021, 0x008d0420, 0x00000000 },
+ { 0x04600031, 0x23a01cb1, 0x00000800, 0x02290004 },
+ { 0x00600001, 0x25600021, 0x008d0020, 0x00000000 },
+ { 0x00600001, 0x28400021, 0x008d0560, 0x00000000 },
+ { 0x00600001, 0x28600061, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x23800061, 0x00000000, 0x00000000 },
+ { 0x00000005, 0x23840c21, 0x00000384, 0xff000000 },
+ { 0x00600001, 0x28800021, 0x008d0380, 0x00000000 },
+ { 0x00600001, 0x28a00061, 0x00000000, 0x00000000 },
+ { 0x00800001, 0x28a00231, 0x00cf03a3, 0x00000000 },
+ { 0x00000001, 0x28b00061, 0x00000000, 0x11111111 },
+ { 0x00000001, 0x28bc0061, 0x00000000, 0x00010101 },
+ { 0x00600001, 0x28c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28000021, 0x008d0440, 0x00000000 },
+ { 0x00000001, 0x24000169, 0x00000000, 0x00010001 },
+ { 0x00000001, 0x28850231, 0x00000400, 0x00000000 },
+ { 0x00000001, 0x247c0169, 0x00000000, 0x00000000 },
+ { 0x01000005, 0x20002e28, 0x000000a4, 0x00010001 },
+ { 0x00010001, 0x247c00f1, 0x00000000, 0x00000002 },
+ { 0x00000001, 0x247d0231, 0x000000a5, 0x00000000 },
+ { 0x00000001, 0x24000169, 0x00000000, 0x00200020 },
+ { 0x00000001, 0x247e0231, 0x00000400, 0x00000000 },
+ { 0x00000001, 0x244c0061, 0x00000000, 0x00800000 },
+ { 0x00600001, 0x28000021, 0x008d0440, 0x00000000 },
+ { 0x00600001, 0x28200021, 0x008d0460, 0x00000000 },
+ { 0x0d600031, 0x21801ca1, 0x00000800, 0x0e782000 },
+ { 0x00600001, 0x28000021, 0x008d0480, 0x00000000 },
+ { 0x00000001, 0x28200021, 0x00000180, 0x00000000 },
+ { 0x00000001, 0x28240021, 0x00000190, 0x00000000 },
+ { 0x00000001, 0x28280021, 0x00000194, 0x00000000 },
+ { 0x00000001, 0x282c0021, 0x00000198, 0x00000000 },
+ { 0x00000001, 0x28300129, 0x0000018c, 0x00000000 },
+ { 0x00000001, 0x28340021, 0x00000188, 0x00000000 },
+ { 0x00000001, 0x28380021, 0x0000019c, 0x00000000 },
+ { 0x00000001, 0x283c0021, 0x00000488, 0x00000000 },
+ { 0x0a800031, 0x20001cac, 0x00000800, 0x040a0203 },
+ { 0x00000001, 0x244c0061, 0x00000000, 0x00200000 },
+ { 0x00000001, 0x24560169, 0x00000000, 0x28302830 },
+ { 0x00000001, 0x24400021, 0x00000448, 0x00000000 },
+ { 0x00000040, 0x24403dad, 0x00000440, 0xfff0fff0 },
+ { 0x00000040, 0x24423dad, 0x00000442, 0xfff4fff4 },
+ { 0x00000001, 0x244001ed, 0x00000000, 0xfff0fff0 },
+ { 0x00000001, 0x244201ed, 0x00000000, 0xfff4fff4 },
+ { 0x00000001, 0x24440021, 0x00000440, 0x00000000 },
+ { 0x00600001, 0x28000021, 0x008d0440, 0x00000000 },
+ { 0x00000001, 0x24600061, 0x00000000, 0x00000002 },
+ { 0x00000001, 0x24640061, 0x00000000, 0x00000020 },
+ { 0x00000001, 0x24680061, 0x00000000, 0x30003030 },
+ { 0x00600001, 0x28200021, 0x008d0460, 0x00000000 },
+ { 0x00600001, 0x28400021, 0x008d0560, 0x00000000 },
+ { 0x00000001, 0x28600061, 0x00000000, 0x01010101 },
+ { 0x00000001, 0x28640061, 0x00000000, 0x10010101 },
+ { 0x00000001, 0x28680061, 0x00000000, 0x0f0f0f0f },
+ { 0x00000001, 0x286c0061, 0x00000000, 0x100f0f0f },
+ { 0x00000001, 0x28700061, 0x00000000, 0x01010101 },
+ { 0x00000001, 0x28740061, 0x00000000, 0x10010101 },
+ { 0x00000001, 0x28780061, 0x00000000, 0x0f0f0f0f },
+ { 0x00000001, 0x287c0061, 0x00000000, 0x100f0f0f },
+ { 0x00000001, 0x28800061, 0x00000000, 0x01010101 },
+ { 0x00000001, 0x28840061, 0x00000000, 0x10010101 },
+ { 0x00000001, 0x28880061, 0x00000000, 0x0f0f0f0f },
+ { 0x00000001, 0x288c0061, 0x00000000, 0x000f0f0f },
+ { 0x00400001, 0x28900061, 0x00000000, 0x00000000 },
+ { 0x08600031, 0x21801ca1, 0x00000800, 0x0a784000 },
+ { 0x00000001, 0x25740061, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x25750231, 0x00000199, 0x00000000 },
+ { 0x00000001, 0x25760231, 0x0000019a, 0x00000000 },
+ { 0x00000005, 0x24002d29, 0x00000180, 0x00030003 },
+ { 0x00000001, 0x25740231, 0x00000400, 0x00000000 },
+ { 0x00000040, 0x24880c21, 0x00000488, 0x00000002 },
+ { 0x00600001, 0x28000021, 0x008d0480, 0x00000000 },
+ { 0x00000001, 0x28200021, 0x00000180, 0x00000000 },
+ { 0x00000001, 0x28240021, 0x00000198, 0x00000000 },
+ { 0x00000001, 0x28280021, 0x00000188, 0x00000000 },
+ { 0x00000001, 0x282c0021, 0x00000488, 0x00000000 },
+ { 0x0a800031, 0x20001cac, 0x00000800, 0x040a0003 },
+ { 0x00000040, 0x24880c21, 0x00000488, 0x00000001 },
+ { 0x00600001, 0x28000021, 0x008d0480, 0x00000000 },
+ { 0x00600001, 0x28200021, 0x008d01a0, 0x00000000 },
+ { 0x00600001, 0x28400021, 0x008d01c0, 0x00000000 },
+ { 0x00600001, 0x28600021, 0x008d01e0, 0x00000000 },
+ { 0x00600001, 0x28800021, 0x008d0200, 0x00000000 },
+ { 0x0a800031, 0x20001cac, 0x00000800, 0x0a0a0403 },
+ { 0x00000040, 0x24880c21, 0x00000488, 0x00000008 },
+ { 0x00600001, 0x28000021, 0x008d0480, 0x00000000 },
+ { 0x00600001, 0x28200021, 0x008d0240, 0x00000000 },
+ { 0x0a800031, 0x20001cac, 0x00000800, 0x040a0203 },
+ { 0x00600001, 0x28600021, 0x008d01a0, 0x00000000 },
+ { 0x00600001, 0x28800021, 0x008d01c0, 0x00000000 },
+ { 0x00600001, 0x28a00021, 0x008d01e0, 0x00000000 },
+ { 0x00600001, 0x28c00021, 0x008d0200, 0x00000000 },
+ { 0x00000001, 0x244c0061, 0x00000000, 0x00243000 },
+ { 0x00600001, 0x28000021, 0x008d0440, 0x00000000 },
+ { 0x00600001, 0x28200021, 0x008d0460, 0x00000000 },
+ { 0x00600001, 0x28400021, 0x008d0560, 0x00000000 },
+ { 0x0d600031, 0x21801ca1, 0x00000800, 0x0e786000 },
+ { 0x00000040, 0x24880c21, 0x00000488, 0x00000002 },
+ { 0x00600001, 0x28000021, 0x008d0480, 0x00000000 },
+ { 0x00000001, 0x28200021, 0x00000180, 0x00000000 },
+ { 0x00000001, 0x28240021, 0x00000198, 0x00000000 },
+ { 0x00000001, 0x28280021, 0x00000188, 0x00000000 },
+ { 0x00000001, 0x282c0021, 0x00000574, 0x00000000 },
+ { 0x0a800031, 0x20001cac, 0x00000800, 0x040a0003 },
+ { 0x00000040, 0x24880c21, 0x00000488, 0x00000001 },
+ { 0x00600001, 0x28000021, 0x008d0480, 0x00000000 },
+ { 0x00600001, 0x28200021, 0x008d01a0, 0x00000000 },
+ { 0x00600001, 0x28400021, 0x008d01c0, 0x00000000 },
+ { 0x00600001, 0x28600021, 0x008d01e0, 0x00000000 },
+ { 0x00600001, 0x28800021, 0x008d0200, 0x00000000 },
+ { 0x0a800031, 0x20001cac, 0x00000800, 0x0a0a0403 },
+ { 0x00000040, 0x24880c21, 0x00000488, 0x00000008 },
+ { 0x00600001, 0x28000021, 0x008d0480, 0x00000000 },
+ { 0x00600001, 0x28200021, 0x008d0240, 0x00000000 },
+ { 0x0a800031, 0x20001cac, 0x00000800, 0x040a0203 },
+ { 0x00600001, 0x2e000021, 0x008d0000, 0x00000000 },
+ { 0x07800031, 0x24001ca8, 0x00000e00, 0x82000010 },
diff --git a/src/shaders/vme/intra_frame_haswell.asm b/src/shaders/vme/intra_frame_haswell.asm
new file mode 100644
index 0000000..64efd55
--- /dev/null
+++ b/src/shaders/vme/intra_frame_haswell.asm
@@ -0,0 +1,160 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * This program is licensed under the terms and conditions of the
+ * Eclipse Public License (EPL), version 1.0. The full text of the EPL is at
+ * http://www.opensource.org/licenses/eclipse-1.0.php.
+ *
+ */
+// Module name: IntraFrame.asm
+//
+// Make intra prediction estimation for Intra frame
+//
+
+//
+// Now, begin source code....
+//
+
+/*
+ * __START
+ */
+__INTRA_START:
+mov (16) tmp_reg0.0<1>:UD 0x0:UD {align1};
+mov (16) tmp_reg2.0<1>:UD 0x0:UD {align1};
+mov (16) tmp_reg4.0<1>:UD 0x0:UD {align1} ;
+mov (16) tmp_reg6.0<1>:UD 0x0:UD {align1} ;
+
+shl (2) read0_header.0<1>:D orig_xy_ub<2,2,1>:UB 4:UW {align1}; /* (x, y) * 16 */
+add (1) read0_header.0<1>:D read0_header.0<0,1,0>:D -8:W {align1}; /* X offset */
+add (1) read0_header.4<1>:D read0_header.4<0,1,0>:D -1:W {align1}; /* Y offset */
+mov (1) read0_header.8<1>:UD BLOCK_32X1 {align1};
+mov (1) read0_header.20<1>:UB thread_id_ub {align1}; /* dispatch id */
+
+shl (2) read1_header.0<1>:D orig_xy_ub<2,2,1>:UB 4:UW {align1}; /* (x, y) * 16 */
+add (1) read1_header.0<1>:D read1_header.0<0,1,0>:D -4:W {align1}; /* X offset */
+mov (1) read1_header.8<1>:UD BLOCK_4X16 {align1};
+mov (1) read1_header.20<1>:UB thread_id_ub {align1}; /* dispatch id */
+
+shl (2) vme_m0.8<1>:UW orig_xy_ub<2,2,1>:UB 4:UW {align1}; /* (x, y) * 16 */
+mov (1) vme_m0.20<1>:UB thread_id_ub {align1}; /* dispatch id */
+
+mul (1) obw_m0.8<1>:UD w_in_mb_uw<0,1,0>:UW orig_y_ub<0,1,0>:UB {align1};
+add (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD orig_x_ub<0,1,0>:UB {align1};
+mul (1) obw_m0.8<1>:UD obw_m0.8<0,1,0>:UD 0x02:UD {align1};
+mov (1) obw_m0.20<1>:UB thread_id_ub {align1}; /* dispatch id */
+
+/*
+ * Media Read Message -- fetch Luma neighbor edge pixels
+ */
+/* ROW */
+mov (8) msg_reg0.0<1>:UD read0_header.0<8,8,1>:UD {align1};
+send (8) msg_ind INEP_ROW<1>:UB null read(BIND_IDX_INEP, 0, 0, 4) mlen 1 rlen 1 {align1};
+
+/* COL */
+mov (8) msg_reg0.0<1>:UD read1_header.0<8,8,1>:UD {align1};
+send (8) msg_ind INEP_COL0<1>:UB null read(BIND_IDX_INEP, 0, 0, 4) mlen 1 rlen 2 {align1};
+
+/* m2, get the MV/Mb cost passed by constant buffer
+when creating EU thread by MEDIA_OBJECT */
+mov (8) vme_msg_2<1>:UD r1.0<8,8,1>:UD {align1};
+
+/* m3 */
+mov (8) vme_msg_3<1>:UD 0x0:UD {align1};
+
+/* m4 */
+mov (1) INEP_ROW.0<1>:UD 0x0:UD {align1};
+and (1) INEP_ROW.4<1>:UD INEP_ROW.4<0,1,0>:UD 0xFF000000:UD {align1};
+mov (8) vme_msg_4<1>:UD INEP_ROW.0<8,8,1>:UD {align1};
+
+/* m5 */
+mov (8) vme_msg_5<1>:UD 0x0:UD {align1};
+mov (16) vme_msg_5.0<1>:UB INEP_COL0.3<32,8,4>:UB {align1};
+mov (1) vme_msg_5.16<1>:UD INTRA_PREDICTORE_MODE {align1};
+
+/* the penalty for Intra mode */
+mov (1) vme_msg_5.28<1>:UD 0x010101:UD {align1};
+
+
+/* m6 */
+
+mov (8) vme_msg_6<1>:UD 0x0:UD {align1};
+
+/*
+ * VME message
+ */
+/* m0 */
+mov (8) vme_msg_0.0<1>:UD vme_m0.0<8,8,1>:UD {align1};
+mov (1) tmp_reg0.0<1>:UW LUMA_INTRA_MODE:UW {align1};
+/* Use the Luma mode */
+mov (1) vme_msg_4.5<1>:UB tmp_reg0.0<0,1,0>:UB {align1};
+
+/* m1 */
+mov (1) intra_flag<1>:UW 0x0:UW {align1} ;
+and.z.f0.0 (1) null<1>:UW transform_8x8_ub<0,1,0>:UB 1:UW {align1};
+(f0.0) mov (1) intra_part_mask_ub<1>:UB LUMA_INTRA_8x8_DISABLE {align1};
+
+/* assign MB intra struct from the thread payload*/
+mov (1) mb_intra_struct_ub<1>:UB input_mb_intra_ub<0,1,0>:UB {align1};
+
+/* Disable DC HAAR component when calculating HAAR SATD block */
+mov (1) tmp_reg0.0<1>:UW DC_HARR_DISABLE:UW {align1};
+mov (1) vme_m1.30<1>:UB tmp_reg0.0<0,1,0>:UB {align1};
+
+/* m0 */
+mov (8) vme_msg_0.0<1>:UD vme_m0.0<8,8,1>:UD {align1};
+mov (8) vme_msg_1<1>:UD vme_m1.0<8,8,1>:UD {align1};
+
+/* after verification it will be passed by using payload */
+send (8)
+ vme_msg_ind
+ vme_wb<1>:UD
+ null
+ cre(
+ BIND_IDX_VME,
+ VME_SIC_MESSAGE_TYPE
+ )
+ mlen sic_vme_msg_length
+ rlen vme_wb_length
+ {align1};
+/*
+ * Oword Block Write message
+ */
+mov (8) msg_reg0.0<1>:UD obw_m0<8,8,1>:UD {align1};
+
+mov (1) msg_reg1.0<1>:UD vme_wb.0<0,1,0>:UD {align1};
+mov (1) msg_reg1.4<1>:UD vme_wb.16<0,1,0>:UD {align1};
+mov (1) msg_reg1.8<1>:UD vme_wb.20<0,1,0>:UD {align1};
+mov (1) msg_reg1.12<1>:UD vme_wb.24<0,1,0>:UD {align1};
+
+/* Distortion, Intra (17-16), */
+mov (1) msg_reg1.16<1>:UW vme_wb.12<0,1,0>:UW {align1};
+
+mov (1) msg_reg1.20<1>:UD vme_wb.8<0,1,0>:UD {align1};
+/* VME clock counts */
+mov (1) msg_reg1.24<1>:UD vme_wb.28<0,1,0>:UD {align1};
+
+mov (1) msg_reg1.28<1>:UD obw_m0.8<0,1,0>:UD {align1};
+
+/* bind index 3, write 2 oword (32bytes), msg type: 8(OWord Block Write) */
+send (16)
+ msg_ind
+ obw_wb
+ null
+ data_port(
+ OBW_CACHE_TYPE,
+ OBW_MESSAGE_TYPE,
+ OBW_CONTROL_2,
+ OBW_BIND_IDX,
+ OBW_WRITE_COMMIT_CATEGORY,
+ OBW_HEADER_PRESENT
+ )
+ mlen 2
+ rlen obw_wb_length
+ {align1};
+
+__EXIT:
+/*
+ * kill thread
+ */
+mov (8) ts_msg_reg0<1>:UD r0<8,8,1>:UD {align1};
+send (16) ts_msg_ind acc0<1>UW null thread_spawner(0, 0, 1) mlen 1 rlen 0 {align1 EOT};
diff --git a/src/shaders/vme/intra_frame_haswell.g75a b/src/shaders/vme/intra_frame_haswell.g75a
new file mode 100644
index 0000000..a690fdd
--- /dev/null
+++ b/src/shaders/vme/intra_frame_haswell.g75a
@@ -0,0 +1,2 @@
+#include "vme75.inc"
+#include "intra_frame_haswell.asm"
diff --git a/src/shaders/vme/intra_frame_haswell.g75b b/src/shaders/vme/intra_frame_haswell.g75b
new file mode 100644
index 0000000..5ae7a99
--- /dev/null
+++ b/src/shaders/vme/intra_frame_haswell.g75b
@@ -0,0 +1,57 @@
+ { 0x00800001, 0x24000061, 0x00000000, 0x00000000 },
+ { 0x00800001, 0x24400061, 0x00000000, 0x00000000 },
+ { 0x00800001, 0x24800061, 0x00000000, 0x00000000 },
+ { 0x00800001, 0x24c00061, 0x00000000, 0x00000000 },
+ { 0x00200009, 0x24002e25, 0x004500a0, 0x00040004 },
+ { 0x00000040, 0x24003ca5, 0x00000400, 0xfff8fff8 },
+ { 0x00000040, 0x24043ca5, 0x00000404, 0xffffffff },
+ { 0x00000001, 0x240800e1, 0x00000000, 0x0000001f },
+ { 0x00000001, 0x24140231, 0x00000014, 0x00000000 },
+ { 0x00200009, 0x24202e25, 0x004500a0, 0x00040004 },
+ { 0x00000040, 0x24203ca5, 0x00000420, 0xfffcfffc },
+ { 0x00000001, 0x242800e1, 0x00000000, 0x000f0003 },
+ { 0x00000001, 0x24340231, 0x00000014, 0x00000000 },
+ { 0x00200009, 0x24482e29, 0x004500a0, 0x00040004 },
+ { 0x00000001, 0x24540231, 0x00000014, 0x00000000 },
+ { 0x00000041, 0x24884521, 0x000000a2, 0x000000a1 },
+ { 0x00000040, 0x24884421, 0x00000488, 0x000000a0 },
+ { 0x00000041, 0x24880c21, 0x00000488, 0x00000002 },
+ { 0x00000001, 0x24940231, 0x00000014, 0x00000000 },
+ { 0x00600001, 0x28000021, 0x008d0400, 0x00000000 },
+ { 0x04600031, 0x23801cb1, 0x00000800, 0x02190004 },
+ { 0x00600001, 0x28000021, 0x008d0420, 0x00000000 },
+ { 0x04600031, 0x23a01cb1, 0x00000800, 0x02290004 },
+ { 0x00600001, 0x28400021, 0x008d0020, 0x00000000 },
+ { 0x00600001, 0x28600061, 0x00000000, 0x00000000 },
+ { 0x00000001, 0x23800061, 0x00000000, 0x00000000 },
+ { 0x00000005, 0x23840c21, 0x00000384, 0xff000000 },
+ { 0x00600001, 0x28800021, 0x008d0380, 0x00000000 },
+ { 0x00600001, 0x28a00061, 0x00000000, 0x00000000 },
+ { 0x00800001, 0x28a00231, 0x00cf03a3, 0x00000000 },
+ { 0x00000001, 0x28b00061, 0x00000000, 0x11111111 },
+ { 0x00000001, 0x28bc0061, 0x00000000, 0x00010101 },
+ { 0x00600001, 0x28c00061, 0x00000000, 0x00000000 },
+ { 0x00600001, 0x28000021, 0x008d0440, 0x00000000 },
+ { 0x00000001, 0x24000169, 0x00000000, 0x00010001 },
+ { 0x00000001, 0x28850231, 0x00000400, 0x00000000 },
+ { 0x00000001, 0x247c0169, 0x00000000, 0x00000000 },
+ { 0x01000005, 0x20002e28, 0x000000a4, 0x00010001 },
+ { 0x00010001, 0x247c00f1, 0x00000000, 0x00000002 },
+ { 0x00000001, 0x247d0231, 0x000000a5, 0x00000000 },
+ { 0x00000001, 0x24000169, 0x00000000, 0x00200020 },
+ { 0x00000001, 0x247e0231, 0x00000400, 0x00000000 },
+ { 0x00600001, 0x28000021, 0x008d0440, 0x00000000 },
+ { 0x00600001, 0x28200021, 0x008d0460, 0x00000000 },
+ { 0x0d600031, 0x21801ca1, 0x00000800, 0x0e782000 },
+ { 0x00600001, 0x28000021, 0x008d0480, 0x00000000 },
+ { 0x00000001, 0x28200021, 0x00000180, 0x00000000 },
+ { 0x00000001, 0x28240021, 0x00000190, 0x00000000 },
+ { 0x00000001, 0x28280021, 0x00000194, 0x00000000 },
+ { 0x00000001, 0x282c0021, 0x00000198, 0x00000000 },
+ { 0x00000001, 0x28300129, 0x0000018c, 0x00000000 },
+ { 0x00000001, 0x28340021, 0x00000188, 0x00000000 },
+ { 0x00000001, 0x28380021, 0x0000019c, 0x00000000 },
+ { 0x00000001, 0x283c0021, 0x00000488, 0x00000000 },
+ { 0x0a800031, 0x20001cac, 0x00000800, 0x040a0203 },
+ { 0x00600001, 0x2e000021, 0x008d0000, 0x00000000 },
+ { 0x07800031, 0x24001ca8, 0x00000e00, 0x82000010 },
diff --git a/src/shaders/vme/vme75.inc b/src/shaders/vme/vme75.inc
new file mode 100644
index 0000000..d48daa0
--- /dev/null
+++ b/src/shaders/vme/vme75.inc
@@ -0,0 +1,268 @@
+/*
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * This program is licensed under the terms and conditions of the
+ * Eclipse Public License (EPL), version 1.0. The full text of the EPL is at
+ * http://www.opensource.org/licenses/eclipse-1.0.php.
+ *
+ */
+// Module name: ME_header.inc
+//
+// Global symbols define
+//
+
+/*
+ * Constant
+ */
+define(`VME_MESSAGE_TYPE_INTER', `1')
+define(`VME_MESSAGE_TYPE_INTRA', `2')
+define(`VME_MESSAGE_TYPE_MIXED', `3')
+
+define(`VME_SIC_MESSAGE_TYPE', `1')
+define(`VME_IME_MESSAGE_TYPE', `2')
+define(`VME_FBR_MESSAGE_TYPE', `3')
+
+define(`BLOCK_32X1', `0x0000001F')
+define(`BLOCK_4X16', `0x000F0003')
+define(`BLOCK_8X4', `0x00070003')
+
+define(`LUMA_INTRA_16x16_DISABLE', `0x1')
+define(`LUMA_INTRA_8x8_DISABLE', `0x2')
+define(`LUMA_INTRA_4x4_DISABLE', `0x4')
+
+define(`INTRA_PRED_AVAIL_FLAG_AE', `0x60')
+define(`INTRA_PRED_AVAIL_FLAG_B', `0x10')
+define(`INTRA_PRED_AVAIL_FLAG_C', `0x8')
+define(`INTRA_PRED_AVAIL_FLAG_D', `0x4')
+
+define(`BIND_IDX_VME', `0')
+define(`BIND_IDX_VME_REF0', `1')
+define(`BIND_IDX_VME_REF1', `2')
+define(`BIND_IDX_OUTPUT', `3')
+define(`BIND_IDX_INEP', `4')
+
+define(`SUB_PEL_MODE_INTEGER', `0x00000000')
+define(`SUB_PEL_MODE_HALF', `0x00001000')
+define(`SUB_PEL_MODE_QUARTER', `0x00003000')
+
+define(`INTER_SAD_NONE', `0x00000000')
+define(`INTER_SAD_HAAR', `0x00200000')
+
+define(`INTRA_SAD_NONE', `0x00000000')
+define(`INTRA_SAD_HAAR', `0x00800000')
+
+define(`INTER_PART_MASK', `0x00000000')
+
+define(`SEARCH_CTRL_SINGLE', `0x00000000')
+define(`SEARCH_CTRL_DUAL_START', `0x00000100')
+define(`SEARCH_CTRL_DUAL_RECORD', `0x00000300')
+define(`SEARCH_CTRL_DUAL_REFERENCE', `0x00000700')
+
+define(`REF_REGION_SIZE', `0x2830:UW')
+
+define(`BI_SUB_MB_PART_MASK', `0x0c000000')
+define(`MAX_NUM_MV', `0x00000020')
+define(`FB_PRUNING_ENABLE', `0x40000000')
+
+define(`SEARCH_PATH_LEN', `0x00003030')
+define(`START_CENTER', `0x30000000')
+
+define(`ADAPTIVE_SEARCH_ENABLE', `0x00000002')
+define(`INTRA_PREDICTORE_MODE', `0x11111111:UD')
+
+define(`INTER_VME_OUTPUT_IN_OWS', `10')
+define(`INTER_VME_OUTPUT_MV_IN_OWS', `8')
+
+define(`INTRAMBFLAG_MASK', `0x00002000')
+define(`MVSIZE_UW_BASE', `0x0040')
+define(`MFC_MV32_BIT_SHIFT', `5')
+define(`CBP_DC_YUV_UW', `0x000E')
+
+define(`DC_HARR_ENABLE', `0x0000')
+define(`DC_HARR_DISABLE', `0x0020')
+
+define(`MV32_BIT_MASK', `0x0020')
+define(`MV32_BIT_SHIFT', `5')
+
+define(`OBW_CACHE_TYPE', `10')
+
+
+define(`OBW_MESSAGE_TYPE', `8')
+
+define(`OBW_BIND_IDX', `BIND_IDX_OUTPUT')
+
+define(`OBW_CONTROL_0', `0') /* 1 OWord, low 128 bits */
+define(`OBW_CONTROL_1', `1') /* 1 OWord, high 128 bits */
+define(`OBW_CONTROL_2', `2') /* 2 OWords */
+define(`OBW_CONTROL_3', `3') /* 4 OWords */
+define(`OBW_CONTROL_8', `4') /* 8 OWords */
+
+define(`FBR_BME_ENABLE', `0x00000000')
+define(`FBR_BME_DISABLE', `0x00040000')
+
+define(`OBW_WRITE_COMMIT_CATEGORY', `0') /* category on Ivybridge */
+
+
+define(`OBW_HEADER_PRESENT', `1')
+
+/* GRF registers
+ * r0 header
+ * r1~r4 constant buffer (reserved)
+ * r5 inline data
+ * r6~r11 reserved
+ * r12 write back of VME message
+ * r13 write back of Oword Block Write
+ */
+/*
+ * GRF 0 -- header
+ */
+define(`thread_id_ub', `r0.20<0,1,0>:UB') /* thread id in payload */
+
+/*
+ * GRF 1~4 -- Constant Buffer (reserved)
+ */
+
+/*
+ * GRF 5 -- inline data
+ */
+define(`inline_reg0', `r5')
+define(`w_in_mb_uw', `inline_reg0.2')
+define(`orig_xy_ub', `inline_reg0.0')
+define(`orig_x_ub', `inline_reg0.0') /* in macroblock */
+define(`orig_y_ub', `inline_reg0.1')
+define(`transform_8x8_ub', `inline_reg0.4')
+define(`slice_edge_ub', `inline_reg0.4')
+define(`num_macroblocks', `inline_reg0.6')
+define(`input_mb_intra_ub', `inline_reg0.5')
+
+/*
+ * GRF 6~11 -- reserved
+ */
+
+/*
+ * GRF 12~22 -- write back for VME message
+ */
+define(`vme_wb', `r12')
+define(`vme_wb0', `r12')
+define(`vme_wb1', `r13')
+define(`vme_wb2', `r14')
+define(`vme_wb3', `r15')
+define(`vme_wb4', `r16')
+define(`vme_wb5', `r17')
+define(`vme_wb6', `r18')
+define(`vme_ime_wb7', `r19')
+define(`vme_ime_wb8', `r20')
+define(`vme_ime_wb9', `r21')
+define(`vme_ime_wb10', `r22')
+
+
+/*
+ * Oword Block Write write-back -- unused (null destination, zero length)
+ */
+define(`obw_wb', `null<1>:W')
+define(`obw_wb_length', `0')
+
+
+/*
+ * GRF 28~30 -- Intra Neighbor Edge Pixels
+ */
+define(`INEP_ROW', `r28')
+define(`INEP_COL0', `r29')
+define(`INEP_COL1', `r30')
+
+/*
+ * GRF 48~50 -- Chroma Neighbor Edge Pixels
+ */
+define(`CHROMA_ROW', `r48')
+define(`CHROMA_COL', `r49')
+
+/*
+ * temporary registers
+ */
+define(`tmp_reg0', `r32')
+define(`read0_header', `tmp_reg0')
+define(`tmp_reg1', `r33')
+define(`read1_header', `tmp_reg1')
+define(`tmp_reg2', `r34')
+define(`vme_m0', `tmp_reg2')
+define(`tmp_reg3', `r35')
+define(`vme_m1', `tmp_reg3')
+define(`intra_flag', `vme_m1.28')
+define(`intra_part_mask_ub', `vme_m1.28')
+define(`mb_intra_struct_ub', `vme_m1.29')
+define(`tmp_reg4', `r36')
+define(`obw_m0', `tmp_reg4')
+define(`tmp_reg5', `r37')
+define(`obw_m1', `tmp_reg5')
+define(`tmp_reg6', `r38')
+define(`obw_m2', `tmp_reg6')
+define(`tmp_reg7', `r39')
+define(`obw_m3', `tmp_reg7')
+define(`tmp_reg8', `r40')
+define(`obw_m4', `tmp_reg8')
+define(`tmp_reg9', `r41')
+define(`tmp_x_w', `tmp_reg9.0')
+define(`tmp_rega', `r42')
+define(`tmp_ud0', `tmp_rega.0')
+define(`tmp_ud1', `tmp_rega.4')
+define(`tmp_ud2', `tmp_rega.8')
+define(`tmp_ud3', `tmp_rega.12')
+define(`tmp_uw0', `tmp_rega.0')
+define(`tmp_uw1', `tmp_rega.2')
+define(`tmp_uw2', `tmp_rega.4')
+define(`tmp_uw3', `tmp_rega.6')
+define(`tmp_uw4', `tmp_rega.8')
+define(`tmp_uw5', `tmp_rega.10')
+define(`tmp_uw6', `tmp_rega.12')
+define(`tmp_uw7', `tmp_rega.14')
+
+define(`vme_m2', `r43')
+/*
+ * MRF registers
+ */
+
+define(`msg_ind', `64')
+define(`msg_reg0', `r64')
+define(`msg_reg1', `r65')
+define(`msg_reg2', `r66')
+define(`msg_reg3', `r67')
+define(`msg_reg4', `r68')
+define(`msg_reg5', `r69')
+define(`msg_reg6', `r70')
+define(`msg_reg7', `r71')
+define(`msg_reg8', `r72')
+define(`msg_reg9', `r73')
+
+define(`ts_msg_ind', `112')
+define(`ts_msg_reg0', `r112')
+/*
+ * VME message payload
+ */
+
+define(`vme_intra_wb_length', `1')
+define(`vme_wb_length', `7')
+define(`sic_vme_msg_length', `7')
+define(`fbr_vme_msg_length', `7')
+define(`ime_vme_msg_length', `5')
+
+define(`vme_msg_ind', `msg_ind')
+define(`vme_msg_0', `msg_reg0')
+define(`vme_msg_1', `msg_reg1')
+define(`vme_msg_2', `msg_reg2')
+
+define(`vme_msg_3', `msg_reg3')
+define(`vme_msg_4', `msg_reg4')
+
+
+define(`vme_msg_5', `msg_reg5')
+define(`vme_msg_6', `msg_reg6')
+define(`vme_msg_7', `msg_reg7')
+define(`vme_msg_8', `msg_reg8')
+define(`vme_msg_9', `msg_reg9')
+
+define(`BIND_IDX_CBCR', `6')
+
+
+define(`LUMA_CHROMA_MODE', `0x0')
+define(`LUMA_INTRA_MODE', `0x1')
+define(`LUMA_INTRA_DISABLE', `0x2')
diff --git a/src/sysdeps.h b/src/sysdeps.h
new file mode 100644
index 0000000..a713d20
--- /dev/null
+++ b/src/sysdeps.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2012 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef SYSDEPS_H
+#define SYSDEPS_H
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+#include <stdint.h>
+#include <assert.h>
+
+#endif /* SYSDEPS_H */
diff --git a/src/va_backend_compat.h b/src/va_backend_compat.h
new file mode 100644
index 0000000..f5c9f75
--- /dev/null
+++ b/src/va_backend_compat.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2012 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef VA_BACKEND_COMPAT_H
+#define VA_BACKEND_COMPAT_H
+
+#include <va/va_backend.h>
+
+#if VA_CHECK_VERSION(0,33,0)
+# include <va/va_drmcommon.h>
+
+# define VA_CHECK_DRM_AUTH_TYPE(ctx, type) \
+ (((struct drm_state *)(ctx)->drm_state)->auth_type == (type))
+
+#else
+# include <va/va_dricommon.h>
+
+# define VA_CHECK_DRM_AUTH_TYPE(ctx, type) \
+ (((struct dri_state *)(ctx)->dri_state)->driConnectedFlag == (type))
+
+# define drm_state dri_state
+# define VA_DRM_AUTH_DRI1 VA_DRI1
+# define VA_DRM_AUTH_DRI2 VA_DRI2
+# define VA_DRM_AUTH_CUSTOM VA_DUMMY
+#endif
+
+#endif /* VA_BACKEND_COMPAT_H */
diff --git a/src/wayland-drm-client-protocol.h b/src/wayland-drm-client-protocol.h
new file mode 100644
index 0000000..cba188e
--- /dev/null
+++ b/src/wayland-drm-client-protocol.h
@@ -0,0 +1,213 @@
+/*
+ * Copyright © 2008-2011 Kristian Høgsberg
+ * Copyright © 2010-2011 Intel Corporation
+ *
+ * Permission to use, copy, modify, distribute, and sell this
+ * software and its documentation for any purpose is hereby granted
+ * without fee, provided that the above copyright notice appear in
+ * all copies and that both that copyright notice and this permission
+ * notice appear in supporting documentation, and that the name of
+ * the copyright holders not be used in advertising or publicity
+ * pertaining to distribution of the software without specific,
+ * written prior permission. The copyright holders make no
+ * representations about the suitability of this software for any
+ * purpose. It is provided "as is" without express or implied
+ * warranty.
+ *
+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
+ * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
+ * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ * ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
+ * THIS SOFTWARE.
+ */
+
+#ifndef DRM_CLIENT_PROTOCOL_H
+#define DRM_CLIENT_PROTOCOL_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+#include <stddef.h>
+#include "wayland-client.h"
+
+struct wl_client;
+struct wl_resource;
+
+struct wl_drm;
+
+extern const struct wl_interface wl_drm_interface;
+
+#ifndef WL_DRM_ERROR_ENUM
+#define WL_DRM_ERROR_ENUM
+enum wl_drm_error {
+ WL_DRM_ERROR_AUTHENTICATE_FAIL = 0,
+ WL_DRM_ERROR_INVALID_FORMAT = 1,
+ WL_DRM_ERROR_INVALID_NAME = 2,
+};
+#endif /* WL_DRM_ERROR_ENUM */
+
+#ifndef WL_DRM_FORMAT_ENUM
+#define WL_DRM_FORMAT_ENUM
+enum wl_drm_format {
+ WL_DRM_FORMAT_C8 = 0x20203843,
+ WL_DRM_FORMAT_RGB332 = 0x38424752,
+ WL_DRM_FORMAT_BGR233 = 0x38524742,
+ WL_DRM_FORMAT_XRGB4444 = 0x32315258,
+ WL_DRM_FORMAT_XBGR4444 = 0x32314258,
+ WL_DRM_FORMAT_RGBX4444 = 0x32315852,
+ WL_DRM_FORMAT_BGRX4444 = 0x32315842,
+ WL_DRM_FORMAT_ARGB4444 = 0x32315241,
+ WL_DRM_FORMAT_ABGR4444 = 0x32314241,
+ WL_DRM_FORMAT_RGBA4444 = 0x32314152,
+ WL_DRM_FORMAT_BGRA4444 = 0x32314142,
+ WL_DRM_FORMAT_XRGB1555 = 0x35315258,
+ WL_DRM_FORMAT_XBGR1555 = 0x35314258,
+ WL_DRM_FORMAT_RGBX5551 = 0x35315852,
+ WL_DRM_FORMAT_BGRX5551 = 0x35315842,
+ WL_DRM_FORMAT_ARGB1555 = 0x35315241,
+ WL_DRM_FORMAT_ABGR1555 = 0x35314241,
+ WL_DRM_FORMAT_RGBA5551 = 0x35314152,
+ WL_DRM_FORMAT_BGRA5551 = 0x35314142,
+ WL_DRM_FORMAT_RGB565 = 0x36314752,
+ WL_DRM_FORMAT_BGR565 = 0x36314742,
+ WL_DRM_FORMAT_RGB888 = 0x34324752,
+ WL_DRM_FORMAT_BGR888 = 0x34324742,
+ WL_DRM_FORMAT_XRGB8888 = 0x34325258,
+ WL_DRM_FORMAT_XBGR8888 = 0x34324258,
+ WL_DRM_FORMAT_RGBX8888 = 0x34325852,
+ WL_DRM_FORMAT_BGRX8888 = 0x34325842,
+ WL_DRM_FORMAT_ARGB8888 = 0x34325241,
+ WL_DRM_FORMAT_ABGR8888 = 0x34324241,
+ WL_DRM_FORMAT_RGBA8888 = 0x34324152,
+ WL_DRM_FORMAT_BGRA8888 = 0x34324142,
+ WL_DRM_FORMAT_XRGB2101010 = 0x30335258,
+ WL_DRM_FORMAT_XBGR2101010 = 0x30334258,
+ WL_DRM_FORMAT_RGBX1010102 = 0x30335852,
+ WL_DRM_FORMAT_BGRX1010102 = 0x30335842,
+ WL_DRM_FORMAT_ARGB2101010 = 0x30335241,
+ WL_DRM_FORMAT_ABGR2101010 = 0x30334241,
+ WL_DRM_FORMAT_RGBA1010102 = 0x30334152,
+ WL_DRM_FORMAT_BGRA1010102 = 0x30334142,
+ WL_DRM_FORMAT_YUYV = 0x56595559,
+ WL_DRM_FORMAT_YVYU = 0x55595659,
+ WL_DRM_FORMAT_UYVY = 0x59565955,
+ WL_DRM_FORMAT_VYUY = 0x59555956,
+ WL_DRM_FORMAT_AYUV = 0x56555941,
+ WL_DRM_FORMAT_NV12 = 0x3231564e,
+ WL_DRM_FORMAT_NV21 = 0x3132564e,
+ WL_DRM_FORMAT_NV16 = 0x3631564e,
+ WL_DRM_FORMAT_NV61 = 0x3136564e,
+ WL_DRM_FORMAT_YUV410 = 0x39565559,
+ WL_DRM_FORMAT_YVU410 = 0x39555659,
+ WL_DRM_FORMAT_YUV411 = 0x31315559,
+ WL_DRM_FORMAT_YVU411 = 0x31315659,
+ WL_DRM_FORMAT_YUV420 = 0x32315559,
+ WL_DRM_FORMAT_YVU420 = 0x32315659,
+ WL_DRM_FORMAT_YUV422 = 0x36315559,
+ WL_DRM_FORMAT_YVU422 = 0x36315659,
+ WL_DRM_FORMAT_YUV444 = 0x34325559,
+ WL_DRM_FORMAT_YVU444 = 0x34325659,
+};
+#endif /* WL_DRM_FORMAT_ENUM */
+
+struct wl_drm_listener {
+ /**
+ * device - device
+ * @name: name
+ */
+ void (*device)(void *data,
+ struct wl_drm *wl_drm,
+ const char *name);
+ /**
+ * format - format
+ * @format: format
+ */
+ void (*format)(void *data,
+ struct wl_drm *wl_drm,
+ uint32_t format);
+ /**
+ * authenticated - authenticated
+ */
+ void (*authenticated)(void *data,
+ struct wl_drm *wl_drm);
+};
+
+static inline int
+wl_drm_add_listener(struct wl_drm *wl_drm,
+ const struct wl_drm_listener *listener, void *data)
+{
+ return wl_proxy_add_listener((struct wl_proxy *) wl_drm,
+ (void (**)(void)) listener, data);
+}
+
+#define WL_DRM_AUTHENTICATE 0
+#define WL_DRM_CREATE_BUFFER 1
+#define WL_DRM_CREATE_PLANAR_BUFFER 2
+
+static inline void
+wl_drm_set_user_data(struct wl_drm *wl_drm, void *user_data)
+{
+ wl_proxy_set_user_data((struct wl_proxy *) wl_drm, user_data);
+}
+
+static inline void *
+wl_drm_get_user_data(struct wl_drm *wl_drm)
+{
+ return wl_proxy_get_user_data((struct wl_proxy *) wl_drm);
+}
+
+static inline void
+wl_drm_destroy(struct wl_drm *wl_drm)
+{
+ wl_proxy_destroy((struct wl_proxy *) wl_drm);
+}
+
+static inline void
+wl_drm_authenticate(struct wl_drm *wl_drm, uint32_t id)
+{
+ wl_proxy_marshal((struct wl_proxy *) wl_drm,
+ WL_DRM_AUTHENTICATE, id);
+}
+
+static inline struct wl_buffer *
+wl_drm_create_buffer(struct wl_drm *wl_drm, uint32_t name, int32_t width, int32_t height, uint32_t stride, uint32_t format)
+{
+ struct wl_proxy *id;
+
+ id = wl_proxy_create((struct wl_proxy *) wl_drm,
+ &wl_buffer_interface);
+ if (!id)
+ return NULL;
+
+ wl_proxy_marshal((struct wl_proxy *) wl_drm,
+ WL_DRM_CREATE_BUFFER, id, name, width, height, stride, format);
+
+ return (struct wl_buffer *) id;
+}
+
+static inline struct wl_buffer *
+wl_drm_create_planar_buffer(struct wl_drm *wl_drm, uint32_t name, int32_t width, int32_t height, uint32_t format, int32_t offset0, int32_t stride0, int32_t offset1, int32_t stride1, int32_t offset2, int32_t stride2)
+{
+ struct wl_proxy *id;
+
+ id = wl_proxy_create((struct wl_proxy *) wl_drm,
+ &wl_buffer_interface);
+ if (!id)
+ return NULL;
+
+ wl_proxy_marshal((struct wl_proxy *) wl_drm,
+ WL_DRM_CREATE_PLANAR_BUFFER, id, name, width, height, format, offset0, stride0, offset1, stride1, offset2, stride2);
+
+ return (struct wl_buffer *) id;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/wayland/Makefile.am b/src/wayland/Makefile.am
new file mode 100644
index 0000000..614d8a4
--- /dev/null
+++ b/src/wayland/Makefile.am
@@ -0,0 +1,28 @@
+# Copyright (C) 2012 Intel Corporation. All Rights Reserved.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sub license, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+#
+# The above copyright notice and this permission notice (including the
+# next paragraph) shall be included in all copies or substantial portions
+# of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+# IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+# ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+EXTRA_DIST = \
+ wayland-drm.xml \
+ $(NULL)
+
+# Extra clean files so that maintainer-clean removes *everything*
+MAINTAINERCLEANFILES = Makefile.in
diff --git a/src/wayland/wayland-drm.xml b/src/wayland/wayland-drm.xml
new file mode 100644
index 0000000..265d4f8
--- /dev/null
+++ b/src/wayland/wayland-drm.xml
@@ -0,0 +1,155 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<protocol name="drm">
+
+ <copyright>
+ Copyright © 2008-2011 Kristian Høgsberg
+ Copyright © 2010-2011 Intel Corporation
+
+ Permission to use, copy, modify, distribute, and sell this
+ software and its documentation for any purpose is hereby granted
+ without fee, provided that the above copyright notice appear in
+ all copies and that both that copyright notice and this permission
+ notice appear in supporting documentation, and that the name of
+ the copyright holders not be used in advertising or publicity
+ pertaining to distribution of the software without specific,
+ written prior permission. The copyright holders make no
+ representations about the suitability of this software for any
+ purpose. It is provided "as is" without express or implied
+ warranty.
+
+ THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
+ SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
+ AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
+ THIS SOFTWARE.
+ </copyright>
+
+ <!-- drm support. This object is created by the server and published
+ using the display's global event. -->
+ <interface name="wl_drm" version="1">
+ <enum name="error">
+ <entry name="authenticate_fail" value="0"/>
+ <entry name="invalid_format" value="1"/>
+ <entry name="invalid_name" value="2"/>
+ </enum>
+
+ <enum name="format">
+ <!-- The drm format codes match the #defines in drm_fourcc.h.
+ The formats actually supported by the compositor will be
+ reported by the format event. -->
+ <entry name="c8" value="0x20203843"/>
+ <entry name="rgb332" value="0x38424752"/>
+ <entry name="bgr233" value="0x38524742"/>
+ <entry name="xrgb4444" value="0x32315258"/>
+ <entry name="xbgr4444" value="0x32314258"/>
+ <entry name="rgbx4444" value="0x32315852"/>
+ <entry name="bgrx4444" value="0x32315842"/>
+ <entry name="argb4444" value="0x32315241"/>
+ <entry name="abgr4444" value="0x32314241"/>
+ <entry name="rgba4444" value="0x32314152"/>
+ <entry name="bgra4444" value="0x32314142"/>
+ <entry name="xrgb1555" value="0x35315258"/>
+ <entry name="xbgr1555" value="0x35314258"/>
+ <entry name="rgbx5551" value="0x35315852"/>
+ <entry name="bgrx5551" value="0x35315842"/>
+ <entry name="argb1555" value="0x35315241"/>
+ <entry name="abgr1555" value="0x35314241"/>
+ <entry name="rgba5551" value="0x35314152"/>
+ <entry name="bgra5551" value="0x35314142"/>
+ <entry name="rgb565" value="0x36314752"/>
+ <entry name="bgr565" value="0x36314742"/>
+ <entry name="rgb888" value="0x34324752"/>
+ <entry name="bgr888" value="0x34324742"/>
+ <entry name="xrgb8888" value="0x34325258"/>
+ <entry name="xbgr8888" value="0x34324258"/>
+ <entry name="rgbx8888" value="0x34325852"/>
+ <entry name="bgrx8888" value="0x34325842"/>
+ <entry name="argb8888" value="0x34325241"/>
+ <entry name="abgr8888" value="0x34324241"/>
+ <entry name="rgba8888" value="0x34324152"/>
+ <entry name="bgra8888" value="0x34324142"/>
+ <entry name="xrgb2101010" value="0x30335258"/>
+ <entry name="xbgr2101010" value="0x30334258"/>
+ <entry name="rgbx1010102" value="0x30335852"/>
+ <entry name="bgrx1010102" value="0x30335842"/>
+ <entry name="argb2101010" value="0x30335241"/>
+ <entry name="abgr2101010" value="0x30334241"/>
+ <entry name="rgba1010102" value="0x30334152"/>
+ <entry name="bgra1010102" value="0x30334142"/>
+ <entry name="yuyv" value="0x56595559"/>
+ <entry name="yvyu" value="0x55595659"/>
+ <entry name="uyvy" value="0x59565955"/>
+ <entry name="vyuy" value="0x59555956"/>
+ <entry name="ayuv" value="0x56555941"/>
+ <entry name="nv12" value="0x3231564e"/>
+ <entry name="nv21" value="0x3132564e"/>
+ <entry name="nv16" value="0x3631564e"/>
+ <entry name="nv61" value="0x3136564e"/>
+ <entry name="yuv410" value="0x39565559"/>
+ <entry name="yvu410" value="0x39555659"/>
+ <entry name="yuv411" value="0x31315559"/>
+ <entry name="yvu411" value="0x31315659"/>
+ <entry name="yuv420" value="0x32315559"/>
+ <entry name="yvu420" value="0x32315659"/>
+ <entry name="yuv422" value="0x36315559"/>
+ <entry name="yvu422" value="0x36315659"/>
+ <entry name="yuv444" value="0x34325559"/>
+ <entry name="yvu444" value="0x34325659"/>
+ </enum>
+
+ <!-- Call this request with the magic received from drmGetMagic().
+ It will be passed on to the drmAuthMagic() or
+ DRIAuthConnection() call. This authentication must be
+ completed before create_buffer could be used. -->
+ <request name="authenticate">
+ <arg name="id" type="uint"/>
+ </request>
+
+ <!-- Create a wayland buffer for the named DRM buffer. The DRM
+ surface must have a name using the flink ioctl -->
+ <request name="create_buffer">
+ <arg name="id" type="new_id" interface="wl_buffer"/>
+ <arg name="name" type="uint"/>
+ <arg name="width" type="int"/>
+ <arg name="height" type="int"/>
+ <arg name="stride" type="uint"/>
+ <arg name="format" type="uint"/>
+ </request>
+
+ <!-- Create a wayland buffer for the named DRM buffer. The DRM
+ surface must have a name using the flink ioctl -->
+ <request name="create_planar_buffer">
+ <arg name="id" type="new_id" interface="wl_buffer"/>
+ <arg name="name" type="uint"/>
+ <arg name="width" type="int"/>
+ <arg name="height" type="int"/>
+ <arg name="format" type="uint"/>
+ <arg name="offset0" type="int"/>
+ <arg name="stride0" type="int"/>
+ <arg name="offset1" type="int"/>
+ <arg name="stride1" type="int"/>
+ <arg name="offset2" type="int"/>
+ <arg name="stride2" type="int"/>
+ </request>
+
+ <!-- Notification of the path of the drm device which is used by
+ the server. The client should use this device for creating
+ local buffers. Only buffers created from this device should
+ be passed to the server using this drm object's
+ create_buffer request. -->
+ <event name="device">
+ <arg name="name" type="string"/>
+ </event>
+
+ <event name="format">
+ <arg name="format" type="uint"/>
+ </event>
+
+ <!-- Raised if the authenticate request succeeded -->
+ <event name="authenticated"/>
+ </interface>
+
+</protocol>
--
intel-vaapi-driver packaging
More information about the pkg-multimedia-commits
mailing list