[SCM] intel-vaapi-driver/master: Add patches to handle split kernels

sramacher at users.alioth.debian.org sramacher at users.alioth.debian.org
Tue Oct 17 21:01:27 UTC 2017


The following commit has been merged in the master branch:
commit 325e7f66321d2f1d4eb170e4b84fb3ea09bcfc7e
Author: Sebastian Ramacher <sebastian at ramacher.at>
Date:   Tue Oct 17 23:00:29 2017 +0200

    Add patches to handle split kernels

diff --git a/debian/patches/0001-Move-shaders-without-source-to-extra-library.patch b/debian/patches/0001-Move-shaders-without-source-to-extra-library.patch
new file mode 100644
index 0000000..fda25d1
--- /dev/null
+++ b/debian/patches/0001-Move-shaders-without-source-to-extra-library.patch
@@ -0,0 +1,1003 @@
+From: Sebastian Ramacher <sebastian at ramacher.at>
+Date: Mon, 16 Oct 2017 19:08:40 +0200
+Subject: Move shaders without source to extra library
+
+---
+ src/Makefile.am                    |  14 +++
+ src/Makefile.sources               |   4 +-
+ src/gen75_vpp_gpe.c                |  65 ++++++------
+ src/gen8_encoder_vp8.c             |  71 +++----------
+ src/gen8_post_processing.c         |   7 +-
+ src/gen9_encoder_vp8.c             |  71 +++----------
+ src/gen9_hevc_enc_kernels_binary.h |   5 +-
+ src/gen9_hevc_encoder.c            |   2 +-
+ src/gen9_post_processing.c         |  34 +++----
+ src/gen9_vp9_encoder.c             |  12 +--
+ src/gen9_vp9_encoder_kernels.h     |   4 +-
+ src/i965_avc_encoder.c             |   8 +-
+ src/i965_avc_encoder_kernels.h     |  17 ++--
+ src/i965_drv_video.c               |   6 ++
+ src/kernels.c                      | 202 +++++++++++++++++++++++++++++++++++++
+ src/kernels.h                      |  54 ++++++++++
+ 16 files changed, 378 insertions(+), 198 deletions(-)
+ create mode 100644 src/kernels.c
+ create mode 100644 src/kernels.h
+
+diff --git a/src/Makefile.am b/src/Makefile.am
+index 06977c6..7cfb46c 100644
+--- a/src/Makefile.am
++++ b/src/Makefile.am
+@@ -65,6 +65,20 @@ i965_drv_video_la_LDFLAGS	= -module $(driver_ldflags)
+ i965_drv_video_la_LIBADD	= libi965_drv_video.la $(driver_libs)
+ i965_drv_video_la_SOURCES	=
+ 
++# shaders module
++i965_drv_video_shaders_la_LTLIBRARIES	= i965_drv_video_shaders.la
++i965_drv_video_shaders_ladir		= $(LIBVA_DRIVERS_PATH)
++i965_drv_video_shaders_la_LDFLAGS	= -module $(driver_ldflags)
++i965_drv_video_shaders_la_SOURCES	= \
++	gen75_vpp_gpe_kernels.c \
++	gen8_encoder_vp8_kernels.c \
++	gen8_post_processing_kernels.c \
++	gen9_hevc_enc_kernels_binary.c \
++	gen9_post_processing_kernels.c \
++	gen9_vp9_encoder_kernels.c \
++	i965_avc_encoder_kernels.c \
++	$(NULL)
++
+ noinst_HEADERS			= $(source_h)
+ 
+ if USE_X11
+diff --git a/src/Makefile.sources b/src/Makefile.sources
+index 00e799c..0ba05f8 100644
+--- a/src/Makefile.sources
++++ b/src/Makefile.sources
+@@ -53,18 +53,16 @@ source_c = \
+ 	vp8_probs.c \
+ 	vp9_probs.c \
+ 	vpx_quant.c \
+-	gen9_vp9_encoder_kernels.c \
+ 	gen9_vp9_const_def.c \
+ 	gen9_vp9_encoder.c \
+ 	intel_common_vpp_internal.c \
+ 	i965_encoder_const_def.c \
+ 	i965_avc_const_def.c \
+-	i965_avc_encoder_kernels.c \
+ 	i965_avc_encoder_common.c \
+ 	i965_avc_encoder.c \
+-	gen9_hevc_enc_kernels_binary.c \
+ 	gen9_hevc_encoder.c \
+ 	gen9_hevc_enc_utils.c \
++	kernels.c \
+ 	$(NULL)
+ 
+ source_h = \
+diff --git a/src/gen75_vpp_gpe.c b/src/gen75_vpp_gpe.c
+index ad893e8..3e700c7 100644
+--- a/src/gen75_vpp_gpe.c
++++ b/src/gen75_vpp_gpe.c
+@@ -33,6 +33,7 @@
+ #include "intel_batchbuffer.h"
+ #include "intel_driver.h"
+ 
++#include "kernels.h"
+ #include "i965_structs.h"
+ #include "i965_defines.h"
+ #include "i965_drv_video.h"
+@@ -52,70 +53,51 @@
+ #define CURBE_URB_ENTRY_LENGTH  4
+ 
+ /* Shaders information for sharpening */
+-static const unsigned int gen75_gpe_sharpening_h_blur[][4] = {
+-#include "shaders/post_processing/gen75/sharpening_h_blur.g75b"
+-};
+-static const unsigned int gen75_gpe_sharpening_v_blur[][4] = {
+-#include "shaders/post_processing/gen75/sharpening_v_blur.g75b"
+-};
+-static const unsigned int gen75_gpe_sharpening_unmask[][4] = {
+-#include "shaders/post_processing/gen75/sharpening_unmask.g75b"
+-};
+ static struct i965_kernel gen75_vpp_sharpening_kernels[] = {
+     {
+         "vpp: sharpening(horizontal blur)",
+         VPP_GPE_SHARPENING,
+-        gen75_gpe_sharpening_h_blur,
+-        sizeof(gen75_gpe_sharpening_h_blur),
++        NULL,
++        0,
+         NULL
+     },
+     {
+         "vpp: sharpening(vertical blur)",
+         VPP_GPE_SHARPENING,
+-        gen75_gpe_sharpening_v_blur,
+-        sizeof(gen75_gpe_sharpening_v_blur),
++        NULL,
++        0,
+         NULL
+     },
+     {
+         "vpp: sharpening(unmask)",
+         VPP_GPE_SHARPENING,
+-        gen75_gpe_sharpening_unmask,
+-        sizeof(gen75_gpe_sharpening_unmask),
++        NULL,
++        0,
+         NULL
+     },
+ };
+ 
+ /* sharpening kernels for Broadwell */
+-static const unsigned int gen8_gpe_sharpening_h_blur[][4] = {
+-#include "shaders/post_processing/gen8/sharpening_h_blur.g8b"
+-};
+-static const unsigned int gen8_gpe_sharpening_v_blur[][4] = {
+-#include "shaders/post_processing/gen8/sharpening_v_blur.g8b"
+-};
+-static const unsigned int gen8_gpe_sharpening_unmask[][4] = {
+-#include "shaders/post_processing/gen8/sharpening_unmask.g8b"
+-};
+-
+ static struct i965_kernel gen8_vpp_sharpening_kernels[] = {
+     {
+         "vpp: sharpening(horizontal blur)",
+         VPP_GPE_SHARPENING,
+-        gen8_gpe_sharpening_h_blur,
+-        sizeof(gen8_gpe_sharpening_h_blur),
++        NULL,
++        0,
+         NULL
+     },
+     {
+         "vpp: sharpening(vertical blur)",
+         VPP_GPE_SHARPENING,
+-        gen8_gpe_sharpening_v_blur,
+-        sizeof(gen8_gpe_sharpening_v_blur),
++        NULL,
++        0,
+         NULL
+     },
+     {
+         "vpp: sharpening(unmask)",
+         VPP_GPE_SHARPENING,
+-        gen8_gpe_sharpening_unmask,
+-        sizeof(gen8_gpe_sharpening_unmask),
++        NULL,
++        0,
+         NULL
+     },
+ };
+@@ -624,9 +606,10 @@ vpp_gpe_process_sharpening(VADriverContextP ctx,
+     if (vpp_gpe_ctx->is_first_frame) {
+         vpp_gpe_ctx->sub_shader_sum = 3;
+         struct i965_kernel * vpp_kernels;
+-        if (IS_HASWELL(i965->intel.device_info))
++        // TODO: error out if no shaders available?
++        if (IS_HASWELL(i965->intel.device_info)) {
+             vpp_kernels = gen75_vpp_sharpening_kernels;
+-        else if (IS_GEN8(i965->intel.device_info) ||
++        } else if (IS_GEN8(i965->intel.device_info) ||
+                  IS_GEN9(i965->intel.device_info)) // TODO: build the sharpening kernel for GEN9
+             vpp_kernels = gen8_vpp_sharpening_kernels;
+         else
+@@ -868,6 +851,14 @@ vpp_gpe_context_init(VADriverContextP ctx)
+     gpe_ctx->vfe_state.curbe_allocation_size = CURBE_ALLOCATION_SIZE - 1;
+ 
+     if (IS_HASWELL(i965->intel.device_info)) {
++        /* load kernels */
++        gen75_vpp_sharpening_kernels[0].bin  = gen75_gpe_sharpening_h_blur;
++        gen75_vpp_sharpening_kernels[0].size = gen75_gpe_sharpening_h_blur_size;
++        gen75_vpp_sharpening_kernels[1].bin  = gen75_gpe_sharpening_v_blur;
++        gen75_vpp_sharpening_kernels[1].size = gen75_gpe_sharpening_v_blur_size;
++        gen75_vpp_sharpening_kernels[2].bin  = gen75_gpe_sharpening_unmask;
++        gen75_vpp_sharpening_kernels[2].size = gen75_gpe_sharpening_unmask_size;
++
+         vpp_gpe_ctx->gpe_context_init     = i965_gpe_context_init;
+         vpp_gpe_ctx->gpe_context_destroy  = i965_gpe_context_destroy;
+         vpp_gpe_ctx->gpe_load_kernels     = i965_gpe_load_kernels;
+@@ -880,6 +871,14 @@ vpp_gpe_context_init(VADriverContextP ctx)
+ 
+     } else if (IS_GEN8(i965->intel.device_info) ||
+                IS_GEN9(i965->intel.device_info)) {
++        /* load kernels */
++        gen8_vpp_sharpening_kernels[0].bin  = gen8_gpe_sharpening_h_blur;
++        gen8_vpp_sharpening_kernels[0].size = gen8_gpe_sharpening_h_blur_size;
++        gen8_vpp_sharpening_kernels[1].bin  = gen8_gpe_sharpening_v_blur;
++        gen8_vpp_sharpening_kernels[1].size = gen8_gpe_sharpening_v_blur_size;
++        gen8_vpp_sharpening_kernels[2].bin  = gen8_gpe_sharpening_unmask;
++        gen8_vpp_sharpening_kernels[2].size = gen8_gpe_sharpening_unmask_size;
++
+         vpp_gpe_ctx->gpe_context_init     = gen8_gpe_context_init;
+         vpp_gpe_ctx->gpe_context_destroy  = gen8_gpe_context_destroy;
+         vpp_gpe_ctx->gpe_load_kernels     = gen8_gpe_load_kernels;
+diff --git a/src/gen8_encoder_vp8.c b/src/gen8_encoder_vp8.c
+index 8ac3932..f1334f7 100644
+--- a/src/gen8_encoder_vp8.c
++++ b/src/gen8_encoder_vp8.c
+@@ -36,6 +36,7 @@
+ #include "intel_batchbuffer.h"
+ #include "intel_driver.h"
+ 
++#include "kernels.h"
+ #include "i965_defines.h"
+ #include "i965_drv_video.h"
+ #include "i965_encoder.h"
+@@ -49,89 +50,45 @@ extern struct i965_kernel vp8_kernels_mpu[NUM_VP8_MPU];
+ extern struct i965_kernel vp8_kernels_tpu[NUM_VP8_TPU];
+ extern struct i965_kernel vp8_kernels_brc_update[NUM_VP8_BRC_UPDATE];
+ 
+-static const uint32_t gen8_brc_init_bin_vp8[][4] = {
+-#include "shaders/brc/bsw/vp8_brc_init_genx_0.g8b"
+-};
+-
+-static const uint32_t gen8_brc_reset_bin_vp8[][4] = {
+-#include "shaders/brc/bsw/vp8_brc_reset_genx_0.g8b"
+-};
+-
+-static const uint32_t gen8_scaling_bin_vp8[][4] = {
+-#include "shaders/brc/bsw/hme_downscale_genx_0.g8b"
+-};
+-
+-static const uint32_t gen8_me_bin_vp8[][4] = {
+-#include "shaders/brc/bsw/hme_genx_0.g8b"
+-};
+-
+-static const uint32_t gen8_mbenc_i_frame_dist_bin_vp8[][4] = {
+-#include "shaders/brc/bsw/vp8_intra_distortion_genx_0.g8b"
+-};
+-
+-static const uint32_t gen8_mbenc_i_frame_luma_bin_vp8[][4] = {
+-#include "shaders/brc/bsw/vp8_enc_genx_0.g8b"
+-};
+-
+-static const uint32_t gen8_mbenc_i_frame_chroma_bin_vp8[][4] = {
+-#include "shaders/brc/bsw/vp8_enc_genx_1.g8b"
+-};
+-
+-static const uint32_t gen8_mbenc_p_frame_bin_vp8[][4] = {
+-#include "shaders/brc/bsw/vp8_enc_genx_2.g8b"
+-};
+-
+-static const uint32_t gen8_mpu_bin_vp8[][4] = {
+-#include "shaders/brc/bsw/vp8_mpu_genx_0.g8b"
+-};
+-
+-static const uint32_t gen8_tpu_bin_vp8[][4] = {
+-#include "shaders/brc/bsw/vp8_tpu_genx_0.g8b"
+-};
+-
+-static const uint32_t gen8_brc_update_bin_vp8[][4] = {
+-#include "shaders/brc/bsw/vp8_brc_update_genx_0.g8b"
+-};
+-
+ Bool
+ gen8_encoder_vp8_context_init(VADriverContextP ctx,
+                               struct intel_encoder_context *encoder_context,
+                               struct i965_encoder_vp8_context *vp8_context)
+ {
+     vp8_kernels_brc_init_reset[VP8_BRC_INIT].bin = gen8_brc_init_bin_vp8;
+-    vp8_kernels_brc_init_reset[VP8_BRC_INIT].size = sizeof(gen8_brc_init_bin_vp8);
++    vp8_kernels_brc_init_reset[VP8_BRC_INIT].size = gen8_brc_init_bin_vp8_size;
+     vp8_kernels_brc_init_reset[VP8_BRC_RESET].bin = gen8_brc_reset_bin_vp8;
+-    vp8_kernels_brc_init_reset[VP8_BRC_RESET].size = sizeof(gen8_brc_reset_bin_vp8);
++    vp8_kernels_brc_init_reset[VP8_BRC_RESET].size = gen8_brc_reset_bin_vp8_size;
+ 
+     /* scaling 4x and 16x use the same kernel */
+     vp8_kernels_scaling[VP8_SCALING_4X].bin = gen8_scaling_bin_vp8;
+-    vp8_kernels_scaling[VP8_SCALING_4X].size = sizeof(gen8_scaling_bin_vp8);
++    vp8_kernels_scaling[VP8_SCALING_4X].size = gen8_scaling_bin_vp8_size;
+     vp8_kernels_scaling[VP8_SCALING_16X].bin = gen8_scaling_bin_vp8;
+-    vp8_kernels_scaling[VP8_SCALING_16X].size = sizeof(gen8_scaling_bin_vp8);
++    vp8_kernels_scaling[VP8_SCALING_16X].size = gen8_scaling_bin_vp8_size;
+ 
+     /* me 4x and 16x use the same kernel */
+     vp8_kernels_me[VP8_ME_4X].bin = gen8_me_bin_vp8;
+-    vp8_kernels_me[VP8_ME_4X].size = sizeof(gen8_me_bin_vp8);
++    vp8_kernels_me[VP8_ME_4X].size = gen8_me_bin_vp8_size;
+     vp8_kernels_me[VP8_ME_16X].bin = gen8_me_bin_vp8;
+-    vp8_kernels_me[VP8_ME_16X].size = sizeof(gen8_me_bin_vp8);
++    vp8_kernels_me[VP8_ME_16X].size = gen8_me_bin_vp8_size;
+ 
+     vp8_kernels_mbenc[VP8_MBENC_I_FRAME_DIST].bin = gen8_mbenc_i_frame_dist_bin_vp8;
+-    vp8_kernels_mbenc[VP8_MBENC_I_FRAME_DIST].size = sizeof(gen8_mbenc_i_frame_dist_bin_vp8);
++    vp8_kernels_mbenc[VP8_MBENC_I_FRAME_DIST].size = gen8_mbenc_i_frame_dist_bin_vp8_size;
+     vp8_kernels_mbenc[VP8_MBENC_I_FRAME_LUMA].bin = gen8_mbenc_i_frame_luma_bin_vp8;
+-    vp8_kernels_mbenc[VP8_MBENC_I_FRAME_LUMA].size = sizeof(gen8_mbenc_i_frame_luma_bin_vp8);
++    vp8_kernels_mbenc[VP8_MBENC_I_FRAME_LUMA].size = gen8_mbenc_i_frame_luma_bin_vp8_size;
+     vp8_kernels_mbenc[VP8_MBENC_I_FRAME_CHROMA].bin = gen8_mbenc_i_frame_chroma_bin_vp8;
+-    vp8_kernels_mbenc[VP8_MBENC_I_FRAME_CHROMA].size = sizeof(gen8_mbenc_i_frame_chroma_bin_vp8);
++    vp8_kernels_mbenc[VP8_MBENC_I_FRAME_CHROMA].size = gen8_mbenc_i_frame_chroma_bin_vp8_size;
+     vp8_kernels_mbenc[VP8_MBENC_P_FRAME].bin = gen8_mbenc_p_frame_bin_vp8;
+-    vp8_kernels_mbenc[VP8_MBENC_P_FRAME].size = sizeof(gen8_mbenc_p_frame_bin_vp8);
++    vp8_kernels_mbenc[VP8_MBENC_P_FRAME].size = gen8_mbenc_p_frame_bin_vp8_size;
+ 
+     vp8_kernels_mpu[VP8_MPU].bin = gen8_mpu_bin_vp8;
+-    vp8_kernels_mpu[VP8_MPU].size = sizeof(gen8_mpu_bin_vp8);
++    vp8_kernels_mpu[VP8_MPU].size = gen8_mpu_bin_vp8_size;
+ 
+     vp8_kernels_brc_update[VP8_BRC_UPDATE].bin = gen8_brc_update_bin_vp8;
+-    vp8_kernels_brc_update[VP8_BRC_UPDATE].size = sizeof(gen8_brc_update_bin_vp8);
++    vp8_kernels_brc_update[VP8_BRC_UPDATE].size = gen8_brc_update_bin_vp8_size;
+ 
+     vp8_kernels_tpu[VP8_TPU].bin = gen8_tpu_bin_vp8;
+-    vp8_kernels_tpu[VP8_TPU].size = sizeof(gen8_tpu_bin_vp8);
++    vp8_kernels_tpu[VP8_TPU].size = gen8_tpu_bin_vp8_size;
+ 
+     vp8_context->idrt_entry_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64);
+     vp8_context->mocs = 0;
+diff --git a/src/gen8_post_processing.c b/src/gen8_post_processing.c
+index 033b50e..3317452 100644
+--- a/src/gen8_post_processing.c
++++ b/src/gen8_post_processing.c
+@@ -42,6 +42,7 @@
+ #include "i965_yuv_coefs.h"
+ #include "intel_media.h"
+ 
++#include "kernels.h"
+ #include "gen75_picture_process.h"
+ #include "intel_common_vpp_internal.h"
+ 
+@@ -324,10 +325,6 @@ static struct pp_module pp_modules_gen8[] = {
+ 
+ #define DEFAULT_MOCS    0
+ 
+-static const uint32_t pp_yuv420p8_scaling_gen8[][4] = {
+-#include "shaders/post_processing/gen8/conv_nv12.g8b"
+-};
+-
+ static void
+ gen8_pp_set_surface_tiling(struct gen8_surface_state *ss, unsigned int tiling)
+ {
+@@ -1675,7 +1672,7 @@ gen8_post_processing_context_init(VADriverContextP ctx,
+     gpe_context = &pp_context->scaling_gpe_context;
+     memset(&scaling_kernel, 0, sizeof(scaling_kernel));
+     scaling_kernel.bin = pp_yuv420p8_scaling_gen8;
+-    scaling_kernel.size = sizeof(pp_yuv420p8_scaling_gen8);
++    scaling_kernel.size = pp_yuv420p8_scaling_gen8_size;
+     gen8_gpe_load_kernels(ctx, gpe_context, &scaling_kernel, 1);
+     gpe_context->idrt.entry_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64);
+     gpe_context->idrt.max_entries = 1;
+diff --git a/src/gen9_encoder_vp8.c b/src/gen9_encoder_vp8.c
+index 2ac5efa..e2ea959 100644
+--- a/src/gen9_encoder_vp8.c
++++ b/src/gen9_encoder_vp8.c
+@@ -36,6 +36,7 @@
+ #include "intel_batchbuffer.h"
+ #include "intel_driver.h"
+ 
++#include "kernels.h"
+ #include "i965_defines.h"
+ #include "i965_drv_video.h"
+ #include "i965_encoder.h"
+@@ -49,89 +50,45 @@ extern struct i965_kernel vp8_kernels_mpu[NUM_VP8_MPU];
+ extern struct i965_kernel vp8_kernels_tpu[NUM_VP8_TPU];
+ extern struct i965_kernel vp8_kernels_brc_update[NUM_VP8_BRC_UPDATE];
+ 
+-static const uint32_t gen9_brc_init_bin_vp8[][4] = {
+-#include "shaders/brc/skl/vp8_brc_init_genx_0.g9b"
+-};
+-
+-static const uint32_t gen9_brc_reset_bin_vp8[][4] = {
+-#include "shaders/brc/skl/vp8_brc_reset_genx_0.g9b"
+-};
+-
+-static const uint32_t gen9_scaling_bin_vp8[][4] = {
+-#include "shaders/brc/skl/hme_downscale_genx_0.g9b"
+-};
+-
+-static const uint32_t gen9_me_bin_vp8[][4] = {
+-#include "shaders/brc/skl/hme_genx_0.g9b"
+-};
+-
+-static const uint32_t gen9_mbenc_i_frame_dist_bin_vp8[][4] = {
+-#include "shaders/brc/skl/vp8_intra_distortion_genx_0.g9b"
+-};
+-
+-static const uint32_t gen9_mbenc_i_frame_luma_bin_vp8[][4] = {
+-#include "shaders/brc/skl/vp8_enc_genx_0.g9b"
+-};
+-
+-static const uint32_t gen9_mbenc_i_frame_chroma_bin_vp8[][4] = {
+-#include "shaders/brc/skl/vp8_enc_genx_1.g9b"
+-};
+-
+-static const uint32_t gen9_mbenc_p_frame_bin_vp8[][4] = {
+-#include "shaders/brc/skl/vp8_enc_genx_2.g9b"
+-};
+-
+-static const uint32_t gen9_mpu_bin_vp8[][4] = {
+-#include "shaders/brc/skl/vp8_mpu_genx_0.g9b"
+-};
+-
+-static const uint32_t gen9_tpu_bin_vp8[][4] = {
+-#include "shaders/brc/skl/vp8_tpu_genx_0.g9b"
+-};
+-
+-static const uint32_t gen9_brc_update_bin_vp8[][4] = {
+-#include "shaders/brc/skl/vp8_brc_update_genx_0.g9b"
+-};
+-
+ Bool
+ gen9_encoder_vp8_context_init(VADriverContextP ctx,
+                               struct intel_encoder_context *encoder_context,
+                               struct i965_encoder_vp8_context *vp8_context)
+ {
+     vp8_kernels_brc_init_reset[VP8_BRC_INIT].bin = gen9_brc_init_bin_vp8;
+-    vp8_kernels_brc_init_reset[VP8_BRC_INIT].size = sizeof(gen9_brc_init_bin_vp8);
++    vp8_kernels_brc_init_reset[VP8_BRC_INIT].size = gen9_brc_init_bin_vp8_size;
+     vp8_kernels_brc_init_reset[VP8_BRC_RESET].bin = gen9_brc_reset_bin_vp8;
+-    vp8_kernels_brc_init_reset[VP8_BRC_RESET].size = sizeof(gen9_brc_reset_bin_vp8);
++    vp8_kernels_brc_init_reset[VP8_BRC_RESET].size = gen9_brc_reset_bin_vp8_size;
+ 
+     /* scaling 4x and 16x use the same kernel */
+     vp8_kernels_scaling[VP8_SCALING_4X].bin = gen9_scaling_bin_vp8;
+-    vp8_kernels_scaling[VP8_SCALING_4X].size = sizeof(gen9_scaling_bin_vp8);
++    vp8_kernels_scaling[VP8_SCALING_4X].size = gen9_scaling_bin_vp8_size;
+     vp8_kernels_scaling[VP8_SCALING_16X].bin = gen9_scaling_bin_vp8;
+-    vp8_kernels_scaling[VP8_SCALING_16X].size = sizeof(gen9_scaling_bin_vp8);
++    vp8_kernels_scaling[VP8_SCALING_16X].size = gen9_scaling_bin_vp8_size;
+ 
+     /* me 4x and 16x use the same kernel */
+     vp8_kernels_me[VP8_ME_4X].bin = gen9_me_bin_vp8;
+-    vp8_kernels_me[VP8_ME_4X].size = sizeof(gen9_me_bin_vp8);
++    vp8_kernels_me[VP8_ME_4X].size = gen9_me_bin_vp8_size;
+     vp8_kernels_me[VP8_ME_16X].bin = gen9_me_bin_vp8;
+-    vp8_kernels_me[VP8_ME_16X].size = sizeof(gen9_me_bin_vp8);
++    vp8_kernels_me[VP8_ME_16X].size = gen9_me_bin_vp8_size;
+ 
+     vp8_kernels_mbenc[VP8_MBENC_I_FRAME_DIST].bin = gen9_mbenc_i_frame_dist_bin_vp8;
+-    vp8_kernels_mbenc[VP8_MBENC_I_FRAME_DIST].size = sizeof(gen9_mbenc_i_frame_dist_bin_vp8);
++    vp8_kernels_mbenc[VP8_MBENC_I_FRAME_DIST].size = gen9_mbenc_i_frame_dist_bin_vp8_size;
+     vp8_kernels_mbenc[VP8_MBENC_I_FRAME_LUMA].bin = gen9_mbenc_i_frame_luma_bin_vp8;
+-    vp8_kernels_mbenc[VP8_MBENC_I_FRAME_LUMA].size = sizeof(gen9_mbenc_i_frame_luma_bin_vp8);
++    vp8_kernels_mbenc[VP8_MBENC_I_FRAME_LUMA].size = gen9_mbenc_i_frame_luma_bin_vp8_size;
+     vp8_kernels_mbenc[VP8_MBENC_I_FRAME_CHROMA].bin = gen9_mbenc_i_frame_chroma_bin_vp8;
+-    vp8_kernels_mbenc[VP8_MBENC_I_FRAME_CHROMA].size = sizeof(gen9_mbenc_i_frame_chroma_bin_vp8);
++    vp8_kernels_mbenc[VP8_MBENC_I_FRAME_CHROMA].size = gen9_mbenc_i_frame_chroma_bin_vp8_size;
+     vp8_kernels_mbenc[VP8_MBENC_P_FRAME].bin = gen9_mbenc_p_frame_bin_vp8;
+-    vp8_kernels_mbenc[VP8_MBENC_P_FRAME].size = sizeof(gen9_mbenc_p_frame_bin_vp8);
++    vp8_kernels_mbenc[VP8_MBENC_P_FRAME].size = gen9_mbenc_p_frame_bin_vp8_size;
+ 
+     vp8_kernels_mpu[VP8_MPU].bin = gen9_mpu_bin_vp8;
+-    vp8_kernels_mpu[VP8_MPU].size = sizeof(gen9_mpu_bin_vp8);
++    vp8_kernels_mpu[VP8_MPU].size = gen9_mpu_bin_vp8_size;
+ 
+     vp8_kernels_brc_update[VP8_BRC_UPDATE].bin = gen9_brc_update_bin_vp8;
+-    vp8_kernels_brc_update[VP8_BRC_UPDATE].size = sizeof(gen9_brc_update_bin_vp8);
++    vp8_kernels_brc_update[VP8_BRC_UPDATE].size = gen9_brc_update_bin_vp8_size;
+ 
+     vp8_kernels_tpu[VP8_TPU].bin = gen9_tpu_bin_vp8;
+-    vp8_kernels_tpu[VP8_TPU].size = sizeof(gen9_tpu_bin_vp8);
++    vp8_kernels_tpu[VP8_TPU].size = gen9_tpu_bin_vp8_size;
+ 
+     vp8_context->idrt_entry_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64);
+ 
+diff --git a/src/gen9_hevc_enc_kernels_binary.h b/src/gen9_hevc_enc_kernels_binary.h
+index 29659fa..484421e 100644
+--- a/src/gen9_hevc_enc_kernels_binary.h
++++ b/src/gen9_hevc_enc_kernels_binary.h
+@@ -29,8 +29,7 @@
+ #ifndef GEN9_HEVC_ENCODER_KERNELS_BINARY_H
+ #define GEN9_HEVC_ENCODER_KERNELS_BINARY_H
+ 
+-#define GEN9_HEVC_ENC_KERNEL_SIZE 149296
+-
+-const unsigned int gen9_hevc_encoder_kernels[GEN9_HEVC_ENC_KERNEL_SIZE];
++const unsigned int* gen9_hevc_encoder_kernels;
++int gen9_hevc_encoder_kernels_size;
+ 
+ #endif
+diff --git a/src/gen9_hevc_encoder.c b/src/gen9_hevc_encoder.c
+index 80d9d9c..4732011 100644
+--- a/src/gen9_hevc_encoder.c
++++ b/src/gen9_hevc_encoder.c
+@@ -7355,7 +7355,7 @@ gen9_hevc_vme_context_init(VADriverContextP ctx,
+     struct gen9_hevc_encoder_state *priv_state = NULL;
+ 
+     hevc_enc_kernel_ptr = (void *)gen9_hevc_encoder_kernels;
+-    hevc_enc_kernel_size = sizeof(gen9_hevc_encoder_kernels);
++    hevc_enc_kernel_size = gen9_hevc_encoder_kernels_size;
+ 
+     vme_context = calloc(1, sizeof(*vme_context));
+     priv_ctx = calloc(1, sizeof(*priv_ctx));
+diff --git a/src/gen9_post_processing.c b/src/gen9_post_processing.c
+index faa6598..07f2539 100644
+--- a/src/gen9_post_processing.c
++++ b/src/gen9_post_processing.c
+@@ -37,6 +37,7 @@
+ #include "i965_render.h"
+ #include "intel_media.h"
+ 
++#include "kernels.h"
+ #include "gen8_post_processing.h"
+ #include "gen75_picture_process.h"
+ #include "intel_gen_vppapi.h"
+@@ -110,40 +111,28 @@ static const uint32_t pp_nv12_blending_gen9[][4] = {
+ 
+ #define DEFAULT_MOCS    0x02
+ 
+-static const uint32_t pp_10bit_scaling_gen9[][4] = {
+-#include "shaders/post_processing/gen9/conv_p010.g9b"
+-};
+-
+-static const uint32_t pp_yuv420p8_scaling_gen9[][4] = {
+-#include "shaders/post_processing/gen9/conv_nv12.g9b"
+-};
+-
+-static const uint32_t pp_10bit_8bit_scaling_gen9[][4] = {
+-#include "shaders/post_processing/gen9/conv_10bit_8bit.g9b"
+-};
+-
+ struct i965_kernel pp_common_scaling_gen9[] = {
+     {
+         "10bit to 10bit",
+         0,
+-        pp_10bit_scaling_gen9,
+-        sizeof(pp_10bit_scaling_gen9),
++        NULL,
++        0,
+         NULL,
+     },
+ 
+     {
+         "8bit to 8bit",
+         1,
+-        pp_yuv420p8_scaling_gen9,
+-        sizeof(pp_yuv420p8_scaling_gen9),
++        NULL,
++        0,
+         NULL,
+     },
+ 
+     {
+         "10bit to 8bit",
+         2,
+-        pp_10bit_8bit_scaling_gen9,
+-        sizeof(pp_10bit_8bit_scaling_gen9),
++        NULL,
++        0,
+         NULL,
+     },
+ };
+@@ -535,6 +524,15 @@ gen9_post_processing_context_init(VADriverContextP ctx,
+ 
+     pp_context->intel_post_processing = gen9_post_processing;
+ 
++    /* load kernels */
++    pp_common_scaling_gen9[0].bin  = pp_10bit_scaling_gen9;
++    pp_common_scaling_gen9[0].size = pp_10bit_scaling_gen9_size;
++    pp_common_scaling_gen9[1].bin  = pp_yuv420p8_scaling_gen9;
++    pp_common_scaling_gen9[1].size = pp_yuv420p8_scaling_gen9_size;
++    pp_common_scaling_gen9[2].bin  = pp_10bit_8bit_scaling_gen9;
++    pp_common_scaling_gen9[2].size = pp_10bit_8bit_scaling_gen9_size;
++    // TODO: handle missing kernels
++
+     gpe_context = &pp_context->scaling_gpe_context;
+     gen8_gpe_load_kernels(ctx, gpe_context, pp_common_scaling_gen9, ARRAY_ELEMS(pp_common_scaling_gen9));
+     gpe_context->idrt.entry_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64);
+diff --git a/src/gen9_vp9_encoder.c b/src/gen9_vp9_encoder.c
+index 8389dde..290c0eb 100644
+--- a/src/gen9_vp9_encoder.c
++++ b/src/gen9_vp9_encoder.c
+@@ -4283,7 +4283,7 @@ gen9_vme_scaling_context_init_vp9(VADriverContextP ctx,
+     memset(&scale_kernel, 0, sizeof(scale_kernel));
+ 
+     intel_vp9_get_kernel_header_and_size((void *)media_vp9_kernels,
+-                                         sizeof(media_vp9_kernels),
++                                         media_vp9_kernels_size,
+                                          INTEL_VP9_ENC_SCALING4X,
+                                          0,
+                                          &scale_kernel);
+@@ -4304,7 +4304,7 @@ gen9_vme_scaling_context_init_vp9(VADriverContextP ctx,
+     memset(&scale_kernel, 0, sizeof(scale_kernel));
+ 
+     intel_vp9_get_kernel_header_and_size((void *)media_vp9_kernels,
+-                                         sizeof(media_vp9_kernels),
++                                         media_vp9_kernels_size,
+                                          INTEL_VP9_ENC_SCALING2X,
+                                          0,
+                                          &scale_kernel);
+@@ -4346,7 +4346,7 @@ gen9_vme_me_context_init_vp9(VADriverContextP ctx,
+     memset(&scale_kernel, 0, sizeof(scale_kernel));
+ 
+     intel_vp9_get_kernel_header_and_size((void *)media_vp9_kernels,
+-                                         sizeof(media_vp9_kernels),
++                                         media_vp9_kernels_size,
+                                          INTEL_VP9_ENC_ME,
+                                          0,
+                                          &scale_kernel);
+@@ -4394,7 +4394,7 @@ gen9_vme_mbenc_context_init_vp9(VADriverContextP ctx,
+         memset(&scale_kernel, 0, sizeof(scale_kernel));
+ 
+         intel_vp9_get_kernel_header_and_size((void *)media_vp9_kernels,
+-                                             sizeof(media_vp9_kernels),
++                                             media_vp9_kernels_size,
+                                              INTEL_VP9_ENC_MBENC,
+                                              i,
+                                              &scale_kernel);
+@@ -4434,7 +4434,7 @@ gen9_vme_brc_context_init_vp9(VADriverContextP ctx,
+         memset(&scale_kernel, 0, sizeof(scale_kernel));
+ 
+         intel_vp9_get_kernel_header_and_size((void *)media_vp9_kernels,
+-                                             sizeof(media_vp9_kernels),
++                                             media_vp9_kernels_size,
+                                              INTEL_VP9_ENC_BRC,
+                                              i,
+                                              &scale_kernel);
+@@ -4473,7 +4473,7 @@ gen9_vme_dys_context_init_vp9(VADriverContextP ctx,
+     memset(&scale_kernel, 0, sizeof(scale_kernel));
+ 
+     intel_vp9_get_kernel_header_and_size((void *)media_vp9_kernels,
+-                                         sizeof(media_vp9_kernels),
++                                         media_vp9_kernels_size,
+                                          INTEL_VP9_ENC_DYS,
+                                          0,
+                                          &scale_kernel);
+diff --git a/src/gen9_vp9_encoder_kernels.h b/src/gen9_vp9_encoder_kernels.h
+index 961919c..bf2cabe 100644
+--- a/src/gen9_vp9_encoder_kernels.h
++++ b/src/gen9_vp9_encoder_kernels.h
+@@ -29,7 +29,7 @@
+ #ifndef _GEN9_VP9_ENCODER_KERNELS_H
+ #define _GEN9_VP9_ENCODER_KERNELS_H
+ 
+-#define AllVP9ENC_SZ 39334
+-extern const unsigned int media_vp9_kernels[AllVP9ENC_SZ];
++extern const unsigned int* media_vp9_kernels;
++extern int media_vp9_kernels_size;
+ 
+ #endif
+diff --git a/src/i965_avc_encoder.c b/src/i965_avc_encoder.c
+index 692f3c1..885e151 100644
+--- a/src/i965_avc_encoder.c
++++ b/src/i965_avc_encoder.c
+@@ -9703,18 +9703,18 @@ gen9_avc_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *en
+         IS_BXT(i965->intel.device_info)) {
+         if (!encoder_context->fei_enabled) {
+             generic_ctx->enc_kernel_ptr = (void *)skl_avc_encoder_kernels;
+-            generic_ctx->enc_kernel_size = sizeof(skl_avc_encoder_kernels);
++            generic_ctx->enc_kernel_size = skl_avc_encoder_kernels_size;
+         } else {
+             generic_ctx->enc_kernel_ptr = (void *)skl_avc_fei_encoder_kernels;
+-            generic_ctx->enc_kernel_size = sizeof(skl_avc_fei_encoder_kernels);
++            generic_ctx->enc_kernel_size = skl_avc_fei_encoder_kernels_size;
+         }
+     } else if (IS_GEN8(i965->intel.device_info)) {
+         generic_ctx->enc_kernel_ptr = (void *)bdw_avc_encoder_kernels;
+-        generic_ctx->enc_kernel_size = sizeof(bdw_avc_encoder_kernels);
++        generic_ctx->enc_kernel_size = bdw_avc_encoder_kernels_size;
+     } else if (IS_KBL(i965->intel.device_info) ||
+                IS_GLK(i965->intel.device_info)) {
+         generic_ctx->enc_kernel_ptr = (void *)kbl_avc_encoder_kernels;
+-        generic_ctx->enc_kernel_size = sizeof(kbl_avc_encoder_kernels);
++        generic_ctx->enc_kernel_size = kbl_avc_encoder_kernels_size;
+     } else
+         goto allocate_structure_failed;
+ 
+diff --git a/src/i965_avc_encoder_kernels.h b/src/i965_avc_encoder_kernels.h
+index f7be54e..5576160 100644
+--- a/src/i965_avc_encoder_kernels.h
++++ b/src/i965_avc_encoder_kernels.h
+@@ -30,15 +30,14 @@
+ #ifndef _I965_AVC_ENCODER_KERNELS_H
+ #define _I965_AVC_ENCODER_KERNELS_H
+ 
+-#define AVC_ENC_SKL_SZ 96346
+-extern const unsigned int skl_avc_encoder_kernels[AVC_ENC_SKL_SZ];
++const unsigned int* skl_avc_encoder_kernels;
++const unsigned int* skl_avc_fei_encoder_kernels;
++const unsigned int* kbl_avc_encoder_kernels;
++const unsigned int* bdw_avc_encoder_kernels;
+ 
+-#define AVC_ENC_FEI_SKL_SZ 38000
+-extern const unsigned int skl_avc_fei_encoder_kernels[AVC_ENC_FEI_SKL_SZ];
++int skl_avc_encoder_kernels_size;
++int skl_avc_fei_encoder_kernels_size;
++int kbl_avc_encoder_kernels_size;
++int bdw_avc_encoder_kernels_size;
+ 
+-#define AVC_ENC_KBL_SZ 101994
+-extern const unsigned int kbl_avc_encoder_kernels[AVC_ENC_KBL_SZ];
+-
+-#define AVC_ENC_BDW_SZ  95532
+-extern const unsigned int bdw_avc_encoder_kernels[AVC_ENC_BDW_SZ];
+ #endif//_I965_AVC_ENCODER_KERNELS_H
+diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c
+index 02f4895..687c710 100644
+--- a/src/i965_drv_video.c
++++ b/src/i965_drv_video.c
+@@ -52,6 +52,8 @@
+ 
+ #include "gen9_vp9_encapi.h"
+ 
++#include "kernels.h"
++
+ #define CONFIG_ID_OFFSET                0x01000000
+ #define CONTEXT_ID_OFFSET               0x02000000
+ #define SURFACE_ID_OFFSET               0x04000000
+@@ -7066,6 +7068,8 @@ i965_Terminate(VADriverContextP ctx)
+         ctx->pDriverData = NULL;
+     }
+ 
++    unload_externel_kernels();
++
+     return VA_STATUS_SUCCESS;
+ }
+ 
+@@ -7075,6 +7079,8 @@ VA_DRIVER_INIT_FUNC(VADriverContextP ctx);
+ VAStatus
+ VA_DRIVER_INIT_FUNC(VADriverContextP ctx)
+ {
++    load_external_kernels();
++
+     struct VADriverVTable * const vtable = ctx->vtable;
+     struct VADriverVTableVPP * const vtable_vpp = ctx->vtable_vpp;
+ 
+diff --git a/src/kernels.c b/src/kernels.c
+new file mode 100644
+index 0000000..e8f55d4
+--- /dev/null
++++ b/src/kernels.c
+@@ -0,0 +1,202 @@
++#include "kernels.h"
++
++#include <dlfcn.h>
++
++#include "i965_avc_encoder_kernels.h"
++#include "gen9_hevc_enc_kernels.h"
++#include "gen9_vp9_encoder_kernels.h"
++
++#define DEFINE_EXTERNAL_KERNEL(name) \
++  const unsigned int* name = NULL; \
++  int name ##_size = 0
++
++DEFINE_EXTERNAL_KERNEL(media_vp9_kernels);
++DEFINE_EXTERNAL_KERNEL(gen9_hevc_encoder_kernels);
++DEFINE_EXTERNAL_KERNEL(skl_avc_encoder_kernels);
++DEFINE_EXTERNAL_KERNEL(skl_avc_fei_encoder_kernels);
++DEFINE_EXTERNAL_KERNEL(kbl_avc_encoder_kernels);
++DEFINE_EXTERNAL_KERNEL(bdw_avc_encoder_kernels);
++
++#undef DEFINE_EXTERNAL_KERNEL
++#define DEFINE_EXTERNAL_KERNEL(name) \
++  const uint32_t (*(name))[4] = NULL; \
++  int name ##_size = 0
++
++/* Gen 8 VP8 kernels */
++DEFINE_EXTERNAL_KERNEL(gen8_brc_init_bin_vp8);
++DEFINE_EXTERNAL_KERNEL(gen8_brc_reset_bin_vp8);
++DEFINE_EXTERNAL_KERNEL(gen8_scaling_bin_vp8);
++DEFINE_EXTERNAL_KERNEL(gen8_me_bin_vp8);
++DEFINE_EXTERNAL_KERNEL(gen8_mbenc_i_frame_dist_bin_vp8);
++DEFINE_EXTERNAL_KERNEL(gen8_mbenc_i_frame_luma_bin_vp8);
++DEFINE_EXTERNAL_KERNEL(gen8_mbenc_i_frame_chroma_bin_vp8);
++DEFINE_EXTERNAL_KERNEL(gen8_mbenc_p_frame_bin_vp8);
++DEFINE_EXTERNAL_KERNEL(gen8_mpu_bin_vp8);
++DEFINE_EXTERNAL_KERNEL(gen8_tpu_bin_vp8);
++DEFINE_EXTERNAL_KERNEL(gen8_brc_update_bin_vp8);
++
++/* Gen 9 VP8 kernels */
++DEFINE_EXTERNAL_KERNEL(gen9_brc_init_bin_vp8);
++DEFINE_EXTERNAL_KERNEL(gen9_brc_reset_bin_vp8);
++DEFINE_EXTERNAL_KERNEL(gen9_scaling_bin_vp8);
++DEFINE_EXTERNAL_KERNEL(gen9_me_bin_vp8);
++DEFINE_EXTERNAL_KERNEL(gen9_mbenc_i_frame_dist_bin_vp8);
++DEFINE_EXTERNAL_KERNEL(gen9_mbenc_i_frame_luma_bin_vp8);
++DEFINE_EXTERNAL_KERNEL(gen9_mbenc_i_frame_chroma_bin_vp8);
++DEFINE_EXTERNAL_KERNEL(gen9_mbenc_p_frame_bin_vp8);
++DEFINE_EXTERNAL_KERNEL(gen9_mpu_bin_vp8);
++DEFINE_EXTERNAL_KERNEL(gen9_tpu_bin_vp8);
++DEFINE_EXTERNAL_KERNEL(gen9_brc_update_bin_vp8);
++
++/* Gen 7.5 sharpening kernels */
++DEFINE_EXTERNAL_KERNEL(gen75_gpe_sharpening_h_blur);
++DEFINE_EXTERNAL_KERNEL(gen75_gpe_sharpening_v_blur);
++DEFINE_EXTERNAL_KERNEL(gen75_gpe_sharpening_unmask);
++
++/* Gen 8 sharpening kernels */
++DEFINE_EXTERNAL_KERNEL(gen8_gpe_sharpening_h_blur);
++DEFINE_EXTERNAL_KERNEL(gen8_gpe_sharpening_v_blur);
++DEFINE_EXTERNAL_KERNEL(gen8_gpe_sharpening_unmask);
++/* Gen 8 post-processing kernels */
++DEFINE_EXTERNAL_KERNEL(pp_yuv420p8_scaling_gen8);
++
++/* Gen 9 post-processing kernels */
++DEFINE_EXTERNAL_KERNEL(pp_10bit_scaling_gen9);
++DEFINE_EXTERNAL_KERNEL(pp_yuv420p8_scaling_gen9);
++DEFINE_EXTERNAL_KERNEL(pp_10bit_8bit_scaling_gen9);
++
++static void* dso_handle = NULL;
++
++int load_external_kernels(void) {
++  /* already loaded */
++  if (dso_handle != NULL) {
++    return 1;
++  }
++
++  dso_handle = dlopen("./i965_drv_video_shaders.so", RTLD_LOCAL | RTLD_NOW);
++  if (dso_handle == NULL) {
++    goto err;
++  }
++
++#define LOAD_EXTERNAL_KERNEL(name) \
++  do { \
++    const int* tmp_size = dlsym(dso_handle, #name "_size"); \
++    name = dlsym(dso_handle, #name); \
++    if (tmp_size == NULL || name == NULL) { \
++      goto err; \
++    } \
++    name##_size = *tmp_size; \
++  } while(0)
++
++  LOAD_EXTERNAL_KERNEL(media_vp9_kernels);
++  LOAD_EXTERNAL_KERNEL(gen9_hevc_encoder_kernels);
++  LOAD_EXTERNAL_KERNEL(skl_avc_encoder_kernels);
++  LOAD_EXTERNAL_KERNEL(skl_avc_fei_encoder_kernels);
++  LOAD_EXTERNAL_KERNEL(kbl_avc_encoder_kernels);
++  LOAD_EXTERNAL_KERNEL(bdw_avc_encoder_kernels);
++
++  LOAD_EXTERNAL_KERNEL(gen8_brc_init_bin_vp8);
++  LOAD_EXTERNAL_KERNEL(gen8_brc_reset_bin_vp8);
++  LOAD_EXTERNAL_KERNEL(gen8_scaling_bin_vp8);
++  LOAD_EXTERNAL_KERNEL(gen8_me_bin_vp8);
++  LOAD_EXTERNAL_KERNEL(gen8_mbenc_i_frame_dist_bin_vp8);
++  LOAD_EXTERNAL_KERNEL(gen8_mbenc_i_frame_luma_bin_vp8);
++  LOAD_EXTERNAL_KERNEL(gen8_mbenc_i_frame_chroma_bin_vp8);
++  LOAD_EXTERNAL_KERNEL(gen8_mbenc_p_frame_bin_vp8);
++  LOAD_EXTERNAL_KERNEL(gen8_mpu_bin_vp8);
++  LOAD_EXTERNAL_KERNEL(gen8_tpu_bin_vp8);
++  LOAD_EXTERNAL_KERNEL(gen8_brc_update_bin_vp8);
++
++  LOAD_EXTERNAL_KERNEL(gen9_brc_init_bin_vp8);
++  LOAD_EXTERNAL_KERNEL(gen9_brc_reset_bin_vp8);
++  LOAD_EXTERNAL_KERNEL(gen9_scaling_bin_vp8);
++  LOAD_EXTERNAL_KERNEL(gen9_me_bin_vp8);
++  LOAD_EXTERNAL_KERNEL(gen9_mbenc_i_frame_dist_bin_vp8);
++  LOAD_EXTERNAL_KERNEL(gen9_mbenc_i_frame_luma_bin_vp8);
++  LOAD_EXTERNAL_KERNEL(gen9_mbenc_i_frame_chroma_bin_vp8);
++  LOAD_EXTERNAL_KERNEL(gen9_mbenc_p_frame_bin_vp8);
++  LOAD_EXTERNAL_KERNEL(gen9_mpu_bin_vp8);
++  LOAD_EXTERNAL_KERNEL(gen9_tpu_bin_vp8);
++  LOAD_EXTERNAL_KERNEL(gen9_brc_update_bin_vp8);
++
++  LOAD_EXTERNAL_KERNEL(gen75_gpe_sharpening_h_blur);
++  LOAD_EXTERNAL_KERNEL(gen75_gpe_sharpening_v_blur);
++  LOAD_EXTERNAL_KERNEL(gen75_gpe_sharpening_unmask);
++
++  LOAD_EXTERNAL_KERNEL(gen8_gpe_sharpening_h_blur);
++  LOAD_EXTERNAL_KERNEL(gen8_gpe_sharpening_v_blur);
++  LOAD_EXTERNAL_KERNEL(gen8_gpe_sharpening_unmask);
++  LOAD_EXTERNAL_KERNEL(pp_yuv420p8_scaling_gen8);
++
++  LOAD_EXTERNAL_KERNEL(pp_10bit_scaling_gen9);
++  LOAD_EXTERNAL_KERNEL(pp_yuv420p8_scaling_gen9);
++  LOAD_EXTERNAL_KERNEL(pp_10bit_8bit_scaling_gen9);
++
++err:
++  unload_externel_kernels();
++  return 0;
++}
++
++void unload_externel_kernels(void) {
++  if (dso_handle == NULL) {
++    return;
++  }
++
++#define UNLOAD_EXTERNAL_KERNEL(name) \
++  do { \
++    name = NULL; \
++    name##_size = 0; \
++  } while(0)
++
++  UNLOAD_EXTERNAL_KERNEL(media_vp9_kernels);
++  UNLOAD_EXTERNAL_KERNEL(gen9_hevc_encoder_kernels);
++  UNLOAD_EXTERNAL_KERNEL(skl_avc_encoder_kernels);
++  UNLOAD_EXTERNAL_KERNEL(skl_avc_fei_encoder_kernels);
++  UNLOAD_EXTERNAL_KERNEL(kbl_avc_encoder_kernels);
++  UNLOAD_EXTERNAL_KERNEL(bdw_avc_encoder_kernels);
++
++  UNLOAD_EXTERNAL_KERNEL(gen8_brc_init_bin_vp8);
++  UNLOAD_EXTERNAL_KERNEL(gen8_brc_reset_bin_vp8);
++  UNLOAD_EXTERNAL_KERNEL(gen8_scaling_bin_vp8);
++  UNLOAD_EXTERNAL_KERNEL(gen8_me_bin_vp8);
++  UNLOAD_EXTERNAL_KERNEL(gen8_mbenc_i_frame_dist_bin_vp8);
++  UNLOAD_EXTERNAL_KERNEL(gen8_mbenc_i_frame_luma_bin_vp8);
++  UNLOAD_EXTERNAL_KERNEL(gen8_mbenc_i_frame_chroma_bin_vp8);
++  UNLOAD_EXTERNAL_KERNEL(gen8_mbenc_p_frame_bin_vp8);
++  UNLOAD_EXTERNAL_KERNEL(gen8_mpu_bin_vp8);
++  UNLOAD_EXTERNAL_KERNEL(gen8_tpu_bin_vp8);
++  UNLOAD_EXTERNAL_KERNEL(gen8_brc_update_bin_vp8);
++
++  UNLOAD_EXTERNAL_KERNEL(gen9_brc_init_bin_vp8);
++  UNLOAD_EXTERNAL_KERNEL(gen9_brc_reset_bin_vp8);
++  UNLOAD_EXTERNAL_KERNEL(gen9_scaling_bin_vp8);
++  UNLOAD_EXTERNAL_KERNEL(gen9_me_bin_vp8);
++  UNLOAD_EXTERNAL_KERNEL(gen9_mbenc_i_frame_dist_bin_vp8);
++  UNLOAD_EXTERNAL_KERNEL(gen9_mbenc_i_frame_luma_bin_vp8);
++  UNLOAD_EXTERNAL_KERNEL(gen9_mbenc_i_frame_chroma_bin_vp8);
++  UNLOAD_EXTERNAL_KERNEL(gen9_mbenc_p_frame_bin_vp8);
++  UNLOAD_EXTERNAL_KERNEL(gen9_mpu_bin_vp8);
++  UNLOAD_EXTERNAL_KERNEL(gen9_tpu_bin_vp8);
++  UNLOAD_EXTERNAL_KERNEL(gen9_brc_update_bin_vp8);
++
++  UNLOAD_EXTERNAL_KERNEL(gen75_gpe_sharpening_h_blur);
++  UNLOAD_EXTERNAL_KERNEL(gen75_gpe_sharpening_v_blur);
++  UNLOAD_EXTERNAL_KERNEL(gen75_gpe_sharpening_unmask);
++
++  UNLOAD_EXTERNAL_KERNEL(gen8_gpe_sharpening_h_blur);
++  UNLOAD_EXTERNAL_KERNEL(gen8_gpe_sharpening_v_blur);
++  UNLOAD_EXTERNAL_KERNEL(gen8_gpe_sharpening_unmask);
++  UNLOAD_EXTERNAL_KERNEL(pp_yuv420p8_scaling_gen8);
++
++  UNLOAD_EXTERNAL_KERNEL(pp_10bit_scaling_gen9);
++  UNLOAD_EXTERNAL_KERNEL(pp_yuv420p8_scaling_gen9);
++  UNLOAD_EXTERNAL_KERNEL(pp_10bit_8bit_scaling_gen9);
++
++  dlclose(dso_handle);
++  dso_handle = NULL;
++}
++
++int has_external_kernels(void) {
++  return dso_handle != NULL;
++}
++
+diff --git a/src/kernels.h b/src/kernels.h
+new file mode 100644
+index 0000000..a0f1b63
+--- /dev/null
++++ b/src/kernels.h
+@@ -0,0 +1,54 @@
++#ifndef KERNELS_H
++#define KERNELS_H
++
++#include <stddef.h>
++#include <stdint.h>
++
++
++
++int load_external_kernels(void);
++void unload_externel_kernels(void);
++int has_external_kernels(void);
++
++#define DECLARE_EXTERNAL_KERNEL(name) \
++  const uint32_t (*(name))[4]; \
++  int name ##_size
++
++DECLARE_EXTERNAL_KERNEL(gen8_brc_init_bin_vp8);
++DECLARE_EXTERNAL_KERNEL(gen8_brc_reset_bin_vp8);
++DECLARE_EXTERNAL_KERNEL(gen8_scaling_bin_vp8);
++DECLARE_EXTERNAL_KERNEL(gen8_me_bin_vp8);
++DECLARE_EXTERNAL_KERNEL(gen8_mbenc_i_frame_dist_bin_vp8);
++DECLARE_EXTERNAL_KERNEL(gen8_mbenc_i_frame_luma_bin_vp8);
++DECLARE_EXTERNAL_KERNEL(gen8_mbenc_i_frame_chroma_bin_vp8);
++DECLARE_EXTERNAL_KERNEL(gen8_mbenc_p_frame_bin_vp8);
++DECLARE_EXTERNAL_KERNEL(gen8_mpu_bin_vp8);
++DECLARE_EXTERNAL_KERNEL(gen8_tpu_bin_vp8);
++DECLARE_EXTERNAL_KERNEL(gen8_brc_update_bin_vp8);
++
++DECLARE_EXTERNAL_KERNEL(gen9_brc_init_bin_vp8);
++DECLARE_EXTERNAL_KERNEL(gen9_brc_reset_bin_vp8);
++DECLARE_EXTERNAL_KERNEL(gen9_scaling_bin_vp8);
++DECLARE_EXTERNAL_KERNEL(gen9_me_bin_vp8);
++DECLARE_EXTERNAL_KERNEL(gen9_mbenc_i_frame_dist_bin_vp8);
++DECLARE_EXTERNAL_KERNEL(gen9_mbenc_i_frame_luma_bin_vp8);
++DECLARE_EXTERNAL_KERNEL(gen9_mbenc_i_frame_chroma_bin_vp8);
++DECLARE_EXTERNAL_KERNEL(gen9_mbenc_p_frame_bin_vp8);
++DECLARE_EXTERNAL_KERNEL(gen9_mpu_bin_vp8);
++DECLARE_EXTERNAL_KERNEL(gen9_tpu_bin_vp8);
++DECLARE_EXTERNAL_KERNEL(gen9_brc_update_bin_vp8);
++
++DECLARE_EXTERNAL_KERNEL(gen75_gpe_sharpening_h_blur);
++DECLARE_EXTERNAL_KERNEL(gen75_gpe_sharpening_v_blur);
++DECLARE_EXTERNAL_KERNEL(gen75_gpe_sharpening_unmask);
++
++DECLARE_EXTERNAL_KERNEL(gen8_gpe_sharpening_h_blur);
++DECLARE_EXTERNAL_KERNEL(gen8_gpe_sharpening_v_blur);
++DECLARE_EXTERNAL_KERNEL(gen8_gpe_sharpening_unmask);
++DECLARE_EXTERNAL_KERNEL(pp_yuv420p8_scaling_gen8);
++
++DECLARE_EXTERNAL_KERNEL(pp_10bit_scaling_gen9);
++DECLARE_EXTERNAL_KERNEL(pp_yuv420p8_scaling_gen9);
++DECLARE_EXTERNAL_KERNEL(pp_10bit_8bit_scaling_gen9);
++
++#endif
diff --git a/debian/patches/0002-Check-if-kernels-are-available.patch b/debian/patches/0002-Check-if-kernels-are-available.patch
new file mode 100644
index 0000000..54170a6
--- /dev/null
+++ b/debian/patches/0002-Check-if-kernels-are-available.patch
@@ -0,0 +1,42 @@
+From: Sebastian Ramacher <sebastian at ramacher.at>
+Date: Mon, 16 Oct 2017 19:23:28 +0200
+Subject: Check if kernels are available
+
+---
+ src/i965_drv_video.h | 9 ++++++---
+ 1 file changed, 6 insertions(+), 3 deletions(-)
+
+diff --git a/src/i965_drv_video.h b/src/i965_drv_video.h
+index 2d27b53..1c175c9 100644
+--- a/src/i965_drv_video.h
++++ b/src/i965_drv_video.h
+@@ -125,7 +125,8 @@
+                                  (ctx)->intel.has_bsd)
+ 
+ #define HAS_VP8_ENCODING(ctx)   ((ctx)->codec_info->has_vp8_encoding && \
+-                                 (ctx)->intel.has_bsd)
++                                 (ctx)->intel.has_bsd && \
++                                 has_external_kernels())
+ 
+ #define HAS_H264_MVC_DECODING(ctx) \
+     (HAS_H264_DECODING(ctx) && (ctx)->codec_info->h264_mvc_dec_profiles)
+@@ -144,7 +145,8 @@
+                                          (ctx)->intel.has_bsd)
+ 
+ #define HAS_VP9_DECODING(ctx)          ((ctx)->codec_info->has_vp9_decoding && \
+-                                         (ctx)->intel.has_bsd)
++                                         (ctx)->intel.has_bsd && \
++                                         has_external_kernels())
+ 
+ #define HAS_VP9_DECODING_PROFILE(ctx, profile)                     \
+     (HAS_VP9_DECODING(ctx) &&                                      \
+@@ -159,7 +161,8 @@
+                                          (ctx)->intel.has_bsd)
+ 
+ #define HAS_VP9_ENCODING(ctx)          ((ctx)->codec_info->has_vp9_encoding && \
+-                                         (ctx)->intel.has_bsd)
++                                         (ctx)->intel.has_bsd && \
++                                         has_external_kernels())
+ 
+ #define HAS_VP9_ENCODING_PROFILE(ctx, profile)                     \
+     (HAS_VP9_ENCODING(ctx) &&                                      \
diff --git a/debian/patches/0003-Allow-to-disable-building-of-split-kernels.patch b/debian/patches/0003-Allow-to-disable-building-of-split-kernels.patch
new file mode 100644
index 0000000..9ad79ff
--- /dev/null
+++ b/debian/patches/0003-Allow-to-disable-building-of-split-kernels.patch
@@ -0,0 +1,83 @@
+From: Sebastian Ramacher <sebastian at ramacher.at>
+Date: Mon, 16 Oct 2017 21:06:30 +0200
+Subject: Allow to disable building of split kernels
+
+---
+ configure.ac         | 11 +++++++++++
+ src/Makefile.am      | 12 +++---------
+ src/Makefile.sources | 10 ++++++++++
+ 3 files changed, 24 insertions(+), 9 deletions(-)
+
+diff --git a/configure.ac b/configure.ac
+index d7fd36a..1acf374 100644
+--- a/configure.ac
++++ b/configure.ac
+@@ -72,6 +72,12 @@ AC_ARG_ENABLE([tests],
+                     [build tests @<:@default=no@:>@])],
+     [], [enable_tests="no"])
+ 
++AC_ARG_ENABLE(split-kernels,
++    [AC_HELP_STRING([--enable-split-kernels],
++                    [split kernels without source @<:@default=yes@:>@])],
++    [], [enable_split_kernels="yes"])
++
++
+ AC_DISABLE_STATIC
+ AC_PROG_LIBTOOL
+ AC_PROG_CC
+@@ -129,6 +135,11 @@ fi
+ 
+ AM_CONDITIONAL(ENABLE_TESTS, test "$enable_tests" = "yes")
+ 
++if test "$enable_split_kernels" = "yes"; then
++    AC_DEFINE([HAVE_SPLIT_KERNELS], [1], [DEfined to 1 if split kernels are enabled])
++fi
++AM_CONDITIONAL(HAVE_SPLIT_KERNELS, test "$enable_split_kernels" = "yes")
++
+ VA_VERSION=`$PKG_CONFIG --modversion libva`
+ VA_MAJOR_VERSION=`echo "$VA_VERSION" | cut -d'.' -f1`
+ VA_MINOR_VERSION=`echo "$VA_VERSION" | cut -d'.' -f2`
+diff --git a/src/Makefile.am b/src/Makefile.am
+index 7cfb46c..be403bc 100644
+--- a/src/Makefile.am
++++ b/src/Makefile.am
+@@ -66,18 +66,12 @@ i965_drv_video_la_LIBADD	= libi965_drv_video.la $(driver_libs)
+ i965_drv_video_la_SOURCES	=
+ 
+ # shaders module
++if HAVE_SPLIT_KERNELS
+ i965_drv_video_shaders_la_LTLIBRARIES	= i965_drv_video_shaders.la
+ i965_drv_video_shaders_ladir		= $(LIBVA_DRIVERS_PATH)
+ i965_drv_video_shaders_la_LDFLAGS	= -module $(driver_ldflags)
+-i965_drv_video_shaders_la_SOURCES	= \
+-	gen75_vpp_gpe_kernels.c \
+-	gen8_encoder_vp8_kernels.c \
+-	gen8_post_processing_kernels.c \
+-	gen9_hevc_enc_kernels_binary.c \
+-	gen9_post_processing_kernels.c \
+-	gen9_vp9_encoder_kernels.c \
+-	i965_avc_encoder_kernels.c \
+-	$(NULL)
++i965_drv_video_shaders_la_SOURCES	= $(kernel_source_c)
++endif
+ 
+ noinst_HEADERS			= $(source_h)
+ 
+diff --git a/src/Makefile.sources b/src/Makefile.sources
+index 0ba05f8..086db8f 100644
+--- a/src/Makefile.sources
++++ b/src/Makefile.sources
+@@ -131,3 +131,13 @@ source_h = \
+ 	gen9_hevc_enc_utils.h \
+ 	gen9_hevc_encoder.h \
+ 	$(NULL)
++
++kernel_source_c = \
++	gen75_vpp_gpe_kernels.c \
++	gen8_encoder_vp8_kernels.c \
++	gen8_post_processing_kernels.c \
++	gen9_hevc_enc_kernels_binary.c \
++	gen9_post_processing_kernels.c \
++	gen9_vp9_encoder_kernels.c \
++	i965_avc_encoder_kernels.c \
++	$(NULL)
diff --git a/debian/patches/0004-Load-scaling-kernels-only-if-available.patch b/debian/patches/0004-Load-scaling-kernels-only-if-available.patch
new file mode 100644
index 0000000..249ee05
--- /dev/null
+++ b/debian/patches/0004-Load-scaling-kernels-only-if-available.patch
@@ -0,0 +1,183 @@
+From: Sebastian Ramacher <sebastian at ramacher.at>
+Date: Mon, 16 Oct 2017 21:45:05 +0200
+Subject: Load scaling kernels only if available
+
+---
+ src/gen8_post_processing.c | 72 ++++++++++++++++++++++---------------------
+ src/gen9_post_processing.c | 76 ++++++++++++++++++++++++----------------------
+ 2 files changed, 77 insertions(+), 71 deletions(-)
+
+diff --git a/src/gen8_post_processing.c b/src/gen8_post_processing.c
+index 3317452..ea01ede 100644
+--- a/src/gen8_post_processing.c
++++ b/src/gen8_post_processing.c
+@@ -1669,43 +1669,45 @@ gen8_post_processing_context_init(VADriverContextP ctx,
+      * I420 ->I420
+      * I420 ->NV12
+      */
+-    gpe_context = &pp_context->scaling_gpe_context;
+-    memset(&scaling_kernel, 0, sizeof(scaling_kernel));
+-    scaling_kernel.bin = pp_yuv420p8_scaling_gen8;
+-    scaling_kernel.size = pp_yuv420p8_scaling_gen8_size;
+-    gen8_gpe_load_kernels(ctx, gpe_context, &scaling_kernel, 1);
+-    gpe_context->idrt.entry_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64);
+-    gpe_context->idrt.max_entries = 1;
+-    gpe_context->sampler.entry_size = ALIGN(sizeof(struct gen8_sampler_state), 64);
+-    gpe_context->sampler.max_entries = 1;
+-    gpe_context->curbe.length = ALIGN(sizeof(struct scaling_input_parameter), 32);
+-
+-    gpe_context->surface_state_binding_table.max_entries = MAX_SCALING_SURFACES;
+-    gpe_context->surface_state_binding_table.binding_table_offset = 0;
+-    gpe_context->surface_state_binding_table.surface_state_offset = ALIGN(MAX_SCALING_SURFACES * 4, 64);
+-    gpe_context->surface_state_binding_table.length = ALIGN(MAX_SCALING_SURFACES * 4, 64) + ALIGN(MAX_SCALING_SURFACES * SURFACE_STATE_PADDED_SIZE_GEN8, 64);
+-
+-    if (i965->intel.eu_total > 0) {
+-        gpe_context->vfe_state.max_num_threads = i965->intel.eu_total * 6;
+-    } else {
+-        if (i965->intel.has_bsd2)
+-            gpe_context->vfe_state.max_num_threads = 300;
+-        else
+-            gpe_context->vfe_state.max_num_threads = 60;
++    if (has_external_kernels()) {
++      gpe_context = &pp_context->scaling_gpe_context;
++      memset(&scaling_kernel, 0, sizeof(scaling_kernel));
++      scaling_kernel.bin = pp_yuv420p8_scaling_gen8;
++      scaling_kernel.size = pp_yuv420p8_scaling_gen8_size;
++      gen8_gpe_load_kernels(ctx, gpe_context, &scaling_kernel, 1);
++      gpe_context->idrt.entry_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64);
++      gpe_context->idrt.max_entries = 1;
++      gpe_context->sampler.entry_size = ALIGN(sizeof(struct gen8_sampler_state), 64);
++      gpe_context->sampler.max_entries = 1;
++      gpe_context->curbe.length = ALIGN(sizeof(struct scaling_input_parameter), 32);
++
++      gpe_context->surface_state_binding_table.max_entries = MAX_SCALING_SURFACES;
++      gpe_context->surface_state_binding_table.binding_table_offset = 0;
++      gpe_context->surface_state_binding_table.surface_state_offset = ALIGN(MAX_SCALING_SURFACES * 4, 64);
++      gpe_context->surface_state_binding_table.length = ALIGN(MAX_SCALING_SURFACES * 4, 64) + ALIGN(MAX_SCALING_SURFACES * SURFACE_STATE_PADDED_SIZE_GEN8, 64);
++
++      if (i965->intel.eu_total > 0) {
++          gpe_context->vfe_state.max_num_threads = i965->intel.eu_total * 6;
++      } else {
++          if (i965->intel.has_bsd2)
++              gpe_context->vfe_state.max_num_threads = 300;
++          else
++              gpe_context->vfe_state.max_num_threads = 60;
++      }
++
++      gpe_context->vfe_state.curbe_allocation_size = 37;
++      gpe_context->vfe_state.urb_entry_size = 16;
++      if (i965->intel.has_bsd2)
++          gpe_context->vfe_state.num_urb_entries = 127;
++      else
++          gpe_context->vfe_state.num_urb_entries = 64;
++
++      gpe_context->vfe_state.gpgpu_mode = 0;
++
++      gen8_gpe_context_init(ctx, gpe_context);
++      pp_context->scaling_gpe_context_initialized |= VPPGPE_8BIT_8BIT;
+     }
+ 
+-    gpe_context->vfe_state.curbe_allocation_size = 37;
+-    gpe_context->vfe_state.urb_entry_size = 16;
+-    if (i965->intel.has_bsd2)
+-        gpe_context->vfe_state.num_urb_entries = 127;
+-    else
+-        gpe_context->vfe_state.num_urb_entries = 64;
+-
+-    gpe_context->vfe_state.gpgpu_mode = 0;
+-
+-    gen8_gpe_context_init(ctx, gpe_context);
+-    pp_context->scaling_gpe_context_initialized |= VPPGPE_8BIT_8BIT;
+-
+     return;
+ }
+ 
+diff --git a/src/gen9_post_processing.c b/src/gen9_post_processing.c
+index 07f2539..6626177 100644
+--- a/src/gen9_post_processing.c
++++ b/src/gen9_post_processing.c
+@@ -524,44 +524,48 @@ gen9_post_processing_context_init(VADriverContextP ctx,
+ 
+     pp_context->intel_post_processing = gen9_post_processing;
+ 
+-    /* load kernels */
+-    pp_common_scaling_gen9[0].bin  = pp_10bit_scaling_gen9;
+-    pp_common_scaling_gen9[0].size = pp_10bit_scaling_gen9_size;
+-    pp_common_scaling_gen9[1].bin  = pp_yuv420p8_scaling_gen9;
+-    pp_common_scaling_gen9[1].size = pp_yuv420p8_scaling_gen9_size;
+-    pp_common_scaling_gen9[2].bin  = pp_10bit_8bit_scaling_gen9;
+-    pp_common_scaling_gen9[2].size = pp_10bit_8bit_scaling_gen9_size;
+-    // TODO: handle missing kernels
+-
+-    gpe_context = &pp_context->scaling_gpe_context;
+-    gen8_gpe_load_kernels(ctx, gpe_context, pp_common_scaling_gen9, ARRAY_ELEMS(pp_common_scaling_gen9));
+-    gpe_context->idrt.entry_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64);
+-    gpe_context->idrt.max_entries = ALIGN(ARRAY_ELEMS(pp_common_scaling_gen9), 2);
+-    gpe_context->sampler.entry_size = ALIGN(sizeof(struct gen8_sampler_state), 64);
+-    gpe_context->sampler.max_entries = 1;
+-    gpe_context->curbe.length = ALIGN(sizeof(struct scaling_input_parameter), 64);
+-
+-    gpe_context->surface_state_binding_table.max_entries = MAX_SCALING_SURFACES;
+-    gpe_context->surface_state_binding_table.binding_table_offset = 0;
+-    gpe_context->surface_state_binding_table.surface_state_offset = ALIGN(MAX_SCALING_SURFACES * 4, 64);
+-    gpe_context->surface_state_binding_table.length = ALIGN(MAX_SCALING_SURFACES * 4, 64) + ALIGN(MAX_SCALING_SURFACES * SURFACE_STATE_PADDED_SIZE_GEN9, 64);
+-
+-    if (i965->intel.eu_total > 0) {
+-        gpe_context->vfe_state.max_num_threads = i965->intel.eu_total * 6;
+-    } else {
+-        if (i965->intel.has_bsd2)
+-            gpe_context->vfe_state.max_num_threads = 300;
+-        else
+-            gpe_context->vfe_state.max_num_threads = 60;
++    /* load kernels if available */
++    if (pp_common_scaling_gen9[0].bin == NULL && has_external_kernels()) {
++      pp_common_scaling_gen9[0].bin  = pp_10bit_scaling_gen9;
++      pp_common_scaling_gen9[0].size = pp_10bit_scaling_gen9_size;
++      pp_common_scaling_gen9[1].bin  = pp_yuv420p8_scaling_gen9;
++      pp_common_scaling_gen9[1].size = pp_yuv420p8_scaling_gen9_size;
++      pp_common_scaling_gen9[2].bin  = pp_10bit_8bit_scaling_gen9;
++      pp_common_scaling_gen9[2].size = pp_10bit_8bit_scaling_gen9_size;
+     }
+ 
+-    gpe_context->vfe_state.curbe_allocation_size = 37;
+-    gpe_context->vfe_state.urb_entry_size = 16;
+-    gpe_context->vfe_state.num_urb_entries = 127;
+-    gpe_context->vfe_state.gpgpu_mode = 0;
+-
+-    gen8_gpe_context_init(ctx, gpe_context);
+-    pp_context->scaling_gpe_context_initialized |= (VPPGPE_8BIT_8BIT | VPPGPE_10BIT_10BIT | VPPGPE_10BIT_8BIT);
++    /* initialize scaling context only if kernels are available */
++    if (pp_common_scaling_gen9[0].bin != NULL) {
++      gpe_context = &pp_context->scaling_gpe_context;
++      gen8_gpe_load_kernels(ctx, gpe_context, pp_common_scaling_gen9, ARRAY_ELEMS(pp_common_scaling_gen9));
++      gpe_context->idrt.entry_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64);
++      gpe_context->idrt.max_entries = ALIGN(ARRAY_ELEMS(pp_common_scaling_gen9), 2);
++      gpe_context->sampler.entry_size = ALIGN(sizeof(struct gen8_sampler_state), 64);
++      gpe_context->sampler.max_entries = 1;
++      gpe_context->curbe.length = ALIGN(sizeof(struct scaling_input_parameter), 64);
++
++      gpe_context->surface_state_binding_table.max_entries = MAX_SCALING_SURFACES;
++      gpe_context->surface_state_binding_table.binding_table_offset = 0;
++      gpe_context->surface_state_binding_table.surface_state_offset = ALIGN(MAX_SCALING_SURFACES * 4, 64);
++      gpe_context->surface_state_binding_table.length = ALIGN(MAX_SCALING_SURFACES * 4, 64) + ALIGN(MAX_SCALING_SURFACES * SURFACE_STATE_PADDED_SIZE_GEN9, 64);
++
++      if (i965->intel.eu_total > 0) {
++          gpe_context->vfe_state.max_num_threads = i965->intel.eu_total * 6;
++      } else {
++          if (i965->intel.has_bsd2)
++              gpe_context->vfe_state.max_num_threads = 300;
++          else
++              gpe_context->vfe_state.max_num_threads = 60;
++      }
++
++      gpe_context->vfe_state.curbe_allocation_size = 37;
++      gpe_context->vfe_state.urb_entry_size = 16;
++      gpe_context->vfe_state.num_urb_entries = 127;
++      gpe_context->vfe_state.gpgpu_mode = 0;
++
++      gen8_gpe_context_init(ctx, gpe_context);
++      pp_context->scaling_gpe_context_initialized |= (VPPGPE_8BIT_8BIT | VPPGPE_10BIT_10BIT | VPPGPE_10BIT_8BIT);
++    }
+ 
+     return;
+ }
diff --git a/debian/patches/series b/debian/patches/series
new file mode 100644
index 0000000..6088c96
--- /dev/null
+++ b/debian/patches/series
@@ -0,0 +1,4 @@
+0001-Move-shaders-without-source-to-extra-library.patch
+0002-Check-if-kernels-are-available.patch
+0003-Allow-to-disable-building-of-split-kernels.patch
+0004-Load-scaling-kernels-only-if-available.patch

-- 
intel-vaapi-driver packaging



More information about the pkg-multimedia-commits mailing list