[SCM] intel-vaapi-driver/master: Add patches to handle split kernels
sramacher at users.alioth.debian.org
sramacher at users.alioth.debian.org
Tue Oct 17 21:01:27 UTC 2017
The following commit has been merged in the master branch:
commit 325e7f66321d2f1d4eb170e4b84fb3ea09bcfc7e
Author: Sebastian Ramacher <sebastian at ramacher.at>
Date: Tue Oct 17 23:00:29 2017 +0200
Add patches to handle split kernels
diff --git a/debian/patches/0001-Move-shaders-without-source-to-extra-library.patch b/debian/patches/0001-Move-shaders-without-source-to-extra-library.patch
new file mode 100644
index 0000000..fda25d1
--- /dev/null
+++ b/debian/patches/0001-Move-shaders-without-source-to-extra-library.patch
@@ -0,0 +1,1003 @@
+From: Sebastian Ramacher <sebastian at ramacher.at>
+Date: Mon, 16 Oct 2017 19:08:40 +0200
+Subject: Move shaders without source to extra library
+
+---
+ src/Makefile.am | 14 +++
+ src/Makefile.sources | 4 +-
+ src/gen75_vpp_gpe.c | 65 ++++++------
+ src/gen8_encoder_vp8.c | 71 +++----------
+ src/gen8_post_processing.c | 7 +-
+ src/gen9_encoder_vp8.c | 71 +++----------
+ src/gen9_hevc_enc_kernels_binary.h | 5 +-
+ src/gen9_hevc_encoder.c | 2 +-
+ src/gen9_post_processing.c | 34 +++----
+ src/gen9_vp9_encoder.c | 12 +--
+ src/gen9_vp9_encoder_kernels.h | 4 +-
+ src/i965_avc_encoder.c | 8 +-
+ src/i965_avc_encoder_kernels.h | 17 ++--
+ src/i965_drv_video.c | 6 ++
+ src/kernels.c | 202 +++++++++++++++++++++++++++++++++++++
+ src/kernels.h | 54 ++++++++++
+ 16 files changed, 378 insertions(+), 198 deletions(-)
+ create mode 100644 src/kernels.c
+ create mode 100644 src/kernels.h
+
+diff --git a/src/Makefile.am b/src/Makefile.am
+index 06977c6..7cfb46c 100644
+--- a/src/Makefile.am
++++ b/src/Makefile.am
+@@ -65,6 +65,20 @@ i965_drv_video_la_LDFLAGS = -module $(driver_ldflags)
+ i965_drv_video_la_LIBADD = libi965_drv_video.la $(driver_libs)
+ i965_drv_video_la_SOURCES =
+
++# shaders module
++i965_drv_video_shaders_la_LTLIBRARIES = i965_drv_video_shaders.la
++i965_drv_video_shaders_ladir = $(LIBVA_DRIVERS_PATH)
++i965_drv_video_shaders_la_LDFLAGS = -module $(driver_ldflags)
++i965_drv_video_shaders_la_SOURCES = \
++ gen75_vpp_gpe_kernels.c \
++ gen8_encoder_vp8_kernels.c \
++ gen8_post_processing_kernels.c \
++ gen9_hevc_enc_kernels_binary.c \
++ gen9_post_processing_kernels.c \
++ gen9_vp9_encoder_kernels.c \
++ i965_avc_encoder_kernels.c \
++ $(NULL)
++
+ noinst_HEADERS = $(source_h)
+
+ if USE_X11
+diff --git a/src/Makefile.sources b/src/Makefile.sources
+index 00e799c..0ba05f8 100644
+--- a/src/Makefile.sources
++++ b/src/Makefile.sources
+@@ -53,18 +53,16 @@ source_c = \
+ vp8_probs.c \
+ vp9_probs.c \
+ vpx_quant.c \
+- gen9_vp9_encoder_kernels.c \
+ gen9_vp9_const_def.c \
+ gen9_vp9_encoder.c \
+ intel_common_vpp_internal.c \
+ i965_encoder_const_def.c \
+ i965_avc_const_def.c \
+- i965_avc_encoder_kernels.c \
+ i965_avc_encoder_common.c \
+ i965_avc_encoder.c \
+- gen9_hevc_enc_kernels_binary.c \
+ gen9_hevc_encoder.c \
+ gen9_hevc_enc_utils.c \
++ kernels.c \
+ $(NULL)
+
+ source_h = \
+diff --git a/src/gen75_vpp_gpe.c b/src/gen75_vpp_gpe.c
+index ad893e8..3e700c7 100644
+--- a/src/gen75_vpp_gpe.c
++++ b/src/gen75_vpp_gpe.c
+@@ -33,6 +33,7 @@
+ #include "intel_batchbuffer.h"
+ #include "intel_driver.h"
+
++#include "kernels.h"
+ #include "i965_structs.h"
+ #include "i965_defines.h"
+ #include "i965_drv_video.h"
+@@ -52,70 +53,51 @@
+ #define CURBE_URB_ENTRY_LENGTH 4
+
+ /* Shaders information for sharpening */
+-static const unsigned int gen75_gpe_sharpening_h_blur[][4] = {
+-#include "shaders/post_processing/gen75/sharpening_h_blur.g75b"
+-};
+-static const unsigned int gen75_gpe_sharpening_v_blur[][4] = {
+-#include "shaders/post_processing/gen75/sharpening_v_blur.g75b"
+-};
+-static const unsigned int gen75_gpe_sharpening_unmask[][4] = {
+-#include "shaders/post_processing/gen75/sharpening_unmask.g75b"
+-};
+ static struct i965_kernel gen75_vpp_sharpening_kernels[] = {
+ {
+ "vpp: sharpening(horizontal blur)",
+ VPP_GPE_SHARPENING,
+- gen75_gpe_sharpening_h_blur,
+- sizeof(gen75_gpe_sharpening_h_blur),
++ NULL,
++ 0,
+ NULL
+ },
+ {
+ "vpp: sharpening(vertical blur)",
+ VPP_GPE_SHARPENING,
+- gen75_gpe_sharpening_v_blur,
+- sizeof(gen75_gpe_sharpening_v_blur),
++ NULL,
++ 0,
+ NULL
+ },
+ {
+ "vpp: sharpening(unmask)",
+ VPP_GPE_SHARPENING,
+- gen75_gpe_sharpening_unmask,
+- sizeof(gen75_gpe_sharpening_unmask),
++ NULL,
++ 0,
+ NULL
+ },
+ };
+
+ /* sharpening kernels for Broadwell */
+-static const unsigned int gen8_gpe_sharpening_h_blur[][4] = {
+-#include "shaders/post_processing/gen8/sharpening_h_blur.g8b"
+-};
+-static const unsigned int gen8_gpe_sharpening_v_blur[][4] = {
+-#include "shaders/post_processing/gen8/sharpening_v_blur.g8b"
+-};
+-static const unsigned int gen8_gpe_sharpening_unmask[][4] = {
+-#include "shaders/post_processing/gen8/sharpening_unmask.g8b"
+-};
+-
+ static struct i965_kernel gen8_vpp_sharpening_kernels[] = {
+ {
+ "vpp: sharpening(horizontal blur)",
+ VPP_GPE_SHARPENING,
+- gen8_gpe_sharpening_h_blur,
+- sizeof(gen8_gpe_sharpening_h_blur),
++ NULL,
++ 0,
+ NULL
+ },
+ {
+ "vpp: sharpening(vertical blur)",
+ VPP_GPE_SHARPENING,
+- gen8_gpe_sharpening_v_blur,
+- sizeof(gen8_gpe_sharpening_v_blur),
++ NULL,
++ 0,
+ NULL
+ },
+ {
+ "vpp: sharpening(unmask)",
+ VPP_GPE_SHARPENING,
+- gen8_gpe_sharpening_unmask,
+- sizeof(gen8_gpe_sharpening_unmask),
++ NULL,
++ 0,
+ NULL
+ },
+ };
+@@ -624,9 +606,10 @@ vpp_gpe_process_sharpening(VADriverContextP ctx,
+ if (vpp_gpe_ctx->is_first_frame) {
+ vpp_gpe_ctx->sub_shader_sum = 3;
+ struct i965_kernel * vpp_kernels;
+- if (IS_HASWELL(i965->intel.device_info))
++ // TODO: error out if no shaders available?
++ if (IS_HASWELL(i965->intel.device_info)) {
+ vpp_kernels = gen75_vpp_sharpening_kernels;
+- else if (IS_GEN8(i965->intel.device_info) ||
++ } else if (IS_GEN8(i965->intel.device_info) ||
+ IS_GEN9(i965->intel.device_info)) // TODO: build the sharpening kernel for GEN9
+ vpp_kernels = gen8_vpp_sharpening_kernels;
+ else
+@@ -868,6 +851,14 @@ vpp_gpe_context_init(VADriverContextP ctx)
+ gpe_ctx->vfe_state.curbe_allocation_size = CURBE_ALLOCATION_SIZE - 1;
+
+ if (IS_HASWELL(i965->intel.device_info)) {
++ /* load kernels */
++ gen75_vpp_sharpening_kernels[0].bin = gen75_gpe_sharpening_h_blur;
++ gen75_vpp_sharpening_kernels[0].size = gen75_gpe_sharpening_h_blur_size;
++ gen75_vpp_sharpening_kernels[1].bin = gen75_gpe_sharpening_v_blur;
++ gen75_vpp_sharpening_kernels[1].size = gen75_gpe_sharpening_v_blur_size;
++ gen75_vpp_sharpening_kernels[2].bin = gen75_gpe_sharpening_unmask;
++ gen75_vpp_sharpening_kernels[2].size = gen75_gpe_sharpening_unmask_size;
++
+ vpp_gpe_ctx->gpe_context_init = i965_gpe_context_init;
+ vpp_gpe_ctx->gpe_context_destroy = i965_gpe_context_destroy;
+ vpp_gpe_ctx->gpe_load_kernels = i965_gpe_load_kernels;
+@@ -880,6 +871,14 @@ vpp_gpe_context_init(VADriverContextP ctx)
+
+ } else if (IS_GEN8(i965->intel.device_info) ||
+ IS_GEN9(i965->intel.device_info)) {
++ /* load kernels */
++ gen8_vpp_sharpening_kernels[0].bin = gen8_gpe_sharpening_h_blur;
++ gen8_vpp_sharpening_kernels[0].size = gen8_gpe_sharpening_h_blur_size;
++ gen8_vpp_sharpening_kernels[1].bin = gen8_gpe_sharpening_v_blur;
++ gen8_vpp_sharpening_kernels[1].size = gen8_gpe_sharpening_v_blur_size;
++ gen8_vpp_sharpening_kernels[2].bin = gen8_gpe_sharpening_unmask;
++ gen8_vpp_sharpening_kernels[2].size = gen8_gpe_sharpening_unmask_size;
++
+ vpp_gpe_ctx->gpe_context_init = gen8_gpe_context_init;
+ vpp_gpe_ctx->gpe_context_destroy = gen8_gpe_context_destroy;
+ vpp_gpe_ctx->gpe_load_kernels = gen8_gpe_load_kernels;
+diff --git a/src/gen8_encoder_vp8.c b/src/gen8_encoder_vp8.c
+index 8ac3932..f1334f7 100644
+--- a/src/gen8_encoder_vp8.c
++++ b/src/gen8_encoder_vp8.c
+@@ -36,6 +36,7 @@
+ #include "intel_batchbuffer.h"
+ #include "intel_driver.h"
+
++#include "kernels.h"
+ #include "i965_defines.h"
+ #include "i965_drv_video.h"
+ #include "i965_encoder.h"
+@@ -49,89 +50,45 @@ extern struct i965_kernel vp8_kernels_mpu[NUM_VP8_MPU];
+ extern struct i965_kernel vp8_kernels_tpu[NUM_VP8_TPU];
+ extern struct i965_kernel vp8_kernels_brc_update[NUM_VP8_BRC_UPDATE];
+
+-static const uint32_t gen8_brc_init_bin_vp8[][4] = {
+-#include "shaders/brc/bsw/vp8_brc_init_genx_0.g8b"
+-};
+-
+-static const uint32_t gen8_brc_reset_bin_vp8[][4] = {
+-#include "shaders/brc/bsw/vp8_brc_reset_genx_0.g8b"
+-};
+-
+-static const uint32_t gen8_scaling_bin_vp8[][4] = {
+-#include "shaders/brc/bsw/hme_downscale_genx_0.g8b"
+-};
+-
+-static const uint32_t gen8_me_bin_vp8[][4] = {
+-#include "shaders/brc/bsw/hme_genx_0.g8b"
+-};
+-
+-static const uint32_t gen8_mbenc_i_frame_dist_bin_vp8[][4] = {
+-#include "shaders/brc/bsw/vp8_intra_distortion_genx_0.g8b"
+-};
+-
+-static const uint32_t gen8_mbenc_i_frame_luma_bin_vp8[][4] = {
+-#include "shaders/brc/bsw/vp8_enc_genx_0.g8b"
+-};
+-
+-static const uint32_t gen8_mbenc_i_frame_chroma_bin_vp8[][4] = {
+-#include "shaders/brc/bsw/vp8_enc_genx_1.g8b"
+-};
+-
+-static const uint32_t gen8_mbenc_p_frame_bin_vp8[][4] = {
+-#include "shaders/brc/bsw/vp8_enc_genx_2.g8b"
+-};
+-
+-static const uint32_t gen8_mpu_bin_vp8[][4] = {
+-#include "shaders/brc/bsw/vp8_mpu_genx_0.g8b"
+-};
+-
+-static const uint32_t gen8_tpu_bin_vp8[][4] = {
+-#include "shaders/brc/bsw/vp8_tpu_genx_0.g8b"
+-};
+-
+-static const uint32_t gen8_brc_update_bin_vp8[][4] = {
+-#include "shaders/brc/bsw/vp8_brc_update_genx_0.g8b"
+-};
+-
+ Bool
+ gen8_encoder_vp8_context_init(VADriverContextP ctx,
+ struct intel_encoder_context *encoder_context,
+ struct i965_encoder_vp8_context *vp8_context)
+ {
+ vp8_kernels_brc_init_reset[VP8_BRC_INIT].bin = gen8_brc_init_bin_vp8;
+- vp8_kernels_brc_init_reset[VP8_BRC_INIT].size = sizeof(gen8_brc_init_bin_vp8);
++ vp8_kernels_brc_init_reset[VP8_BRC_INIT].size = gen8_brc_init_bin_vp8_size;
+ vp8_kernels_brc_init_reset[VP8_BRC_RESET].bin = gen8_brc_reset_bin_vp8;
+- vp8_kernels_brc_init_reset[VP8_BRC_RESET].size = sizeof(gen8_brc_reset_bin_vp8);
++ vp8_kernels_brc_init_reset[VP8_BRC_RESET].size = gen8_brc_reset_bin_vp8_size;
+
+ /* scaling 4x and 16x use the same kernel */
+ vp8_kernels_scaling[VP8_SCALING_4X].bin = gen8_scaling_bin_vp8;
+- vp8_kernels_scaling[VP8_SCALING_4X].size = sizeof(gen8_scaling_bin_vp8);
++ vp8_kernels_scaling[VP8_SCALING_4X].size = gen8_scaling_bin_vp8_size;
+ vp8_kernels_scaling[VP8_SCALING_16X].bin = gen8_scaling_bin_vp8;
+- vp8_kernels_scaling[VP8_SCALING_16X].size = sizeof(gen8_scaling_bin_vp8);
++ vp8_kernels_scaling[VP8_SCALING_16X].size = gen8_scaling_bin_vp8_size;
+
+ /* me 4x and 16x use the same kernel */
+ vp8_kernels_me[VP8_ME_4X].bin = gen8_me_bin_vp8;
+- vp8_kernels_me[VP8_ME_4X].size = sizeof(gen8_me_bin_vp8);
++ vp8_kernels_me[VP8_ME_4X].size = gen8_me_bin_vp8_size;
+ vp8_kernels_me[VP8_ME_16X].bin = gen8_me_bin_vp8;
+- vp8_kernels_me[VP8_ME_16X].size = sizeof(gen8_me_bin_vp8);
++ vp8_kernels_me[VP8_ME_16X].size = gen8_me_bin_vp8_size;
+
+ vp8_kernels_mbenc[VP8_MBENC_I_FRAME_DIST].bin = gen8_mbenc_i_frame_dist_bin_vp8;
+- vp8_kernels_mbenc[VP8_MBENC_I_FRAME_DIST].size = sizeof(gen8_mbenc_i_frame_dist_bin_vp8);
++ vp8_kernels_mbenc[VP8_MBENC_I_FRAME_DIST].size = gen8_mbenc_i_frame_dist_bin_vp8_size;
+ vp8_kernels_mbenc[VP8_MBENC_I_FRAME_LUMA].bin = gen8_mbenc_i_frame_luma_bin_vp8;
+- vp8_kernels_mbenc[VP8_MBENC_I_FRAME_LUMA].size = sizeof(gen8_mbenc_i_frame_luma_bin_vp8);
++ vp8_kernels_mbenc[VP8_MBENC_I_FRAME_LUMA].size = gen8_mbenc_i_frame_luma_bin_vp8_size;
+ vp8_kernels_mbenc[VP8_MBENC_I_FRAME_CHROMA].bin = gen8_mbenc_i_frame_chroma_bin_vp8;
+- vp8_kernels_mbenc[VP8_MBENC_I_FRAME_CHROMA].size = sizeof(gen8_mbenc_i_frame_chroma_bin_vp8);
++ vp8_kernels_mbenc[VP8_MBENC_I_FRAME_CHROMA].size = gen8_mbenc_i_frame_chroma_bin_vp8_size;
+ vp8_kernels_mbenc[VP8_MBENC_P_FRAME].bin = gen8_mbenc_p_frame_bin_vp8;
+- vp8_kernels_mbenc[VP8_MBENC_P_FRAME].size = sizeof(gen8_mbenc_p_frame_bin_vp8);
++ vp8_kernels_mbenc[VP8_MBENC_P_FRAME].size = gen8_mbenc_p_frame_bin_vp8_size;
+
+ vp8_kernels_mpu[VP8_MPU].bin = gen8_mpu_bin_vp8;
+- vp8_kernels_mpu[VP8_MPU].size = sizeof(gen8_mpu_bin_vp8);
++ vp8_kernels_mpu[VP8_MPU].size = gen8_mpu_bin_vp8_size;
+
+ vp8_kernels_brc_update[VP8_BRC_UPDATE].bin = gen8_brc_update_bin_vp8;
+- vp8_kernels_brc_update[VP8_BRC_UPDATE].size = sizeof(gen8_brc_update_bin_vp8);
++ vp8_kernels_brc_update[VP8_BRC_UPDATE].size = gen8_brc_update_bin_vp8_size;
+
+ vp8_kernels_tpu[VP8_TPU].bin = gen8_tpu_bin_vp8;
+- vp8_kernels_tpu[VP8_TPU].size = sizeof(gen8_tpu_bin_vp8);
++ vp8_kernels_tpu[VP8_TPU].size = gen8_tpu_bin_vp8_size;
+
+ vp8_context->idrt_entry_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64);
+ vp8_context->mocs = 0;
+diff --git a/src/gen8_post_processing.c b/src/gen8_post_processing.c
+index 033b50e..3317452 100644
+--- a/src/gen8_post_processing.c
++++ b/src/gen8_post_processing.c
+@@ -42,6 +42,7 @@
+ #include "i965_yuv_coefs.h"
+ #include "intel_media.h"
+
++#include "kernels.h"
+ #include "gen75_picture_process.h"
+ #include "intel_common_vpp_internal.h"
+
+@@ -324,10 +325,6 @@ static struct pp_module pp_modules_gen8[] = {
+
+ #define DEFAULT_MOCS 0
+
+-static const uint32_t pp_yuv420p8_scaling_gen8[][4] = {
+-#include "shaders/post_processing/gen8/conv_nv12.g8b"
+-};
+-
+ static void
+ gen8_pp_set_surface_tiling(struct gen8_surface_state *ss, unsigned int tiling)
+ {
+@@ -1675,7 +1672,7 @@ gen8_post_processing_context_init(VADriverContextP ctx,
+ gpe_context = &pp_context->scaling_gpe_context;
+ memset(&scaling_kernel, 0, sizeof(scaling_kernel));
+ scaling_kernel.bin = pp_yuv420p8_scaling_gen8;
+- scaling_kernel.size = sizeof(pp_yuv420p8_scaling_gen8);
++ scaling_kernel.size = pp_yuv420p8_scaling_gen8_size;
+ gen8_gpe_load_kernels(ctx, gpe_context, &scaling_kernel, 1);
+ gpe_context->idrt.entry_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64);
+ gpe_context->idrt.max_entries = 1;
+diff --git a/src/gen9_encoder_vp8.c b/src/gen9_encoder_vp8.c
+index 2ac5efa..e2ea959 100644
+--- a/src/gen9_encoder_vp8.c
++++ b/src/gen9_encoder_vp8.c
+@@ -36,6 +36,7 @@
+ #include "intel_batchbuffer.h"
+ #include "intel_driver.h"
+
++#include "kernels.h"
+ #include "i965_defines.h"
+ #include "i965_drv_video.h"
+ #include "i965_encoder.h"
+@@ -49,89 +50,45 @@ extern struct i965_kernel vp8_kernels_mpu[NUM_VP8_MPU];
+ extern struct i965_kernel vp8_kernels_tpu[NUM_VP8_TPU];
+ extern struct i965_kernel vp8_kernels_brc_update[NUM_VP8_BRC_UPDATE];
+
+-static const uint32_t gen9_brc_init_bin_vp8[][4] = {
+-#include "shaders/brc/skl/vp8_brc_init_genx_0.g9b"
+-};
+-
+-static const uint32_t gen9_brc_reset_bin_vp8[][4] = {
+-#include "shaders/brc/skl/vp8_brc_reset_genx_0.g9b"
+-};
+-
+-static const uint32_t gen9_scaling_bin_vp8[][4] = {
+-#include "shaders/brc/skl/hme_downscale_genx_0.g9b"
+-};
+-
+-static const uint32_t gen9_me_bin_vp8[][4] = {
+-#include "shaders/brc/skl/hme_genx_0.g9b"
+-};
+-
+-static const uint32_t gen9_mbenc_i_frame_dist_bin_vp8[][4] = {
+-#include "shaders/brc/skl/vp8_intra_distortion_genx_0.g9b"
+-};
+-
+-static const uint32_t gen9_mbenc_i_frame_luma_bin_vp8[][4] = {
+-#include "shaders/brc/skl/vp8_enc_genx_0.g9b"
+-};
+-
+-static const uint32_t gen9_mbenc_i_frame_chroma_bin_vp8[][4] = {
+-#include "shaders/brc/skl/vp8_enc_genx_1.g9b"
+-};
+-
+-static const uint32_t gen9_mbenc_p_frame_bin_vp8[][4] = {
+-#include "shaders/brc/skl/vp8_enc_genx_2.g9b"
+-};
+-
+-static const uint32_t gen9_mpu_bin_vp8[][4] = {
+-#include "shaders/brc/skl/vp8_mpu_genx_0.g9b"
+-};
+-
+-static const uint32_t gen9_tpu_bin_vp8[][4] = {
+-#include "shaders/brc/skl/vp8_tpu_genx_0.g9b"
+-};
+-
+-static const uint32_t gen9_brc_update_bin_vp8[][4] = {
+-#include "shaders/brc/skl/vp8_brc_update_genx_0.g9b"
+-};
+-
+ Bool
+ gen9_encoder_vp8_context_init(VADriverContextP ctx,
+ struct intel_encoder_context *encoder_context,
+ struct i965_encoder_vp8_context *vp8_context)
+ {
+ vp8_kernels_brc_init_reset[VP8_BRC_INIT].bin = gen9_brc_init_bin_vp8;
+- vp8_kernels_brc_init_reset[VP8_BRC_INIT].size = sizeof(gen9_brc_init_bin_vp8);
++ vp8_kernels_brc_init_reset[VP8_BRC_INIT].size = gen9_brc_init_bin_vp8_size;
+ vp8_kernels_brc_init_reset[VP8_BRC_RESET].bin = gen9_brc_reset_bin_vp8;
+- vp8_kernels_brc_init_reset[VP8_BRC_RESET].size = sizeof(gen9_brc_reset_bin_vp8);
++ vp8_kernels_brc_init_reset[VP8_BRC_RESET].size = gen9_brc_reset_bin_vp8_size;
+
+ /* scaling 4x and 16x use the same kernel */
+ vp8_kernels_scaling[VP8_SCALING_4X].bin = gen9_scaling_bin_vp8;
+- vp8_kernels_scaling[VP8_SCALING_4X].size = sizeof(gen9_scaling_bin_vp8);
++ vp8_kernels_scaling[VP8_SCALING_4X].size = gen9_scaling_bin_vp8_size;
+ vp8_kernels_scaling[VP8_SCALING_16X].bin = gen9_scaling_bin_vp8;
+- vp8_kernels_scaling[VP8_SCALING_16X].size = sizeof(gen9_scaling_bin_vp8);
++ vp8_kernels_scaling[VP8_SCALING_16X].size = gen9_scaling_bin_vp8_size;
+
+ /* me 4x and 16x use the same kernel */
+ vp8_kernels_me[VP8_ME_4X].bin = gen9_me_bin_vp8;
+- vp8_kernels_me[VP8_ME_4X].size = sizeof(gen9_me_bin_vp8);
++ vp8_kernels_me[VP8_ME_4X].size = gen9_me_bin_vp8_size;
+ vp8_kernels_me[VP8_ME_16X].bin = gen9_me_bin_vp8;
+- vp8_kernels_me[VP8_ME_16X].size = sizeof(gen9_me_bin_vp8);
++ vp8_kernels_me[VP8_ME_16X].size = gen9_me_bin_vp8_size;
+
+ vp8_kernels_mbenc[VP8_MBENC_I_FRAME_DIST].bin = gen9_mbenc_i_frame_dist_bin_vp8;
+- vp8_kernels_mbenc[VP8_MBENC_I_FRAME_DIST].size = sizeof(gen9_mbenc_i_frame_dist_bin_vp8);
++ vp8_kernels_mbenc[VP8_MBENC_I_FRAME_DIST].size = gen9_mbenc_i_frame_dist_bin_vp8_size;
+ vp8_kernels_mbenc[VP8_MBENC_I_FRAME_LUMA].bin = gen9_mbenc_i_frame_luma_bin_vp8;
+- vp8_kernels_mbenc[VP8_MBENC_I_FRAME_LUMA].size = sizeof(gen9_mbenc_i_frame_luma_bin_vp8);
++ vp8_kernels_mbenc[VP8_MBENC_I_FRAME_LUMA].size = gen9_mbenc_i_frame_luma_bin_vp8_size;
+ vp8_kernels_mbenc[VP8_MBENC_I_FRAME_CHROMA].bin = gen9_mbenc_i_frame_chroma_bin_vp8;
+- vp8_kernels_mbenc[VP8_MBENC_I_FRAME_CHROMA].size = sizeof(gen9_mbenc_i_frame_chroma_bin_vp8);
++ vp8_kernels_mbenc[VP8_MBENC_I_FRAME_CHROMA].size = gen9_mbenc_i_frame_chroma_bin_vp8_size;
+ vp8_kernels_mbenc[VP8_MBENC_P_FRAME].bin = gen9_mbenc_p_frame_bin_vp8;
+- vp8_kernels_mbenc[VP8_MBENC_P_FRAME].size = sizeof(gen9_mbenc_p_frame_bin_vp8);
++ vp8_kernels_mbenc[VP8_MBENC_P_FRAME].size = gen9_mbenc_p_frame_bin_vp8_size;
+
+ vp8_kernels_mpu[VP8_MPU].bin = gen9_mpu_bin_vp8;
+- vp8_kernels_mpu[VP8_MPU].size = sizeof(gen9_mpu_bin_vp8);
++ vp8_kernels_mpu[VP8_MPU].size = gen9_mpu_bin_vp8_size;
+
+ vp8_kernels_brc_update[VP8_BRC_UPDATE].bin = gen9_brc_update_bin_vp8;
+- vp8_kernels_brc_update[VP8_BRC_UPDATE].size = sizeof(gen9_brc_update_bin_vp8);
++ vp8_kernels_brc_update[VP8_BRC_UPDATE].size = gen9_brc_update_bin_vp8_size;
+
+ vp8_kernels_tpu[VP8_TPU].bin = gen9_tpu_bin_vp8;
+- vp8_kernels_tpu[VP8_TPU].size = sizeof(gen9_tpu_bin_vp8);
++ vp8_kernels_tpu[VP8_TPU].size = gen9_tpu_bin_vp8_size;
+
+ vp8_context->idrt_entry_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64);
+
+diff --git a/src/gen9_hevc_enc_kernels_binary.h b/src/gen9_hevc_enc_kernels_binary.h
+index 29659fa..484421e 100644
+--- a/src/gen9_hevc_enc_kernels_binary.h
++++ b/src/gen9_hevc_enc_kernels_binary.h
+@@ -29,8 +29,7 @@
+ #ifndef GEN9_HEVC_ENCODER_KERNELS_BINARY_H
+ #define GEN9_HEVC_ENCODER_KERNELS_BINARY_H
+
+-#define GEN9_HEVC_ENC_KERNEL_SIZE 149296
+-
+-const unsigned int gen9_hevc_encoder_kernels[GEN9_HEVC_ENC_KERNEL_SIZE];
++const unsigned int* gen9_hevc_encoder_kernels;
++int gen9_hevc_encoder_kernels_size;
+
+ #endif
+diff --git a/src/gen9_hevc_encoder.c b/src/gen9_hevc_encoder.c
+index 80d9d9c..4732011 100644
+--- a/src/gen9_hevc_encoder.c
++++ b/src/gen9_hevc_encoder.c
+@@ -7355,7 +7355,7 @@ gen9_hevc_vme_context_init(VADriverContextP ctx,
+ struct gen9_hevc_encoder_state *priv_state = NULL;
+
+ hevc_enc_kernel_ptr = (void *)gen9_hevc_encoder_kernels;
+- hevc_enc_kernel_size = sizeof(gen9_hevc_encoder_kernels);
++ hevc_enc_kernel_size = gen9_hevc_encoder_kernels_size;
+
+ vme_context = calloc(1, sizeof(*vme_context));
+ priv_ctx = calloc(1, sizeof(*priv_ctx));
+diff --git a/src/gen9_post_processing.c b/src/gen9_post_processing.c
+index faa6598..07f2539 100644
+--- a/src/gen9_post_processing.c
++++ b/src/gen9_post_processing.c
+@@ -37,6 +37,7 @@
+ #include "i965_render.h"
+ #include "intel_media.h"
+
++#include "kernels.h"
+ #include "gen8_post_processing.h"
+ #include "gen75_picture_process.h"
+ #include "intel_gen_vppapi.h"
+@@ -110,40 +111,28 @@ static const uint32_t pp_nv12_blending_gen9[][4] = {
+
+ #define DEFAULT_MOCS 0x02
+
+-static const uint32_t pp_10bit_scaling_gen9[][4] = {
+-#include "shaders/post_processing/gen9/conv_p010.g9b"
+-};
+-
+-static const uint32_t pp_yuv420p8_scaling_gen9[][4] = {
+-#include "shaders/post_processing/gen9/conv_nv12.g9b"
+-};
+-
+-static const uint32_t pp_10bit_8bit_scaling_gen9[][4] = {
+-#include "shaders/post_processing/gen9/conv_10bit_8bit.g9b"
+-};
+-
+ struct i965_kernel pp_common_scaling_gen9[] = {
+ {
+ "10bit to 10bit",
+ 0,
+- pp_10bit_scaling_gen9,
+- sizeof(pp_10bit_scaling_gen9),
++ NULL,
++ 0,
+ NULL,
+ },
+
+ {
+ "8bit to 8bit",
+ 1,
+- pp_yuv420p8_scaling_gen9,
+- sizeof(pp_yuv420p8_scaling_gen9),
++ NULL,
++ 0,
+ NULL,
+ },
+
+ {
+ "10bit to 8bit",
+ 2,
+- pp_10bit_8bit_scaling_gen9,
+- sizeof(pp_10bit_8bit_scaling_gen9),
++ NULL,
++ 0,
+ NULL,
+ },
+ };
+@@ -535,6 +524,15 @@ gen9_post_processing_context_init(VADriverContextP ctx,
+
+ pp_context->intel_post_processing = gen9_post_processing;
+
++ /* load kernels */
++ pp_common_scaling_gen9[0].bin = pp_10bit_scaling_gen9;
++ pp_common_scaling_gen9[0].size = pp_10bit_scaling_gen9_size;
++ pp_common_scaling_gen9[1].bin = pp_yuv420p8_scaling_gen9;
++ pp_common_scaling_gen9[1].size = pp_yuv420p8_scaling_gen9_size;
++ pp_common_scaling_gen9[2].bin = pp_10bit_8bit_scaling_gen9;
++ pp_common_scaling_gen9[2].size = pp_10bit_8bit_scaling_gen9_size;
++ // TODO: handle missing kernels
++
+ gpe_context = &pp_context->scaling_gpe_context;
+ gen8_gpe_load_kernels(ctx, gpe_context, pp_common_scaling_gen9, ARRAY_ELEMS(pp_common_scaling_gen9));
+ gpe_context->idrt.entry_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64);
+diff --git a/src/gen9_vp9_encoder.c b/src/gen9_vp9_encoder.c
+index 8389dde..290c0eb 100644
+--- a/src/gen9_vp9_encoder.c
++++ b/src/gen9_vp9_encoder.c
+@@ -4283,7 +4283,7 @@ gen9_vme_scaling_context_init_vp9(VADriverContextP ctx,
+ memset(&scale_kernel, 0, sizeof(scale_kernel));
+
+ intel_vp9_get_kernel_header_and_size((void *)media_vp9_kernels,
+- sizeof(media_vp9_kernels),
++ media_vp9_kernels_size,
+ INTEL_VP9_ENC_SCALING4X,
+ 0,
+ &scale_kernel);
+@@ -4304,7 +4304,7 @@ gen9_vme_scaling_context_init_vp9(VADriverContextP ctx,
+ memset(&scale_kernel, 0, sizeof(scale_kernel));
+
+ intel_vp9_get_kernel_header_and_size((void *)media_vp9_kernels,
+- sizeof(media_vp9_kernels),
++ media_vp9_kernels_size,
+ INTEL_VP9_ENC_SCALING2X,
+ 0,
+ &scale_kernel);
+@@ -4346,7 +4346,7 @@ gen9_vme_me_context_init_vp9(VADriverContextP ctx,
+ memset(&scale_kernel, 0, sizeof(scale_kernel));
+
+ intel_vp9_get_kernel_header_and_size((void *)media_vp9_kernels,
+- sizeof(media_vp9_kernels),
++ media_vp9_kernels_size,
+ INTEL_VP9_ENC_ME,
+ 0,
+ &scale_kernel);
+@@ -4394,7 +4394,7 @@ gen9_vme_mbenc_context_init_vp9(VADriverContextP ctx,
+ memset(&scale_kernel, 0, sizeof(scale_kernel));
+
+ intel_vp9_get_kernel_header_and_size((void *)media_vp9_kernels,
+- sizeof(media_vp9_kernels),
++ media_vp9_kernels_size,
+ INTEL_VP9_ENC_MBENC,
+ i,
+ &scale_kernel);
+@@ -4434,7 +4434,7 @@ gen9_vme_brc_context_init_vp9(VADriverContextP ctx,
+ memset(&scale_kernel, 0, sizeof(scale_kernel));
+
+ intel_vp9_get_kernel_header_and_size((void *)media_vp9_kernels,
+- sizeof(media_vp9_kernels),
++ media_vp9_kernels_size,
+ INTEL_VP9_ENC_BRC,
+ i,
+ &scale_kernel);
+@@ -4473,7 +4473,7 @@ gen9_vme_dys_context_init_vp9(VADriverContextP ctx,
+ memset(&scale_kernel, 0, sizeof(scale_kernel));
+
+ intel_vp9_get_kernel_header_and_size((void *)media_vp9_kernels,
+- sizeof(media_vp9_kernels),
++ media_vp9_kernels_size,
+ INTEL_VP9_ENC_DYS,
+ 0,
+ &scale_kernel);
+diff --git a/src/gen9_vp9_encoder_kernels.h b/src/gen9_vp9_encoder_kernels.h
+index 961919c..bf2cabe 100644
+--- a/src/gen9_vp9_encoder_kernels.h
++++ b/src/gen9_vp9_encoder_kernels.h
+@@ -29,7 +29,7 @@
+ #ifndef _GEN9_VP9_ENCODER_KERNELS_H
+ #define _GEN9_VP9_ENCODER_KERNELS_H
+
+-#define AllVP9ENC_SZ 39334
+-extern const unsigned int media_vp9_kernels[AllVP9ENC_SZ];
++extern const unsigned int* media_vp9_kernels;
++extern int media_vp9_kernels_size;
+
+ #endif
+diff --git a/src/i965_avc_encoder.c b/src/i965_avc_encoder.c
+index 692f3c1..885e151 100644
+--- a/src/i965_avc_encoder.c
++++ b/src/i965_avc_encoder.c
+@@ -9703,18 +9703,18 @@ gen9_avc_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *en
+ IS_BXT(i965->intel.device_info)) {
+ if (!encoder_context->fei_enabled) {
+ generic_ctx->enc_kernel_ptr = (void *)skl_avc_encoder_kernels;
+- generic_ctx->enc_kernel_size = sizeof(skl_avc_encoder_kernels);
++ generic_ctx->enc_kernel_size = skl_avc_encoder_kernels_size;
+ } else {
+ generic_ctx->enc_kernel_ptr = (void *)skl_avc_fei_encoder_kernels;
+- generic_ctx->enc_kernel_size = sizeof(skl_avc_fei_encoder_kernels);
++ generic_ctx->enc_kernel_size = skl_avc_fei_encoder_kernels_size;
+ }
+ } else if (IS_GEN8(i965->intel.device_info)) {
+ generic_ctx->enc_kernel_ptr = (void *)bdw_avc_encoder_kernels;
+- generic_ctx->enc_kernel_size = sizeof(bdw_avc_encoder_kernels);
++ generic_ctx->enc_kernel_size = bdw_avc_encoder_kernels_size;
+ } else if (IS_KBL(i965->intel.device_info) ||
+ IS_GLK(i965->intel.device_info)) {
+ generic_ctx->enc_kernel_ptr = (void *)kbl_avc_encoder_kernels;
+- generic_ctx->enc_kernel_size = sizeof(kbl_avc_encoder_kernels);
++ generic_ctx->enc_kernel_size = kbl_avc_encoder_kernels_size;
+ } else
+ goto allocate_structure_failed;
+
+diff --git a/src/i965_avc_encoder_kernels.h b/src/i965_avc_encoder_kernels.h
+index f7be54e..5576160 100644
+--- a/src/i965_avc_encoder_kernels.h
++++ b/src/i965_avc_encoder_kernels.h
+@@ -30,15 +30,14 @@
+ #ifndef _I965_AVC_ENCODER_KERNELS_H
+ #define _I965_AVC_ENCODER_KERNELS_H
+
+-#define AVC_ENC_SKL_SZ 96346
+-extern const unsigned int skl_avc_encoder_kernels[AVC_ENC_SKL_SZ];
++const unsigned int* skl_avc_encoder_kernels;
++const unsigned int* skl_avc_fei_encoder_kernels;
++const unsigned int* kbl_avc_encoder_kernels;
++const unsigned int* bdw_avc_encoder_kernels;
+
+-#define AVC_ENC_FEI_SKL_SZ 38000
+-extern const unsigned int skl_avc_fei_encoder_kernels[AVC_ENC_FEI_SKL_SZ];
++int skl_avc_encoder_kernels_size;
++int skl_avc_fei_encoder_kernels_size;
++int kbl_avc_encoder_kernels_size;
++int bdw_avc_encoder_kernels_size;
+
+-#define AVC_ENC_KBL_SZ 101994
+-extern const unsigned int kbl_avc_encoder_kernels[AVC_ENC_KBL_SZ];
+-
+-#define AVC_ENC_BDW_SZ 95532
+-extern const unsigned int bdw_avc_encoder_kernels[AVC_ENC_BDW_SZ];
+ #endif//_I965_AVC_ENCODER_KERNELS_H
+diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c
+index 02f4895..687c710 100644
+--- a/src/i965_drv_video.c
++++ b/src/i965_drv_video.c
+@@ -52,6 +52,8 @@
+
+ #include "gen9_vp9_encapi.h"
+
++#include "kernels.h"
++
+ #define CONFIG_ID_OFFSET 0x01000000
+ #define CONTEXT_ID_OFFSET 0x02000000
+ #define SURFACE_ID_OFFSET 0x04000000
+@@ -7066,6 +7068,8 @@ i965_Terminate(VADriverContextP ctx)
+ ctx->pDriverData = NULL;
+ }
+
++ unload_externel_kernels();
++
+ return VA_STATUS_SUCCESS;
+ }
+
+@@ -7075,6 +7079,8 @@ VA_DRIVER_INIT_FUNC(VADriverContextP ctx);
+ VAStatus
+ VA_DRIVER_INIT_FUNC(VADriverContextP ctx)
+ {
++ load_external_kernels();
++
+ struct VADriverVTable * const vtable = ctx->vtable;
+ struct VADriverVTableVPP * const vtable_vpp = ctx->vtable_vpp;
+
+diff --git a/src/kernels.c b/src/kernels.c
+new file mode 100644
+index 0000000..e8f55d4
+--- /dev/null
++++ b/src/kernels.c
+@@ -0,0 +1,202 @@
++#include "kernels.h"
++
++#include <dlfcn.h>
++
++#include "i965_avc_encoder_kernels.h"
++#include "gen9_hevc_enc_kernels.h"
++#include "gen9_vp9_encoder_kernels.h"
++
++#define DEFINE_EXTERNAL_KERNEL(name) \
++ const unsigned int* name = NULL; \
++ int name ##_size = 0
++
++DEFINE_EXTERNAL_KERNEL(media_vp9_kernels);
++DEFINE_EXTERNAL_KERNEL(gen9_hevc_encoder_kernels);
++DEFINE_EXTERNAL_KERNEL(skl_avc_encoder_kernels);
++DEFINE_EXTERNAL_KERNEL(skl_avc_fei_encoder_kernels);
++DEFINE_EXTERNAL_KERNEL(kbl_avc_encoder_kernels);
++DEFINE_EXTERNAL_KERNEL(bdw_avc_encoder_kernels);
++
++#undef DEFINE_EXTERNAL_KERNEL
++#define DEFINE_EXTERNAL_KERNEL(name) \
++ const uint32_t (*(name))[4] = NULL; \
++ int name ##_size = 0
++
++/* Gen 8 VP8 kernels */
++DEFINE_EXTERNAL_KERNEL(gen8_brc_init_bin_vp8);
++DEFINE_EXTERNAL_KERNEL(gen8_brc_reset_bin_vp8);
++DEFINE_EXTERNAL_KERNEL(gen8_scaling_bin_vp8);
++DEFINE_EXTERNAL_KERNEL(gen8_me_bin_vp8);
++DEFINE_EXTERNAL_KERNEL(gen8_mbenc_i_frame_dist_bin_vp8);
++DEFINE_EXTERNAL_KERNEL(gen8_mbenc_i_frame_luma_bin_vp8);
++DEFINE_EXTERNAL_KERNEL(gen8_mbenc_i_frame_chroma_bin_vp8);
++DEFINE_EXTERNAL_KERNEL(gen8_mbenc_p_frame_bin_vp8);
++DEFINE_EXTERNAL_KERNEL(gen8_mpu_bin_vp8);
++DEFINE_EXTERNAL_KERNEL(gen8_tpu_bin_vp8);
++DEFINE_EXTERNAL_KERNEL(gen8_brc_update_bin_vp8);
++
++/* Gen 9 VP8 kernels */
++DEFINE_EXTERNAL_KERNEL(gen9_brc_init_bin_vp8);
++DEFINE_EXTERNAL_KERNEL(gen9_brc_reset_bin_vp8);
++DEFINE_EXTERNAL_KERNEL(gen9_scaling_bin_vp8);
++DEFINE_EXTERNAL_KERNEL(gen9_me_bin_vp8);
++DEFINE_EXTERNAL_KERNEL(gen9_mbenc_i_frame_dist_bin_vp8);
++DEFINE_EXTERNAL_KERNEL(gen9_mbenc_i_frame_luma_bin_vp8);
++DEFINE_EXTERNAL_KERNEL(gen9_mbenc_i_frame_chroma_bin_vp8);
++DEFINE_EXTERNAL_KERNEL(gen9_mbenc_p_frame_bin_vp8);
++DEFINE_EXTERNAL_KERNEL(gen9_mpu_bin_vp8);
++DEFINE_EXTERNAL_KERNEL(gen9_tpu_bin_vp8);
++DEFINE_EXTERNAL_KERNEL(gen9_brc_update_bin_vp8);
++
++/* Gen 7.5 sharpening kernels */
++DEFINE_EXTERNAL_KERNEL(gen75_gpe_sharpening_h_blur);
++DEFINE_EXTERNAL_KERNEL(gen75_gpe_sharpening_v_blur);
++DEFINE_EXTERNAL_KERNEL(gen75_gpe_sharpening_unmask);
++
++/* Gen 8 sharpening kernels */
++DEFINE_EXTERNAL_KERNEL(gen8_gpe_sharpening_h_blur);
++DEFINE_EXTERNAL_KERNEL(gen8_gpe_sharpening_v_blur);
++DEFINE_EXTERNAL_KERNEL(gen8_gpe_sharpening_unmask);
++/* Gen 8 post-processing kernels */
++DEFINE_EXTERNAL_KERNEL(pp_yuv420p8_scaling_gen8);
++
++/* Gen 9 post-processing kernels */
++DEFINE_EXTERNAL_KERNEL(pp_10bit_scaling_gen9);
++DEFINE_EXTERNAL_KERNEL(pp_yuv420p8_scaling_gen9);
++DEFINE_EXTERNAL_KERNEL(pp_10bit_8bit_scaling_gen9);
++
++static void* dso_handle = NULL;
++/* Resolve every kernel symbol from the shaders module; returns 1 on success, 0 on failure. */
++int load_external_kernels(void) {
++ /* already loaded: report success without reopening the module */
++ if (dso_handle != NULL) {
++ return 1;
++ }
++
++ dso_handle = dlopen("./i965_drv_video_shaders.so", RTLD_LOCAL | RTLD_NOW); /* NOTE(review): a name containing '/' is resolved relative to the CWD, not the driver directory - confirm intended */
++ if (dso_handle == NULL) {
++ goto err;
++ }
++
++#define LOAD_EXTERNAL_KERNEL(name) \
++ do { \
++ const int* tmp_size = dlsym(dso_handle, #name "_size"); \
++ name = dlsym(dso_handle, #name); \
++ if (tmp_size == NULL || name == NULL) { \
++ goto err; \
++ } \
++ name##_size = *tmp_size; \
++ } while(0)
++
++ LOAD_EXTERNAL_KERNEL(media_vp9_kernels);
++ LOAD_EXTERNAL_KERNEL(gen9_hevc_encoder_kernels);
++ LOAD_EXTERNAL_KERNEL(skl_avc_encoder_kernels);
++ LOAD_EXTERNAL_KERNEL(skl_avc_fei_encoder_kernels);
++ LOAD_EXTERNAL_KERNEL(kbl_avc_encoder_kernels);
++ LOAD_EXTERNAL_KERNEL(bdw_avc_encoder_kernels);
++
++ LOAD_EXTERNAL_KERNEL(gen8_brc_init_bin_vp8);
++ LOAD_EXTERNAL_KERNEL(gen8_brc_reset_bin_vp8);
++ LOAD_EXTERNAL_KERNEL(gen8_scaling_bin_vp8);
++ LOAD_EXTERNAL_KERNEL(gen8_me_bin_vp8);
++ LOAD_EXTERNAL_KERNEL(gen8_mbenc_i_frame_dist_bin_vp8);
++ LOAD_EXTERNAL_KERNEL(gen8_mbenc_i_frame_luma_bin_vp8);
++ LOAD_EXTERNAL_KERNEL(gen8_mbenc_i_frame_chroma_bin_vp8);
++ LOAD_EXTERNAL_KERNEL(gen8_mbenc_p_frame_bin_vp8);
++ LOAD_EXTERNAL_KERNEL(gen8_mpu_bin_vp8);
++ LOAD_EXTERNAL_KERNEL(gen8_tpu_bin_vp8);
++ LOAD_EXTERNAL_KERNEL(gen8_brc_update_bin_vp8);
++
++ LOAD_EXTERNAL_KERNEL(gen9_brc_init_bin_vp8);
++ LOAD_EXTERNAL_KERNEL(gen9_brc_reset_bin_vp8);
++ LOAD_EXTERNAL_KERNEL(gen9_scaling_bin_vp8);
++ LOAD_EXTERNAL_KERNEL(gen9_me_bin_vp8);
++ LOAD_EXTERNAL_KERNEL(gen9_mbenc_i_frame_dist_bin_vp8);
++ LOAD_EXTERNAL_KERNEL(gen9_mbenc_i_frame_luma_bin_vp8);
++ LOAD_EXTERNAL_KERNEL(gen9_mbenc_i_frame_chroma_bin_vp8);
++ LOAD_EXTERNAL_KERNEL(gen9_mbenc_p_frame_bin_vp8);
++ LOAD_EXTERNAL_KERNEL(gen9_mpu_bin_vp8);
++ LOAD_EXTERNAL_KERNEL(gen9_tpu_bin_vp8);
++ LOAD_EXTERNAL_KERNEL(gen9_brc_update_bin_vp8);
++
++ LOAD_EXTERNAL_KERNEL(gen75_gpe_sharpening_h_blur);
++ LOAD_EXTERNAL_KERNEL(gen75_gpe_sharpening_v_blur);
++ LOAD_EXTERNAL_KERNEL(gen75_gpe_sharpening_unmask);
++
++ LOAD_EXTERNAL_KERNEL(gen8_gpe_sharpening_h_blur);
++ LOAD_EXTERNAL_KERNEL(gen8_gpe_sharpening_v_blur);
++ LOAD_EXTERNAL_KERNEL(gen8_gpe_sharpening_unmask);
++ LOAD_EXTERNAL_KERNEL(pp_yuv420p8_scaling_gen8);
++
++ LOAD_EXTERNAL_KERNEL(pp_10bit_scaling_gen9);
++ LOAD_EXTERNAL_KERNEL(pp_yuv420p8_scaling_gen9);
++ LOAD_EXTERNAL_KERNEL(pp_10bit_8bit_scaling_gen9);
++ return 1; /* every symbol resolved: do not fall through into the error path */
++err:
++ unload_externel_kernels(); /* failure: reset any kernel pointers set so far */
++ return 0;
++}
++
++void unload_externel_kernels(void) {
++ if (dso_handle == NULL) { /* never loaded (or already unloaded): nothing to do */
++ return;
++ }
++ /* Undo load_external_kernels(): clear every kernel pointer/size pair, then close the module. */
++#define UNLOAD_EXTERNAL_KERNEL(name) \
++ do { \
++ name = NULL; \
++ name##_size = 0; \
++ } while(0)
++
++ UNLOAD_EXTERNAL_KERNEL(media_vp9_kernels);
++ UNLOAD_EXTERNAL_KERNEL(gen9_hevc_encoder_kernels);
++ UNLOAD_EXTERNAL_KERNEL(skl_avc_encoder_kernels);
++ UNLOAD_EXTERNAL_KERNEL(skl_avc_fei_encoder_kernels);
++ UNLOAD_EXTERNAL_KERNEL(kbl_avc_encoder_kernels);
++ UNLOAD_EXTERNAL_KERNEL(bdw_avc_encoder_kernels);
++
++ UNLOAD_EXTERNAL_KERNEL(gen8_brc_init_bin_vp8);
++ UNLOAD_EXTERNAL_KERNEL(gen8_brc_reset_bin_vp8);
++ UNLOAD_EXTERNAL_KERNEL(gen8_scaling_bin_vp8);
++ UNLOAD_EXTERNAL_KERNEL(gen8_me_bin_vp8);
++ UNLOAD_EXTERNAL_KERNEL(gen8_mbenc_i_frame_dist_bin_vp8);
++ UNLOAD_EXTERNAL_KERNEL(gen8_mbenc_i_frame_luma_bin_vp8);
++ UNLOAD_EXTERNAL_KERNEL(gen8_mbenc_i_frame_chroma_bin_vp8);
++ UNLOAD_EXTERNAL_KERNEL(gen8_mbenc_p_frame_bin_vp8);
++ UNLOAD_EXTERNAL_KERNEL(gen8_mpu_bin_vp8);
++ UNLOAD_EXTERNAL_KERNEL(gen8_tpu_bin_vp8);
++ UNLOAD_EXTERNAL_KERNEL(gen8_brc_update_bin_vp8);
++
++ UNLOAD_EXTERNAL_KERNEL(gen9_brc_init_bin_vp8);
++ UNLOAD_EXTERNAL_KERNEL(gen9_brc_reset_bin_vp8);
++ UNLOAD_EXTERNAL_KERNEL(gen9_scaling_bin_vp8);
++ UNLOAD_EXTERNAL_KERNEL(gen9_me_bin_vp8);
++ UNLOAD_EXTERNAL_KERNEL(gen9_mbenc_i_frame_dist_bin_vp8);
++ UNLOAD_EXTERNAL_KERNEL(gen9_mbenc_i_frame_luma_bin_vp8);
++ UNLOAD_EXTERNAL_KERNEL(gen9_mbenc_i_frame_chroma_bin_vp8);
++ UNLOAD_EXTERNAL_KERNEL(gen9_mbenc_p_frame_bin_vp8);
++ UNLOAD_EXTERNAL_KERNEL(gen9_mpu_bin_vp8);
++ UNLOAD_EXTERNAL_KERNEL(gen9_tpu_bin_vp8);
++ UNLOAD_EXTERNAL_KERNEL(gen9_brc_update_bin_vp8);
++
++ UNLOAD_EXTERNAL_KERNEL(gen75_gpe_sharpening_h_blur);
++ UNLOAD_EXTERNAL_KERNEL(gen75_gpe_sharpening_v_blur);
++ UNLOAD_EXTERNAL_KERNEL(gen75_gpe_sharpening_unmask);
++
++ UNLOAD_EXTERNAL_KERNEL(gen8_gpe_sharpening_h_blur);
++ UNLOAD_EXTERNAL_KERNEL(gen8_gpe_sharpening_v_blur);
++ UNLOAD_EXTERNAL_KERNEL(gen8_gpe_sharpening_unmask);
++ UNLOAD_EXTERNAL_KERNEL(pp_yuv420p8_scaling_gen8);
++
++ UNLOAD_EXTERNAL_KERNEL(pp_10bit_scaling_gen9);
++ UNLOAD_EXTERNAL_KERNEL(pp_yuv420p8_scaling_gen9);
++ UNLOAD_EXTERNAL_KERNEL(pp_10bit_8bit_scaling_gen9);
++ /* The globals above no longer refer to the module, so release the handle. */
++ dlclose(dso_handle);
++ dso_handle = NULL;
++}
++
++int has_external_kernels(void) {
++ return dso_handle ? 1 : 0; /* 1 while the shaders module is open, 0 otherwise */
++}
++
+diff --git a/src/kernels.h b/src/kernels.h
+new file mode 100644
+index 0000000..a0f1b63
+--- /dev/null
++++ b/src/kernels.h
+@@ -0,0 +1,54 @@
++#ifndef KERNELS_H
++#define KERNELS_H
++
++#include <stddef.h>
++#include <stdint.h>
++
++/* Loader for the kernel binaries that live in the separate shaders module. */
++
++int load_external_kernels(void); /* 1 if the kernels are usable after the call, else 0 */
++void unload_externel_kernels(void); /* clears the kernel pointers and closes the module */
++int has_external_kernels(void); /* non-zero while the module is loaded */
++/* extern: declaration only, so including this header never emits a (tentative) definition. */
++#define DECLARE_EXTERNAL_KERNEL(name) \
++ extern const uint32_t (*(name))[4]; \
++ extern int name ##_size
++
++DECLARE_EXTERNAL_KERNEL(gen8_brc_init_bin_vp8);
++DECLARE_EXTERNAL_KERNEL(gen8_brc_reset_bin_vp8);
++DECLARE_EXTERNAL_KERNEL(gen8_scaling_bin_vp8);
++DECLARE_EXTERNAL_KERNEL(gen8_me_bin_vp8);
++DECLARE_EXTERNAL_KERNEL(gen8_mbenc_i_frame_dist_bin_vp8);
++DECLARE_EXTERNAL_KERNEL(gen8_mbenc_i_frame_luma_bin_vp8);
++DECLARE_EXTERNAL_KERNEL(gen8_mbenc_i_frame_chroma_bin_vp8);
++DECLARE_EXTERNAL_KERNEL(gen8_mbenc_p_frame_bin_vp8);
++DECLARE_EXTERNAL_KERNEL(gen8_mpu_bin_vp8);
++DECLARE_EXTERNAL_KERNEL(gen8_tpu_bin_vp8);
++DECLARE_EXTERNAL_KERNEL(gen8_brc_update_bin_vp8);
++
++DECLARE_EXTERNAL_KERNEL(gen9_brc_init_bin_vp8);
++DECLARE_EXTERNAL_KERNEL(gen9_brc_reset_bin_vp8);
++DECLARE_EXTERNAL_KERNEL(gen9_scaling_bin_vp8);
++DECLARE_EXTERNAL_KERNEL(gen9_me_bin_vp8);
++DECLARE_EXTERNAL_KERNEL(gen9_mbenc_i_frame_dist_bin_vp8);
++DECLARE_EXTERNAL_KERNEL(gen9_mbenc_i_frame_luma_bin_vp8);
++DECLARE_EXTERNAL_KERNEL(gen9_mbenc_i_frame_chroma_bin_vp8);
++DECLARE_EXTERNAL_KERNEL(gen9_mbenc_p_frame_bin_vp8);
++DECLARE_EXTERNAL_KERNEL(gen9_mpu_bin_vp8);
++DECLARE_EXTERNAL_KERNEL(gen9_tpu_bin_vp8);
++DECLARE_EXTERNAL_KERNEL(gen9_brc_update_bin_vp8);
++
++DECLARE_EXTERNAL_KERNEL(gen75_gpe_sharpening_h_blur);
++DECLARE_EXTERNAL_KERNEL(gen75_gpe_sharpening_v_blur);
++DECLARE_EXTERNAL_KERNEL(gen75_gpe_sharpening_unmask);
++
++DECLARE_EXTERNAL_KERNEL(gen8_gpe_sharpening_h_blur);
++DECLARE_EXTERNAL_KERNEL(gen8_gpe_sharpening_v_blur);
++DECLARE_EXTERNAL_KERNEL(gen8_gpe_sharpening_unmask);
++DECLARE_EXTERNAL_KERNEL(pp_yuv420p8_scaling_gen8);
++
++DECLARE_EXTERNAL_KERNEL(pp_10bit_scaling_gen9);
++DECLARE_EXTERNAL_KERNEL(pp_yuv420p8_scaling_gen9);
++DECLARE_EXTERNAL_KERNEL(pp_10bit_8bit_scaling_gen9);
++
++#endif
diff --git a/debian/patches/0002-Check-if-kernels-are-available.patch b/debian/patches/0002-Check-if-kernels-are-available.patch
new file mode 100644
index 0000000..54170a6
--- /dev/null
+++ b/debian/patches/0002-Check-if-kernels-are-available.patch
@@ -0,0 +1,42 @@
+From: Sebastian Ramacher <sebastian at ramacher.at>
+Date: Mon, 16 Oct 2017 19:23:28 +0200
+Subject: Check if kernels are available
+
+---
+ src/i965_drv_video.h | 9 ++++++---
+ 1 file changed, 6 insertions(+), 3 deletions(-)
+
+diff --git a/src/i965_drv_video.h b/src/i965_drv_video.h
+index 2d27b53..1c175c9 100644
+--- a/src/i965_drv_video.h
++++ b/src/i965_drv_video.h
+@@ -125,7 +125,8 @@
+ (ctx)->intel.has_bsd)
+
+ #define HAS_VP8_ENCODING(ctx) ((ctx)->codec_info->has_vp8_encoding && \
+- (ctx)->intel.has_bsd)
++ (ctx)->intel.has_bsd && \
++ has_external_kernels())
+
+ #define HAS_H264_MVC_DECODING(ctx) \
+ (HAS_H264_DECODING(ctx) && (ctx)->codec_info->h264_mvc_dec_profiles)
+@@ -144,7 +145,8 @@
+ (ctx)->intel.has_bsd)
+
+ #define HAS_VP9_DECODING(ctx) ((ctx)->codec_info->has_vp9_decoding && \
+- (ctx)->intel.has_bsd)
++ (ctx)->intel.has_bsd && \
++ has_external_kernels())
+
+ #define HAS_VP9_DECODING_PROFILE(ctx, profile) \
+ (HAS_VP9_DECODING(ctx) && \
+@@ -159,7 +161,8 @@
+ (ctx)->intel.has_bsd)
+
+ #define HAS_VP9_ENCODING(ctx) ((ctx)->codec_info->has_vp9_encoding && \
+- (ctx)->intel.has_bsd)
++ (ctx)->intel.has_bsd && \
++ has_external_kernels())
+
+ #define HAS_VP9_ENCODING_PROFILE(ctx, profile) \
+ (HAS_VP9_ENCODING(ctx) && \
diff --git a/debian/patches/0003-Allow-to-disable-building-of-split-kernels.patch b/debian/patches/0003-Allow-to-disable-building-of-split-kernels.patch
new file mode 100644
index 0000000..9ad79ff
--- /dev/null
+++ b/debian/patches/0003-Allow-to-disable-building-of-split-kernels.patch
@@ -0,0 +1,83 @@
+From: Sebastian Ramacher <sebastian at ramacher.at>
+Date: Mon, 16 Oct 2017 21:06:30 +0200
+Subject: Allow to disable building of split kernels
+
+---
+ configure.ac | 11 +++++++++++
+ src/Makefile.am | 12 +++---------
+ src/Makefile.sources | 10 ++++++++++
+ 3 files changed, 24 insertions(+), 9 deletions(-)
+
+diff --git a/configure.ac b/configure.ac
+index d7fd36a..1acf374 100644
+--- a/configure.ac
++++ b/configure.ac
+@@ -72,6 +72,12 @@ AC_ARG_ENABLE([tests],
+ [build tests @<:@default=no@:>@])],
+ [], [enable_tests="no"])
+
++AC_ARG_ENABLE(split-kernels,
++ [AC_HELP_STRING([--enable-split-kernels],
++ [split kernels without source @<:@default=yes@:>@])],
++ [], [enable_split_kernels="yes"])
++
++
+ AC_DISABLE_STATIC
+ AC_PROG_LIBTOOL
+ AC_PROG_CC
+@@ -129,6 +135,11 @@ fi
+
+ AM_CONDITIONAL(ENABLE_TESTS, test "$enable_tests" = "yes")
+
++if test "$enable_split_kernels" = "yes"; then
+ AC_DEFINE([HAVE_SPLIT_KERNELS], [1], [Defined to 1 if split kernels are enabled])
++fi
++AM_CONDITIONAL(HAVE_SPLIT_KERNELS, test "$enable_split_kernels" = "yes")
++
+ VA_VERSION=`$PKG_CONFIG --modversion libva`
+ VA_MAJOR_VERSION=`echo "$VA_VERSION" | cut -d'.' -f1`
+ VA_MINOR_VERSION=`echo "$VA_VERSION" | cut -d'.' -f2`
+diff --git a/src/Makefile.am b/src/Makefile.am
+index 7cfb46c..be403bc 100644
+--- a/src/Makefile.am
++++ b/src/Makefile.am
+@@ -66,18 +66,12 @@ i965_drv_video_la_LIBADD = libi965_drv_video.la $(driver_libs)
+ i965_drv_video_la_SOURCES =
+
+ # shaders module
++if HAVE_SPLIT_KERNELS
+ i965_drv_video_shaders_la_LTLIBRARIES = i965_drv_video_shaders.la
+ i965_drv_video_shaders_ladir = $(LIBVA_DRIVERS_PATH)
+ i965_drv_video_shaders_la_LDFLAGS = -module $(driver_ldflags)
+-i965_drv_video_shaders_la_SOURCES = \
+- gen75_vpp_gpe_kernels.c \
+- gen8_encoder_vp8_kernels.c \
+- gen8_post_processing_kernels.c \
+- gen9_hevc_enc_kernels_binary.c \
+- gen9_post_processing_kernels.c \
+- gen9_vp9_encoder_kernels.c \
+- i965_avc_encoder_kernels.c \
+- $(NULL)
++i965_drv_video_shaders_la_SOURCES = $(kernel_source_c)
++endif
+
+ noinst_HEADERS = $(source_h)
+
+diff --git a/src/Makefile.sources b/src/Makefile.sources
+index 0ba05f8..086db8f 100644
+--- a/src/Makefile.sources
++++ b/src/Makefile.sources
+@@ -131,3 +131,13 @@ source_h = \
+ gen9_hevc_enc_utils.h \
+ gen9_hevc_encoder.h \
+ $(NULL)
++
++kernel_source_c = \
++ gen75_vpp_gpe_kernels.c \
++ gen8_encoder_vp8_kernels.c \
++ gen8_post_processing_kernels.c \
++ gen9_hevc_enc_kernels_binary.c \
++ gen9_post_processing_kernels.c \
++ gen9_vp9_encoder_kernels.c \
++ i965_avc_encoder_kernels.c \
++ $(NULL)
diff --git a/debian/patches/0004-Load-scaling-kernels-only-if-available.patch b/debian/patches/0004-Load-scaling-kernels-only-if-available.patch
new file mode 100644
index 0000000..249ee05
--- /dev/null
+++ b/debian/patches/0004-Load-scaling-kernels-only-if-available.patch
@@ -0,0 +1,183 @@
+From: Sebastian Ramacher <sebastian at ramacher.at>
+Date: Mon, 16 Oct 2017 21:45:05 +0200
+Subject: Load scaling kernels only if available
+
+---
+ src/gen8_post_processing.c | 72 ++++++++++++++++++++++---------------------
+ src/gen9_post_processing.c | 76 ++++++++++++++++++++++++----------------------
+ 2 files changed, 77 insertions(+), 71 deletions(-)
+
+diff --git a/src/gen8_post_processing.c b/src/gen8_post_processing.c
+index 3317452..ea01ede 100644
+--- a/src/gen8_post_processing.c
++++ b/src/gen8_post_processing.c
+@@ -1669,43 +1669,45 @@ gen8_post_processing_context_init(VADriverContextP ctx,
+ * I420 ->I420
+ * I420 ->NV12
+ */
+- gpe_context = &pp_context->scaling_gpe_context;
+- memset(&scaling_kernel, 0, sizeof(scaling_kernel));
+- scaling_kernel.bin = pp_yuv420p8_scaling_gen8;
+- scaling_kernel.size = pp_yuv420p8_scaling_gen8_size;
+- gen8_gpe_load_kernels(ctx, gpe_context, &scaling_kernel, 1);
+- gpe_context->idrt.entry_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64);
+- gpe_context->idrt.max_entries = 1;
+- gpe_context->sampler.entry_size = ALIGN(sizeof(struct gen8_sampler_state), 64);
+- gpe_context->sampler.max_entries = 1;
+- gpe_context->curbe.length = ALIGN(sizeof(struct scaling_input_parameter), 32);
+-
+- gpe_context->surface_state_binding_table.max_entries = MAX_SCALING_SURFACES;
+- gpe_context->surface_state_binding_table.binding_table_offset = 0;
+- gpe_context->surface_state_binding_table.surface_state_offset = ALIGN(MAX_SCALING_SURFACES * 4, 64);
+- gpe_context->surface_state_binding_table.length = ALIGN(MAX_SCALING_SURFACES * 4, 64) + ALIGN(MAX_SCALING_SURFACES * SURFACE_STATE_PADDED_SIZE_GEN8, 64);
+-
+- if (i965->intel.eu_total > 0) {
+- gpe_context->vfe_state.max_num_threads = i965->intel.eu_total * 6;
+- } else {
+- if (i965->intel.has_bsd2)
+- gpe_context->vfe_state.max_num_threads = 300;
+- else
+- gpe_context->vfe_state.max_num_threads = 60;
++ if (has_external_kernels()) {
++ gpe_context = &pp_context->scaling_gpe_context;
++ memset(&scaling_kernel, 0, sizeof(scaling_kernel));
++ scaling_kernel.bin = pp_yuv420p8_scaling_gen8;
++ scaling_kernel.size = pp_yuv420p8_scaling_gen8_size;
++ gen8_gpe_load_kernels(ctx, gpe_context, &scaling_kernel, 1);
++ gpe_context->idrt.entry_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64);
++ gpe_context->idrt.max_entries = 1;
++ gpe_context->sampler.entry_size = ALIGN(sizeof(struct gen8_sampler_state), 64);
++ gpe_context->sampler.max_entries = 1;
++ gpe_context->curbe.length = ALIGN(sizeof(struct scaling_input_parameter), 32);
++
++ gpe_context->surface_state_binding_table.max_entries = MAX_SCALING_SURFACES;
++ gpe_context->surface_state_binding_table.binding_table_offset = 0;
++ gpe_context->surface_state_binding_table.surface_state_offset = ALIGN(MAX_SCALING_SURFACES * 4, 64);
++ gpe_context->surface_state_binding_table.length = ALIGN(MAX_SCALING_SURFACES * 4, 64) + ALIGN(MAX_SCALING_SURFACES * SURFACE_STATE_PADDED_SIZE_GEN8, 64);
++
++ if (i965->intel.eu_total > 0) {
++ gpe_context->vfe_state.max_num_threads = i965->intel.eu_total * 6;
++ } else {
++ if (i965->intel.has_bsd2)
++ gpe_context->vfe_state.max_num_threads = 300;
++ else
++ gpe_context->vfe_state.max_num_threads = 60;
++ }
++
++ gpe_context->vfe_state.curbe_allocation_size = 37;
++ gpe_context->vfe_state.urb_entry_size = 16;
++ if (i965->intel.has_bsd2)
++ gpe_context->vfe_state.num_urb_entries = 127;
++ else
++ gpe_context->vfe_state.num_urb_entries = 64;
++
++ gpe_context->vfe_state.gpgpu_mode = 0;
++
++ gen8_gpe_context_init(ctx, gpe_context);
++ pp_context->scaling_gpe_context_initialized |= VPPGPE_8BIT_8BIT;
+ }
+
+- gpe_context->vfe_state.curbe_allocation_size = 37;
+- gpe_context->vfe_state.urb_entry_size = 16;
+- if (i965->intel.has_bsd2)
+- gpe_context->vfe_state.num_urb_entries = 127;
+- else
+- gpe_context->vfe_state.num_urb_entries = 64;
+-
+- gpe_context->vfe_state.gpgpu_mode = 0;
+-
+- gen8_gpe_context_init(ctx, gpe_context);
+- pp_context->scaling_gpe_context_initialized |= VPPGPE_8BIT_8BIT;
+-
+ return;
+ }
+
+diff --git a/src/gen9_post_processing.c b/src/gen9_post_processing.c
+index 07f2539..6626177 100644
+--- a/src/gen9_post_processing.c
++++ b/src/gen9_post_processing.c
+@@ -524,44 +524,48 @@ gen9_post_processing_context_init(VADriverContextP ctx,
+
+ pp_context->intel_post_processing = gen9_post_processing;
+
+- /* load kernels */
+- pp_common_scaling_gen9[0].bin = pp_10bit_scaling_gen9;
+- pp_common_scaling_gen9[0].size = pp_10bit_scaling_gen9_size;
+- pp_common_scaling_gen9[1].bin = pp_yuv420p8_scaling_gen9;
+- pp_common_scaling_gen9[1].size = pp_yuv420p8_scaling_gen9_size;
+- pp_common_scaling_gen9[2].bin = pp_10bit_8bit_scaling_gen9;
+- pp_common_scaling_gen9[2].size = pp_10bit_8bit_scaling_gen9_size;
+- // TODO: handle missing kernels
+-
+- gpe_context = &pp_context->scaling_gpe_context;
+- gen8_gpe_load_kernels(ctx, gpe_context, pp_common_scaling_gen9, ARRAY_ELEMS(pp_common_scaling_gen9));
+- gpe_context->idrt.entry_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64);
+- gpe_context->idrt.max_entries = ALIGN(ARRAY_ELEMS(pp_common_scaling_gen9), 2);
+- gpe_context->sampler.entry_size = ALIGN(sizeof(struct gen8_sampler_state), 64);
+- gpe_context->sampler.max_entries = 1;
+- gpe_context->curbe.length = ALIGN(sizeof(struct scaling_input_parameter), 64);
+-
+- gpe_context->surface_state_binding_table.max_entries = MAX_SCALING_SURFACES;
+- gpe_context->surface_state_binding_table.binding_table_offset = 0;
+- gpe_context->surface_state_binding_table.surface_state_offset = ALIGN(MAX_SCALING_SURFACES * 4, 64);
+- gpe_context->surface_state_binding_table.length = ALIGN(MAX_SCALING_SURFACES * 4, 64) + ALIGN(MAX_SCALING_SURFACES * SURFACE_STATE_PADDED_SIZE_GEN9, 64);
+-
+- if (i965->intel.eu_total > 0) {
+- gpe_context->vfe_state.max_num_threads = i965->intel.eu_total * 6;
+- } else {
+- if (i965->intel.has_bsd2)
+- gpe_context->vfe_state.max_num_threads = 300;
+- else
+- gpe_context->vfe_state.max_num_threads = 60;
++ /* load kernels if available */
++ if (pp_common_scaling_gen9[0].bin == NULL && has_external_kernels()) {
++ pp_common_scaling_gen9[0].bin = pp_10bit_scaling_gen9;
++ pp_common_scaling_gen9[0].size = pp_10bit_scaling_gen9_size;
++ pp_common_scaling_gen9[1].bin = pp_yuv420p8_scaling_gen9;
++ pp_common_scaling_gen9[1].size = pp_yuv420p8_scaling_gen9_size;
++ pp_common_scaling_gen9[2].bin = pp_10bit_8bit_scaling_gen9;
++ pp_common_scaling_gen9[2].size = pp_10bit_8bit_scaling_gen9_size;
+ }
+
+- gpe_context->vfe_state.curbe_allocation_size = 37;
+- gpe_context->vfe_state.urb_entry_size = 16;
+- gpe_context->vfe_state.num_urb_entries = 127;
+- gpe_context->vfe_state.gpgpu_mode = 0;
+-
+- gen8_gpe_context_init(ctx, gpe_context);
+- pp_context->scaling_gpe_context_initialized |= (VPPGPE_8BIT_8BIT | VPPGPE_10BIT_10BIT | VPPGPE_10BIT_8BIT);
++ /* initialize scaling context only if kernels are available */
++ if (pp_common_scaling_gen9[0].bin != NULL) {
++ gpe_context = &pp_context->scaling_gpe_context;
++ gen8_gpe_load_kernels(ctx, gpe_context, pp_common_scaling_gen9, ARRAY_ELEMS(pp_common_scaling_gen9));
++ gpe_context->idrt.entry_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64);
++ gpe_context->idrt.max_entries = ALIGN(ARRAY_ELEMS(pp_common_scaling_gen9), 2);
++ gpe_context->sampler.entry_size = ALIGN(sizeof(struct gen8_sampler_state), 64);
++ gpe_context->sampler.max_entries = 1;
++ gpe_context->curbe.length = ALIGN(sizeof(struct scaling_input_parameter), 64);
++
++ gpe_context->surface_state_binding_table.max_entries = MAX_SCALING_SURFACES;
++ gpe_context->surface_state_binding_table.binding_table_offset = 0;
++ gpe_context->surface_state_binding_table.surface_state_offset = ALIGN(MAX_SCALING_SURFACES * 4, 64);
++ gpe_context->surface_state_binding_table.length = ALIGN(MAX_SCALING_SURFACES * 4, 64) + ALIGN(MAX_SCALING_SURFACES * SURFACE_STATE_PADDED_SIZE_GEN9, 64);
++
++ if (i965->intel.eu_total > 0) {
++ gpe_context->vfe_state.max_num_threads = i965->intel.eu_total * 6;
++ } else {
++ if (i965->intel.has_bsd2)
++ gpe_context->vfe_state.max_num_threads = 300;
++ else
++ gpe_context->vfe_state.max_num_threads = 60;
++ }
++
++ gpe_context->vfe_state.curbe_allocation_size = 37;
++ gpe_context->vfe_state.urb_entry_size = 16;
++ gpe_context->vfe_state.num_urb_entries = 127;
++ gpe_context->vfe_state.gpgpu_mode = 0;
++
++ gen8_gpe_context_init(ctx, gpe_context);
++ pp_context->scaling_gpe_context_initialized |= (VPPGPE_8BIT_8BIT | VPPGPE_10BIT_10BIT | VPPGPE_10BIT_8BIT);
++ }
+
+ return;
+ }
diff --git a/debian/patches/series b/debian/patches/series
new file mode 100644
index 0000000..6088c96
--- /dev/null
+++ b/debian/patches/series
@@ -0,0 +1,4 @@
+0001-Move-shaders-without-source-to-extra-library.patch
+0002-Check-if-kernels-are-available.patch
+0003-Allow-to-disable-building-of-split-kernels.patch
+0004-Load-scaling-kernels-only-if-available.patch
--
intel-vaapi-driver packaging
More information about the pkg-multimedia-commits
mailing list